Function: _Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn. ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 4.99% |
---|
Function: _Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn. ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 4.99% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/PdV.cpp: 48 - 63 |
-------------------------------------------------------------------------------- |
48: #pragma omp parallel for simd collapse(2) |
49: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
50: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
51: double left_flux = (xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel0(i, j) + xvel0(i + 0, j + 1))) * 0.25 * dt * 0.5; |
52: double right_flux = |
53: (xarea(i + 1, j + 0) * (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1) + xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1))) * 0.25 * dt * |
54: 0.5; |
55: double bottom_flux = (yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel0(i, j) + yvel0(i + 1, j + 0))) * 0.25 * dt * 0.5; |
56: double top_flux = (yarea(i + 0, j + 1) * (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1) + yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1))) * |
57: 0.25 * dt * 0.5; |
58: double total_flux = right_flux - left_flux + top_flux - bottom_flux; |
59: double volume_change_s = volume(i, j) / (volume(i, j) + total_flux); |
60: double recip_volume = 1.0 / volume(i, j); |
61: double energy_change = (pressure(i, j) / density0(i, j) + viscosity(i, j) / density0(i, j)) * total_flux * recip_volume; |
62: energy1(i, j) = energy0(i, j) - energy_change; |
63: density1(i, j) = density0(i, j) * volume_change_s; |
0x43e460 PUSH %RBP |
0x43e461 MOV %RSP,%RBP |
0x43e464 PUSH %R15 |
0x43e466 PUSH %R14 |
0x43e468 PUSH %R13 |
0x43e46a PUSH %R12 |
0x43e46c PUSH %RBX |
0x43e46d MOV %RDI,%RBX |
0x43e470 AND $-0x40,%RSP |
0x43e474 SUB $0x1c0,%RSP |
0x43e47b MOV 0x68(%RDI),%EAX |
0x43e47e MOV 0x6c(%RDI),%EDX |
0x43e481 MOV 0x60(%RDI),%EDI |
0x43e484 MOV 0x64(%RBX),%ECX |
0x43e487 ADD $0x2,%EDX |
0x43e48a LEA 0x1(%RAX),%R15D |
0x43e48e LEA 0x1(%RDI),%ESI |
0x43e491 MOV %EDX,0x80(%RSP) |
0x43e498 MOV %ESI,0x7c(%RSP) |
0x43e49c CMP %EDX,%R15D |
0x43e49f JGE 43ef8b |
0x43e4a5 MOV %EDX,%R13D |
0x43e4a8 LEA 0x2(%RCX),%R14D |
0x43e4ac SUB %R15D,%R13D |
0x43e4af CMP %R14D,%ESI |
0x43e4b2 JGE 43ef8b |
0x43e4b8 MOV %R14D,%R8D |
0x43e4bb SUB %ESI,%R8D |
0x43e4be MOV %R8D,0x84(%RSP) |
0x43e4c6 CALL 404650 <omp_get_num_threads@plt> |
0x43e4cb MOV %EAX,%R12D |
0x43e4ce CALL 404540 <omp_get_thread_num@plt> |
0x43e4d3 XOR %EDX,%EDX |
0x43e4d5 MOV %EAX,%R9D |
0x43e4d8 MOV 0x84(%RSP),%EAX |
0x43e4df IMUL %R13D,%EAX |
0x43e4e3 DIV %R12D |
0x43e4e6 MOV %EAX,%ECX |
0x43e4e8 CMP %EDX,%R9D |
0x43e4eb JB 43efb7 |
0x43e4f1 IMUL %ECX,%R9D |
0x43e4f5 LEA (%R9,%RDX,1),%EAX |
0x43e4f9 LEA (%RCX,%RAX,1),%R10D |
0x43e4fd MOV %EAX,0x1b8(%RSP) |
0x43e504 MOV %R10D,0x78(%RSP) |
0x43e509 CMP %R10D,%EAX |
0x43e50c JAE 43ef8b |
0x43e512 XOR %EDX,%EDX |
0x43e514 MOV 0x7c(%RSP),%R11D |
0x43e519 VMOVSD 0x20eff(%RIP),%XMM2 |
0x43e521 DIVL 0x84(%RSP) |
0x43e528 MOV 0x8(%RBX),%RSI |
0x43e52c MOV 0x10(%RBX),%R13 |
0x43e530 MOV 0x58(%RBX),%R8 |
0x43e534 MOV 0x18(%RBX),%R12 |
0x43e538 MOV 0x40(%RBX),%R9 |
0x43e53c MOV 0x20(%RBX),%R10 |
0x43e540 MOV %RSI,0x70(%RSP) |
0x43e545 VMOVSD 0x2004b(%RIP),%XMM3 |
0x43e54d MOV %R13,0x60(%RSP) |
0x43e552 MOV %R8,0x58(%RSP) |
0x43e557 MOV %R12,0x50(%RSP) |
0x43e55c MOV %R9,0x48(%RSP) |
0x43e561 MOV %R10,0x40(%RSP) |
0x43e566 VMULSD (%RBX),%XMM2,%XMM2 |
0x43e56a VBROADCASTSD %XMM3,%YMM6 |
0x43e56f VBROADCASTSD %XMM3,%ZMM4 |
0x43e575 VBROADCASTSD %XMM2,%YMM1 |
0x43e57a VBROADCASTSD %XMM2,%ZMM5 |
0x43e580 LEA (%RDX,%R11,1),%EDI |
0x43e584 LEA (%RAX,%R15,1),%R15D |
0x43e588 MOV %R14D,%EAX |
0x43e58b MOV 0x48(%RBX),%RDX |
0x43e58f MOV 0x50(%RBX),%R14 |
0x43e593 MOV 0x30(%RBX),%R11 |
0x43e597 MOV %EDI,0x148(%RSP) |
0x43e59e SUB %EDI,%EAX |
0x43e5a0 MOV 0x38(%RBX),%RDI |
0x43e5a4 MOV 0x28(%RBX),%RBX |
0x43e5a8 MOVSXD %R15D,%R15 |
0x43e5ab MOV %RDX,0x38(%RSP) |
0x43e5b0 MOV %R14,0x68(%RSP) |
0x43e5b5 MOV %R11,0x30(%RSP) |
0x43e5ba MOV %RDI,0x28(%RSP) |
0x43e5bf MOV %RBX,0x20(%RSP) |
0x43e5c4 MOV %R15,0x140(%RSP) |
0x43e5cc NOPL (%RAX) |
(271) 0x43e5d0 CMP %EAX,%ECX |
(271) 0x43e5d2 CMOVBE %ECX,%EAX |
(271) 0x43e5d5 MOV 0x1b8(%RSP),%ECX |
(271) 0x43e5dc MOV %EAX,0x14c(%RSP) |
(271) 0x43e5e3 ADD %ECX,%EAX |
(271) 0x43e5e5 MOV %EAX,0x1bc(%RSP) |
(271) 0x43e5ec CMP %EAX,%ECX |
(271) 0x43e5ee JAE 43ef3e |
(271) 0x43e5f4 MOV 0x68(%RSP),%R14 |
(271) 0x43e5f9 MOV 0x140(%RSP),%RDI |
(271) 0x43e601 MOV 0x70(%RSP),%RAX |
(271) 0x43e606 MOV 0x58(%RSP),%R11 |
(271) 0x43e60b MOV (%R14),%R8 |
(271) 0x43e60e MOV %RDI,%RCX |
(271) 0x43e611 MOV 0x60(%RSP),%R9 |
(271) 0x43e616 MOV %RDI,%RBX |
(271) 0x43e619 MOV 0x10(%RAX),%R13 |
(271) 0x43e61d MOV (%RAX),%RSI |
(271) 0x43e620 IMUL %R8,%RCX |
(271) 0x43e624 MOV (%R11),%RAX |
(271) 0x43e627 MOV 0x10(%R11),%R15 |
(271) 0x43e62b MOV %RDI,%R11 |
(271) 0x43e62e MOV (%R9),%RDX |
(271) 0x43e631 MOV 0x10(%R9),%R10 |
(271) 0x43e635 IMUL %RDI,%RSI |
(271) 0x43e639 MOV %R13,0x90(%RSP) |
(271) 0x43e641 IMUL %RAX,%R11 |
(271) 0x43e645 MOV %R15,0x1b0(%RSP) |
(271) 0x43e64d MOV 0x48(%RSP),%R15 |
(271) 0x43e652 ADD %RCX,%R8 |
(271) 0x43e655 IMUL %RDX,%RBX |
(271) 0x43e659 MOV 0x10(%R14),%R12 |
(271) 0x43e65d MOV %R10,0xc0(%RSP) |
(271) 0x43e665 MOV %R8,0xb8(%RSP) |
(271) 0x43e66d MOV 0x50(%RSP),%R8 |
(271) 0x43e672 LEA (%RAX,%R11,1),%R14 |
(271) 0x43e676 MOV %RDI,%RAX |
(271) 0x43e679 MOV 0x10(%R15),%R10 |
(271) 0x43e67d MOV %RSI,0x88(%RSP) |
(271) 0x43e685 IMUL (%R8),%RDI |
(271) 0x43e689 MOV 0x10(%R8),%R9 |
(271) 0x43e68d ADD %RBX,%RDX |
(271) 0x43e690 MOV %R14,0xd0(%RSP) |
(271) 0x43e698 MOV %R10,0xe8(%RSP) |
(271) 0x43e6a0 MOV %R9,0xd8(%RSP) |
(271) 0x43e6a8 MOV %R12,0x1a8(%RSP) |
(271) 0x43e6b0 MOV %RCX,0x98(%RSP) |
(271) 0x43e6b8 MOV %RBX,0xa0(%RSP) |
(271) 0x43e6c0 MOV %R11,0xa8(%RSP) |
(271) 0x43e6c8 MOV %RDX,0xc8(%RSP) |
(271) 0x43e6d0 MOV %RDI,0xb0(%RSP) |
(271) 0x43e6d8 MOV (%R15),%RDX |
(271) 0x43e6db MOV 0x40(%RSP),%R9 |
(271) 0x43e6e0 MOV 0x38(%RSP),%R15 |
(271) 0x43e6e5 IMUL %RAX,%RDX |
(271) 0x43e6e9 MOV (%R9),%R8 |
(271) 0x43e6ec MOV 0x10(%R9),%R14 |
(271) 0x43e6f0 MOV 0x10(%R15),%R10 |
(271) 0x43e6f4 IMUL %RAX,%R8 |
(271) 0x43e6f8 MOV %R14,0xf8(%RSP) |
(271) 0x43e700 MOV 0x30(%RSP),%R14 |
(271) 0x43e705 MOV %RDX,0xe0(%RSP) |
(271) 0x43e70d MOV (%R15),%RDX |
(271) 0x43e710 MOV 0x28(%RSP),%R15 |
(271) 0x43e715 MOV 0x10(%R14),%R9 |
(271) 0x43e719 MOV %R10,0x138(%RSP) |
(271) 0x43e721 MOV %R8,0xf0(%RSP) |
(271) 0x43e729 MOV (%R14),%R8 |
(271) 0x43e72c IMUL %RAX,%RDX |
(271) 0x43e730 MOV 0x10(%R15),%R10 |
(271) 0x43e734 MOV (%R15),%R14 |
(271) 0x43e737 MOV %R9,0x128(%RSP) |
(271) 0x43e73f IMUL %RAX,%R8 |
(271) 0x43e743 MOV 0x20(%RSP),%R15 |
(271) 0x43e748 IMUL %RAX,%R14 |
(271) 0x43e74c MOV %R10,0x118(%RSP) |
(271) 0x43e754 MOV 0x10(%R15),%R9 |
(271) 0x43e758 MOV %RDX,0x100(%RSP) |
(271) 0x43e760 MOV %R8,0x130(%RSP) |
(271) 0x43e768 MOV (%R15),%R8 |
(271) 0x43e76b MOV %R14,0x120(%RSP) |
(271) 0x43e773 IMUL %RAX,%R8 |
(271) 0x43e777 MOV 0x14c(%RSP),%EAX |
(271) 0x43e77e MOV %R9,0x108(%RSP) |
(271) 0x43e786 LEA -0x1(%RAX),%R10D |
(271) 0x43e78a MOV %R8,0x110(%RSP) |
(271) 0x43e792 CMP $0x6,%R10D |
(271) 0x43e796 JBE 43efa0 |
(271) 0x43e79c MOVSXD 0x148(%RSP),%RAX |
(271) 0x43e7a4 MOV 0xb8(%RSP),%R9 |
(271) 0x43e7ac MOV %R12,%R8 |
(271) 0x43e7af LEA (%R9,%RAX,1),%R10 |
(271) 0x43e7b3 ADD %RAX,%RSI |
(271) 0x43e7b6 LEA (%R11,%RAX,1),%R9 |
(271) 0x43e7ba ADD %RAX,%RCX |
(271) 0x43e7bd SAL $0x3,%RSI |
(271) 0x43e7c1 SAL $0x3,%R10 |
(271) 0x43e7c5 LEA (%R12,%R10,1),%RDX |
(271) 0x43e7c9 SAL $0x3,%RCX |
(271) 0x43e7cd LEA (%R13,%RSI,1),%R15 |
(271) 0x43e7d2 SAL $0x3,%R9 |
(271) 0x43e7d6 LEA 0x8(%R13,%RSI,1),%R13 |
(271) 0x43e7db LEA 0x8(%R8,%R10,1),%RSI |
(271) 0x43e7e0 MOV 0xc0(%RSP),%R10 |
(271) 0x43e7e8 MOV %RDX,0x160(%RSP) |
(271) 0x43e7f0 LEA (%R12,%RCX,1),%R14 |
(271) 0x43e7f4 MOV %RSI,0x198(%RSP) |
(271) 0x43e7fc LEA 0x8(%R12,%RCX,1),%R12 |
(271) 0x43e801 MOV 0x1b0(%RSP),%RSI |
(271) 0x43e809 LEA (%RBX,%RAX,1),%RCX |
(271) 0x43e80d MOV 0xc8(%RSP),%RDX |
(271) 0x43e815 LEA (%R10,%RCX,8),%RBX |
(271) 0x43e819 MOV 0xd0(%RSP),%RCX |
(271) 0x43e821 LEA (%RSI,%R9,1),%R11 |
(271) 0x43e825 MOV %R11,0x1a0(%RSP) |
(271) 0x43e82d LEA 0x8(%RSI,%R9,1),%R11 |
(271) 0x43e832 LEA (%RDX,%RAX,1),%R8 |
(271) 0x43e836 LEA (%RCX,%RAX,1),%R9 |
(271) 0x43e83a MOV 0xe0(%RSP),%RCX |
(271) 0x43e842 LEA (%R10,%R8,8),%R10 |
(271) 0x43e846 SAL $0x3,%R9 |
(271) 0x43e84a LEA (%RSI,%R9,1),%RDX |
(271) 0x43e84e LEA 0x8(%RSI,%R9,1),%R9 |
(271) 0x43e853 ADD %RAX,%RCX |
(271) 0x43e856 LEA (%RDI,%RAX,1),%RSI |
(271) 0x43e85a MOV %RDX,0x190(%RSP) |
(271) 0x43e862 MOV 0xd8(%RSP),%RDI |
(271) 0x43e86a MOV 0xe8(%RSP),%RDX |
(271) 0x43e872 LEA (%RDI,%RSI,8),%R8 |
(271) 0x43e876 MOV 0xf0(%RSP),%RDI |
(271) 0x43e87e LEA (%RDX,%RCX,8),%RSI |
(271) 0x43e882 MOV 0xf8(%RSP),%RDX |
(271) 0x43e88a MOV %RSI,0x188(%RSP) |
(271) 0x43e892 MOV 0x100(%RSP),%RSI |
(271) 0x43e89a LEA (%RDI,%RAX,1),%RCX |
(271) 0x43e89e LEA (%RDX,%RCX,8),%RDX |
(271) 0x43e8a2 MOV 0x138(%RSP),%RDI |
(271) 0x43e8aa LEA (%RSI,%RAX,1),%RCX |
(271) 0x43e8ae MOV 0x130(%RSP),%RSI |
(271) 0x43e8b6 LEA (%RDI,%RCX,8),%RDI |
(271) 0x43e8ba LEA (%RSI,%RAX,1),%RCX |
(271) 0x43e8be MOV 0x128(%RSP),%RSI |
(271) 0x43e8c6 LEA (%RSI,%RCX,8),%RCX |
(271) 0x43e8ca MOV 0x120(%RSP),%RSI |
(271) 0x43e8d2 MOV %RCX,0x180(%RSP) |
(271) 0x43e8da LEA (%RSI,%RAX,1),%RCX |
(271) 0x43e8de MOV 0x118(%RSP),%RSI |
(271) 0x43e8e6 LEA (%RSI,%RCX,8),%RCX |
(271) 0x43e8ea MOV 0x110(%RSP),%RSI |
(271) 0x43e8f2 MOV %RCX,0x158(%RSP) |
(271) 0x43e8fa MOV 0x108(%RSP),%RCX |
(271) 0x43e902 ADD %RSI,%RAX |
(271) 0x43e905 LEA (%RCX,%RAX,8),%RSI |
(271) 0x43e909 MOV 0x14c(%RSP),%ECX |
(271) 0x43e910 XOR %EAX,%EAX |
(271) 0x43e912 SHR $0x3,%ECX |
(271) 0x43e915 SAL $0x6,%RCX |
(271) 0x43e919 MOV %RCX,0x150(%RSP) |
(271) 0x43e921 NOPL (%RAX) |
(273) 0x43e928 MOV 0x160(%RSP),%RCX |
(273) 0x43e930 VMOVUPD (%RBX,%RAX,1),%ZMM9 |
(273) 0x43e937 VMOVUPD (%R13,%RAX,1),%ZMM12 |
(273) 0x43e93f VMOVUPD (%RCX,%RAX,1),%ZMM7 |
(273) 0x43e946 MOV 0x1a0(%RSP),%RCX |
(273) 0x43e94e VADDPD %ZMM9,%ZMM9,%ZMM11 |
(273) 0x43e954 VMOVUPD (%R10,%RAX,1),%ZMM9 |
(273) 0x43e95b VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(273) 0x43e962 MOV 0x198(%RSP),%RCX |
(273) 0x43e96a VADDPD (%R14,%RAX,1),%ZMM7,%ZMM10 |
(273) 0x43e971 VADDPD %ZMM12,%ZMM12,%ZMM7 |
(273) 0x43e977 VMOVUPD (%RCX,%RAX,1),%ZMM13 |
(273) 0x43e97e MOV 0x190(%RSP),%RCX |
(273) 0x43e986 VADDPD (%R11,%RAX,1),%ZMM0,%ZMM8 |
(273) 0x43e98d VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(273) 0x43e994 VADDPD (%R12,%RAX,1),%ZMM13,%ZMM15 |
(273) 0x43e99b VMOVUPD (%R15,%RAX,1),%ZMM13 |
(273) 0x43e9a2 MOV 0x188(%RSP),%RCX |
(273) 0x43e9aa VMULPD %ZMM11,%ZMM8,%ZMM14 |
(273) 0x43e9b0 VADDPD (%R9,%RAX,1),%ZMM0,%ZMM8 |
(273) 0x43e9b7 VADDPD %ZMM9,%ZMM9,%ZMM11 |
(273) 0x43e9bd VMOVUPD (%RCX,%RAX,1),%ZMM9 |
(273) 0x43e9c4 MOV 0x180(%RSP),%RCX |
(273) 0x43e9cc VMULPD %ZMM11,%ZMM8,%ZMM12 |
(273) 0x43e9d2 VADDPD (%RDI,%RAX,1),%ZMM9,%ZMM11 |
(273) 0x43e9d9 VMOVUPD (%R8,%RAX,1),%ZMM8 |
(273) 0x43e9e0 VFMADD132PD %ZMM15,%ZMM12,%ZMM7 |
(273) 0x43e9e6 VADDPD %ZMM13,%ZMM13,%ZMM15 |
(273) 0x43e9ec VDIVPD (%RDX,%RAX,1),%ZMM11,%ZMM12 |
(273) 0x43e9f3 VDIVPD %ZMM8,%ZMM4,%ZMM13 |
(273) 0x43e9f9 VFMADD231PD %ZMM10,%ZMM15,%ZMM14 |
(273) 0x43e9ff VMULPD %ZMM13,%ZMM12,%ZMM15 |
(273) 0x43ea05 VSUBPD %ZMM14,%ZMM7,%ZMM10 |
(273) 0x43ea0b VMULPD %ZMM5,%ZMM10,%ZMM0 |
(273) 0x43ea11 VADDPD %ZMM8,%ZMM0,%ZMM14 |
(273) 0x43ea17 VFNMADD213PD (%RCX,%RAX,1),%ZMM15,%ZMM0 |
(273) 0x43ea1e MOV 0x158(%RSP),%RCX |
(273) 0x43ea26 VDIVPD %ZMM14,%ZMM8,%ZMM7 |
(273) 0x43ea2c VMOVUPD %ZMM0,(%RCX,%RAX,1) |
(273) 0x43ea33 MOV 0x150(%RSP),%RCX |
(273) 0x43ea3b VMULPD (%RDX,%RAX,1),%ZMM7,%ZMM10 |
(273) 0x43ea42 VMOVUPD %ZMM10,(%RSI,%RAX,1) |
(273) 0x43ea49 ADD $0x40,%RAX |
(273) 0x43ea4d CMP %RCX,%RAX |
(273) 0x43ea50 JNE 43e928 |
(271) 0x43ea56 MOV 0x14c(%RSP),%R15D |
(271) 0x43ea5e MOV 0x148(%RSP),%R14D |
(271) 0x43ea66 MOV %R15D,%EDX |
(271) 0x43ea69 AND $-0x8,%EDX |
(271) 0x43ea6c LEA (%RDX,%R14,1),%R13D |
(271) 0x43ea70 ADD %EDX,0x1b8(%RSP) |
(271) 0x43ea77 MOV %R13D,0x190(%RSP) |
(271) 0x43ea7f TEST $0x7,%R15B |
(271) 0x43ea83 JE 43ef2e |
(271) 0x43ea89 MOV 0x14c(%RSP),%R12D |
(271) 0x43ea91 SUB %EDX,%R12D |
(271) 0x43ea94 MOV %R12D,0x188(%RSP) |
(271) 0x43ea9c DEC %R12D |
(271) 0x43ea9f CMP $0x2,%R12D |
(271) 0x43eaa3 JBE 43ecfa |
(271) 0x43eaa9 MOVSXD 0x148(%RSP),%RAX |
(271) 0x43eab1 MOV 0xb8(%RSP),%R11 |
(271) 0x43eab9 MOV 0xa8(%RSP),%R10 |
(271) 0x43eac1 MOV 0xb0(%RSP),%R15 |
(271) 0x43eac9 LEA (%R11,%RAX,1),%R8 |
(271) 0x43eacd MOV 0xf0(%RSP),%R11 |
(271) 0x43ead5 MOV 0xe0(%RSP),%R14 |
(271) 0x43eadd LEA (%R10,%RAX,1),%R9 |
(271) 0x43eae1 LEA (%R15,%RAX,1),%R13 |
(271) 0x43eae5 MOV 0xf8(%RSP),%R15 |
(271) 0x43eaed MOV 0x88(%RSP),%RBX |
(271) 0x43eaf5 LEA (%R11,%RAX,1),%R10 |
(271) 0x43eaf9 LEA (%R14,%RAX,1),%R12 |
(271) 0x43eafd MOV 0x100(%RSP),%R14 |
(271) 0x43eb05 MOV 0x98(%RSP),%RDI |
(271) 0x43eb0d ADD %RDX,%R10 |
(271) 0x43eb10 LEA (%RBX,%RAX,1),%RCX |
(271) 0x43eb14 LEA (%R12,%RDX,1),%RBX |
(271) 0x43eb18 ADD %RDX,%R8 |
(271) 0x43eb1b LEA (%R15,%R10,8),%R15 |
(271) 0x43eb1f MOV 0x110(%RSP),%R10 |
(271) 0x43eb27 LEA (%R14,%RAX,1),%R12 |
(271) 0x43eb2b ADD %RAX,%RDI |
(271) 0x43eb2e ADD %RDX,%RDI |
(271) 0x43eb31 ADD %RDX,%R9 |
(271) 0x43eb34 MOV %RBX,0x1a0(%RSP) |
(271) 0x43eb3c MOV 0xd0(%RSP),%RSI |
(271) 0x43eb44 LEA (%R10,%RAX,1),%R14 |
(271) 0x43eb48 MOV 0x130(%RSP),%RBX |
(271) 0x43eb50 MOV 0x120(%RSP),%R11 |
(271) 0x43eb58 ADD %RDX,%RCX |
(271) 0x43eb5b LEA (%R14,%RDX,1),%R10 |
(271) 0x43eb5f MOV 0x1a8(%RSP),%R14 |
(271) 0x43eb67 LEA (%RSI,%RAX,1),%RSI |
(271) 0x43eb6b ADD %RDX,%R13 |
(271) 0x43eb6e MOV %R10,0x198(%RSP) |
(271) 0x43eb76 MOV 0xa0(%RSP),%R10 |
(271) 0x43eb7e LEA (%RBX,%RAX,1),%RBX |
(271) 0x43eb82 ADD %RAX,%R11 |
(271) 0x43eb85 VMOVUPD (%R14,%R8,8),%YMM0 |
(271) 0x43eb8b ADD %RDX,%RSI |
(271) 0x43eb8e ADD %RDX,%R12 |
(271) 0x43eb91 ADD %RDX,%RBX |
(271) 0x43eb94 ADD %RDX,%R11 |
(271) 0x43eb97 VADDPD (%R14,%RDI,8),%YMM0,%YMM14 |
(271) 0x43eb9d LEA (%R10,%RAX,1),%R14 |
(271) 0x43eba1 MOV 0xc0(%RSP),%R10 |
(271) 0x43eba9 ADD %RDX,%R14 |
(271) 0x43ebac VMOVUPD (%R10,%R14,8),%YMM8 |
(271) 0x43ebb2 MOV 0x1b0(%RSP),%R14 |
(271) 0x43ebba VMOVUPD (%R14,%R9,8),%YMM7 |
(271) 0x43ebc0 VADDPD %YMM8,%YMM8,%YMM9 |
(271) 0x43ebc5 VADDPD 0x8(%R14,%R9,8),%YMM7,%YMM11 |
(271) 0x43ebcc MOV 0x1a8(%RSP),%R14 |
(271) 0x43ebd4 MOV 0x90(%RSP),%R9 |
(271) 0x43ebdc VMOVUPD 0x8(%R14,%R8,8),%YMM15 |
(271) 0x43ebe3 VMOVUPD 0x8(%R9,%RCX,8),%YMM13 |
(271) 0x43ebea VMULPD %YMM11,%YMM9,%YMM12 |
(271) 0x43ebef VADDPD 0x8(%R14,%RDI,8),%YMM15,%YMM10 |
(271) 0x43ebf6 MOV 0xc8(%RSP),%RDI |
(271) 0x43ebfe VADDPD %YMM13,%YMM13,%YMM0 |
(271) 0x43ec03 ADD %RDI,%RAX |
(271) 0x43ec06 ADD %RDX,%RAX |
(271) 0x43ec09 VMOVUPD (%R10,%RAX,8),%YMM8 |
(271) 0x43ec0f MOV 0x1b0(%RSP),%RDX |
(271) 0x43ec17 VMOVUPD (%R9,%RCX,8),%YMM15 |
(271) 0x43ec1d MOV 0xd8(%RSP),%RAX |
(271) 0x43ec25 VMOVUPD (%RDX,%RSI,8),%YMM7 |
(271) 0x43ec2a VADDPD %YMM8,%YMM8,%YMM9 |
(271) 0x43ec2f MOV 0x138(%RSP),%RCX |
(271) 0x43ec37 MOV 0xe8(%RSP),%R8 |
(271) 0x43ec3f VADDPD 0x8(%RDX,%RSI,8),%YMM7,%YMM11 |
(271) 0x43ec45 MOV 0x1a0(%RSP),%RSI |
(271) 0x43ec4d VMOVAPD %YMM7,0x160(%RSP) |
(271) 0x43ec56 VMULPD %YMM11,%YMM9,%YMM13 |
(271) 0x43ec5b VMOVUPD (%RCX,%R12,8),%YMM9 |
(271) 0x43ec61 MOV 0x118(%RSP),%R12 |
(271) 0x43ec69 VADDPD (%R8,%RSI,8),%YMM9,%YMM7 |
(271) 0x43ec6f VFMADD132PD %YMM10,%YMM13,%YMM0 |
(271) 0x43ec74 VADDPD %YMM15,%YMM15,%YMM10 |
(271) 0x43ec79 VDIVPD (%R15),%YMM7,%YMM13 |
(271) 0x43ec7e VFMADD132PD %YMM14,%YMM12,%YMM10 |
(271) 0x43ec83 VSUBPD %YMM10,%YMM0,%YMM14 |
(271) 0x43ec88 VMOVUPD (%RAX,%R13,8),%YMM0 |
(271) 0x43ec8e MOV 0x128(%RSP),%R13 |
(271) 0x43ec96 VDIVPD %YMM0,%YMM6,%YMM15 |
(271) 0x43ec9a VMULPD %YMM1,%YMM14,%YMM12 |
(271) 0x43ec9e VADDPD %YMM0,%YMM12,%YMM8 |
(271) 0x43eca2 VDIVPD %YMM8,%YMM0,%YMM11 |
(271) 0x43eca7 VMULPD %YMM15,%YMM13,%YMM10 |
(271) 0x43ecac VFNMADD213PD (%R13,%RBX,8),%YMM10,%YMM12 |
(271) 0x43ecb3 MOV 0x198(%RSP),%RBX |
(271) 0x43ecbb VMOVUPD %YMM12,(%R12,%R11,8) |
(271) 0x43ecc1 MOV 0x188(%RSP),%R11D |
(271) 0x43ecc9 VMULPD (%R15),%YMM11,%YMM14 |
(271) 0x43ecce MOV 0x108(%RSP),%R15 |
(271) 0x43ecd6 VMOVUPD %YMM14,(%R15,%RBX,8) |
(271) 0x43ecdc TEST $0x3,%R11B |
(271) 0x43ece0 JE 43ef2e |
(271) 0x43ece6 AND $-0x4,%R11D |
(271) 0x43ecea ADD %R11D,0x1b8(%RSP) |
(271) 0x43ecf2 ADD %R11D,0x190(%RSP) |
(271) 0x43ecfa MOV 0x1a8(%RSP),%RDX |
(271) 0x43ed02 MOVSXD 0x190(%RSP),%R9 |
(271) 0x43ed0a MOV 0x90(%RSP),%R14 |
(271) 0x43ed12 MOV 0x88(%RSP),%RDI |
(271) 0x43ed1a MOV 0x98(%RSP),%RAX |
(271) 0x43ed22 MOV 0xb8(%RSP),%RCX |
(271) 0x43ed2a MOV %R9,0x198(%RSP) |
(271) 0x43ed32 MOV %R9,%R10 |
(271) 0x43ed35 MOV 0x1b0(%RSP),%R12 |
(271) 0x43ed3d MOV 0xc0(%RSP),%R13 |
(271) 0x43ed45 LEA (%R14,%RDI,8),%R9 |
(271) 0x43ed49 MOV 0xa0(%RSP),%RSI |
(271) 0x43ed51 MOV 0xa8(%RSP),%RBX |
(271) 0x43ed59 LEA (%RDX,%RAX,8),%R8 |
(271) 0x43ed5d LEA (%RDX,%RCX,8),%RDI |
(271) 0x43ed61 MOV 0xc8(%RSP),%R11 |
(271) 0x43ed69 MOV 0xd0(%RSP),%RDX |
(271) 0x43ed71 MOV 0xd8(%RSP),%RAX |
(271) 0x43ed79 LEA (%R13,%RSI,8),%R15 |
(271) 0x43ed7e LEA (%R12,%RBX,8),%RSI |
(271) 0x43ed82 MOV 0xe0(%RSP),%RBX |
(271) 0x43ed8a LEA (%R13,%R11,8),%R14 |
(271) 0x43ed8f LEA (%R12,%RDX,8),%RCX |
(271) 0x43ed93 MOV 0xb0(%RSP),%R13 |
(271) 0x43ed9b MOV 0xe8(%RSP),%R12 |
(271) 0x43eda3 MOV 0xf8(%RSP),%R11 |
(271) 0x43edab MOV 0xf0(%RSP),%RDX |
(271) 0x43edb3 LEA (%RAX,%R13,8),%R13 |
(271) 0x43edb7 LEA (%R12,%RBX,8),%R12 |
(271) 0x43edbb MOV 0x138(%RSP),%RAX |
(271) 0x43edc3 MOV 0x100(%RSP),%RBX |
(271) 0x43edcb LEA (%R11,%RDX,8),%RDX |
(271) 0x43edcf LEA (%RAX,%RBX,8),%R11 |
(271) 0x43edd3 MOV 0x128(%RSP),%RAX |
(271) 0x43eddb MOV 0x130(%RSP),%RBX |
(271) 0x43ede3 MOV %R11,0x1b0(%RSP) |
(271) 0x43edeb LEA (%RAX,%RBX,8),%R11 |
(271) 0x43edef MOV 0x118(%RSP),%RAX |
(271) 0x43edf7 MOV 0x120(%RSP),%RBX |
(271) 0x43edff MOV %R11,0x1a8(%RSP) |
(271) 0x43ee07 LEA (%RAX,%RBX,8),%R11 |
(271) 0x43ee0b MOV 0x108(%RSP),%RAX |
(271) 0x43ee13 MOV 0x110(%RSP),%RBX |
(271) 0x43ee1b LEA (%RAX,%RBX,8),%RBX |
(271) 0x43ee1f MOV 0x1b8(%RSP),%EAX |
(271) 0x43ee26 SUB %R10D,%EAX |
(271) 0x43ee29 MOV %EAX,0x1a0(%RSP) |
(271) 0x43ee30 MOV 0x198(%RSP),%RAX |
(271) 0x43ee38 MOV %R11,0x198(%RSP) |
(272) 0x43ee40 VMOVSD (%RSI,%RAX,8),%XMM0 |
(272) 0x43ee45 VMOVSD (%R15,%RAX,8),%XMM9 |
(272) 0x43ee4b VMOVSD 0x8(%R9,%RAX,8),%XMM15 |
(272) 0x43ee52 VMOVSD (%R8,%RAX,8),%XMM12 |
(272) 0x43ee58 VADDSD 0x8(%RSI,%RAX,8),%XMM0,%XMM8 |
(272) 0x43ee5e VADDSD %XMM9,%XMM9,%XMM7 |
(272) 0x43ee63 VMOVSD (%RCX,%RAX,8),%XMM0 |
(272) 0x43ee68 VMOVSD (%R14,%RAX,8),%XMM9 |
(272) 0x43ee6e VMOVSD 0x8(%R8,%RAX,8),%XMM14 |
(272) 0x43ee75 VADDSD %XMM15,%XMM15,%XMM10 |
(272) 0x43ee7a VADDSD (%RDI,%RAX,8),%XMM12,%XMM11 |
(272) 0x43ee7f MOV 0x1b0(%RSP),%R10 |
(272) 0x43ee87 VMULSD %XMM7,%XMM8,%XMM13 |
(272) 0x43ee8b VADDSD 0x8(%RCX,%RAX,8),%XMM0,%XMM8 |
(272) 0x43ee91 VADDSD %XMM9,%XMM9,%XMM7 |
(272) 0x43ee96 MOV 0x1a8(%RSP),%R11 |
(272) 0x43ee9e VADDSD 0x8(%RDI,%RAX,8),%XMM14,%XMM12 |
(272) 0x43eea4 VMOVSD (%R9,%RAX,8),%XMM14 |
(272) 0x43eeaa VMULSD %XMM7,%XMM8,%XMM15 |
(272) 0x43eeae VMOVSD (%R12,%RAX,8),%XMM8 |
(272) 0x43eeb4 VADDSD (%R10,%RAX,8),%XMM8,%XMM7 |
(272) 0x43eeba MOV 0x198(%RSP),%R10 |
(272) 0x43eec2 VFMADD132SD %XMM12,%XMM15,%XMM10 |
(272) 0x43eec7 VADDSD %XMM14,%XMM14,%XMM12 |
(272) 0x43eecc VDIVSD (%RDX,%RAX,8),%XMM7,%XMM15 |
(272) 0x43eed1 VFMADD132SD %XMM11,%XMM13,%XMM12 |
(272) 0x43eed6 VSUBSD %XMM12,%XMM10,%XMM11 |
(272) 0x43eedb VMOVSD (%R13,%RAX,8),%XMM10 |
(272) 0x43eee2 VDIVSD %XMM10,%XMM3,%XMM14 |
(272) 0x43eee7 VMULSD %XMM2,%XMM11,%XMM13 |
(272) 0x43eeeb VADDSD %XMM10,%XMM13,%XMM0 |
(272) 0x43eef0 VDIVSD %XMM0,%XMM10,%XMM9 |
(272) 0x43eef4 VMULSD %XMM14,%XMM15,%XMM12 |
(272) 0x43eef9 VFNMADD213SD (%R11,%RAX,8),%XMM12,%XMM13 |
(272) 0x43eeff MOV 0x1bc(%RSP),%R11D |
(272) 0x43ef07 VMOVSD %XMM13,(%R10,%RAX,8) |
(272) 0x43ef0d MOV 0x1a0(%RSP),%R10D |
(272) 0x43ef15 VMULSD (%RDX,%RAX,8),%XMM9,%XMM11 |
(272) 0x43ef1a VMOVSD %XMM11,(%RBX,%RAX,8) |
(272) 0x43ef1f INC %RAX |
(272) 0x43ef22 ADD %EAX,%R10D |
(272) 0x43ef25 CMP %R11D,%R10D |
(272) 0x43ef28 JB 43ee40 |
(271) 0x43ef2e MOV 0x1bc(%RSP),%R9D |
(271) 0x43ef36 MOV %R9D,0x1b8(%RSP) |
(271) 0x43ef3e INCQ 0x140(%RSP) |
(271) 0x43ef46 MOV 0x140(%RSP),%R8 |
(271) 0x43ef4e ADD $0,%R8D |
(271) 0x43ef52 CMP %R8D,0x80(%RSP) |
(271) 0x43ef5a JLE 43ef88 |
(271) 0x43ef5c MOV 0x78(%RSP),%ECX |
(271) 0x43ef60 MOV 0x1b8(%RSP),%EDI |
(271) 0x43ef67 MOV 0x7c(%RSP),%R15D |
(271) 0x43ef6c MOV 0x84(%RSP),%EAX |
(271) 0x43ef73 SUB %EDI,%ECX |
(271) 0x43ef75 MOV %R15D,0x148(%RSP) |
(271) 0x43ef7d JMP 43e5d0 |
0x43ef82 NOPW (%RAX,%RAX,1) |
0x43ef88 VZEROUPPER |
0x43ef8b LEA -0x28(%RBP),%RSP |
0x43ef8f POP %RBX |
0x43ef90 POP %R12 |
0x43ef92 POP %R13 |
0x43ef94 POP %R14 |
0x43ef96 POP %R15 |
0x43ef98 POP %RBP |
0x43ef99 RET |
0x43ef9a NOPW (%RAX,%RAX,1) |
(271) 0x43efa0 MOV 0x148(%RSP),%R13D |
(271) 0x43efa8 XOR %EDX,%EDX |
(271) 0x43efaa MOV %R13D,0x190(%RSP) |
(271) 0x43efb2 JMP 43ea89 |
0x43efb7 INC %ECX |
0x43efb9 XOR %EDX,%EDX |
0x43efbb JMP 43e4f1 |
Path / |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 101 |
nb uops | 111 |
loop length | 407 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.20 | 8.00 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
cycles | 7.20 | 11.93 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 17.71-17.85 |
Stall cycles | 0.00 |
Front-end | 18.50 |
Dispatch | 13.00 |
DIV/SQRT | 12.00 |
Overall L1 | 18.50 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 9% |
load | 11% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RDI),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43ef8b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43ef8b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,0x84(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x84(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 43efb7 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R9,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43ef8b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x7c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x20eff(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x84(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x2004b(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VBROADCASTSD %XMM3,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R11,1),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDI,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %EDI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43e4f1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 101 |
nb uops | 111 |
loop length | 407 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.20 | 8.00 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
cycles | 7.20 | 11.93 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 17.71-17.85 |
Stall cycles | 0.00 |
Front-end | 18.50 |
Dispatch | 13.00 |
DIV/SQRT | 12.00 |
Overall L1 | 18.50 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 9% |
load | 11% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RDI),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43ef8b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43ef8b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,0x84(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x84(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 43efb7 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R9,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43ef8b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x7c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x20eff(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x84(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x2004b(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VBROADCASTSD %XMM3,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R11,1),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDI,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %EDI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43e4f1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0– | 4.99 | 1.67 |
▼Loop 271 - PdV.cpp:50-63 - exec– | 0 | 0 |
○Loop 273 - PdV.cpp:51-63 - exec | 4.98 | 1.66 |
○Loop 272 - PdV.cpp:55-63 - exec | 0 | 0 |