Function: PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 5.41% |
---|
Function: PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 5.41% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/PdV.cpp: 48 - 63 |
-------------------------------------------------------------------------------- |
48: #pragma omp parallel for simd collapse(2) |
49: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
50: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
51: double left_flux = (xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel0(i, j) + xvel0(i + 0, j + 1))) * 0.25 * dt * 0.5; |
52: double right_flux = |
53: (xarea(i + 1, j + 0) * (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1) + xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1))) * 0.25 * dt * |
54: 0.5; |
55: double bottom_flux = (yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel0(i, j) + yvel0(i + 1, j + 0))) * 0.25 * dt * 0.5; |
56: double top_flux = (yarea(i + 0, j + 1) * (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1) + yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1))) * |
57: 0.25 * dt * 0.5; |
58: double total_flux = right_flux - left_flux + top_flux - bottom_flux; |
59: double volume_change_s = volume(i, j) / (volume(i, j) + total_flux); |
60: double recip_volume = 1.0 / volume(i, j); |
61: double energy_change = (pressure(i, j) / density0(i, j) + viscosity(i, j) / density0(i, j)) * total_flux * recip_volume; |
62: energy1(i, j) = energy0(i, j) - energy_change; |
63: density1(i, j) = density0(i, j) * volume_change_s; |
0x43ce20 PUSH %RBP |
0x43ce21 MOV %RSP,%RBP |
0x43ce24 PUSH %R15 |
0x43ce26 PUSH %R14 |
0x43ce28 PUSH %R13 |
0x43ce2a PUSH %R12 |
0x43ce2c PUSH %RBX |
0x43ce2d MOV %RDI,%RBX |
0x43ce30 AND $-0x40,%RSP |
0x43ce34 SUB $0x1c0,%RSP |
0x43ce3b MOV 0x68(%RDI),%EAX |
0x43ce3e MOV 0x6c(%RDI),%EDX |
0x43ce41 MOV 0x60(%RDI),%ESI |
0x43ce44 MOV 0x64(%RBX),%ECX |
0x43ce47 ADD $0x2,%EDX |
0x43ce4a LEA 0x1(%RAX),%R15D |
0x43ce4e LEA 0x1(%RSI),%EDI |
0x43ce51 MOV %EDX,0x80(%RSP) |
0x43ce58 MOV %EDI,0x7c(%RSP) |
0x43ce5c CMP %EDX,%R15D |
0x43ce5f JGE 43d94b |
0x43ce65 MOV %EDX,%R13D |
0x43ce68 LEA 0x2(%RCX),%R14D |
0x43ce6c SUB %R15D,%R13D |
0x43ce6f CMP %R14D,%EDI |
0x43ce72 JGE 43d94b |
0x43ce78 MOV %R14D,%R8D |
0x43ce7b SUB %EDI,%R8D |
0x43ce7e MOV %R8D,0x84(%RSP) |
0x43ce86 CALL 4046c0 <omp_get_num_threads@plt> |
0x43ce8b MOV %EAX,%R12D |
0x43ce8e CALL 4045b0 <omp_get_thread_num@plt> |
0x43ce93 XOR %EDX,%EDX |
0x43ce95 MOV %EAX,%R9D |
0x43ce98 MOV 0x84(%RSP),%EAX |
0x43ce9f IMUL %R13D,%EAX |
0x43cea3 DIV %R12D |
0x43cea6 MOV %EAX,%ECX |
0x43cea8 CMP %EDX,%R9D |
0x43ceab JB 43d977 |
0x43ceb1 IMUL %ECX,%R9D |
0x43ceb5 LEA (%R9,%RDX,1),%EAX |
0x43ceb9 LEA (%RCX,%RAX,1),%R10D |
0x43cebd MOV %EAX,0x1b8(%RSP) |
0x43cec4 MOV %R10D,0x78(%RSP) |
0x43cec9 CMP %R10D,%EAX |
0x43cecc JAE 43d94b |
0x43ced2 XOR %EDX,%EDX |
0x43ced4 MOV 0x7c(%RSP),%R11D |
0x43ced9 VMOVSD 0x266ef(%RIP),%XMM2 |
0x43cee1 DIVL 0x84(%RSP) |
0x43cee8 MOV 0x8(%RBX),%RDI |
0x43ceec MOV 0x10(%RBX),%R13 |
0x43cef0 MOV 0x58(%RBX),%R8 |
0x43cef4 MOV 0x18(%RBX),%R12 |
0x43cef8 MOV 0x40(%RBX),%R9 |
0x43cefc MOV 0x20(%RBX),%R10 |
0x43cf00 MOV %RDI,0x70(%RSP) |
0x43cf05 VMOVSD 0x266cb(%RIP),%XMM3 |
0x43cf0d MOV %R13,0x60(%RSP) |
0x43cf12 MOV %R8,0x58(%RSP) |
0x43cf17 MOV %R12,0x50(%RSP) |
0x43cf1c MOV %R9,0x48(%RSP) |
0x43cf21 MOV %R10,0x40(%RSP) |
0x43cf26 VMULSD (%RBX),%XMM2,%XMM2 |
0x43cf2a VBROADCASTSD %XMM3,%YMM6 |
0x43cf2f VBROADCASTSD %XMM3,%ZMM4 |
0x43cf35 VBROADCASTSD %XMM2,%YMM1 |
0x43cf3a VBROADCASTSD %XMM2,%ZMM5 |
0x43cf40 LEA (%RDX,%R11,1),%ESI |
0x43cf44 LEA (%RAX,%R15,1),%R15D |
0x43cf48 MOV %R14D,%EAX |
0x43cf4b MOV 0x48(%RBX),%RDX |
0x43cf4f MOV 0x50(%RBX),%R14 |
0x43cf53 MOV 0x30(%RBX),%R11 |
0x43cf57 MOV %ESI,0x148(%RSP) |
0x43cf5e SUB %ESI,%EAX |
0x43cf60 MOV 0x38(%RBX),%RSI |
0x43cf64 MOV 0x28(%RBX),%RBX |
0x43cf68 MOVSXD %R15D,%R15 |
0x43cf6b MOV %RDX,0x38(%RSP) |
0x43cf70 MOV %R14,0x68(%RSP) |
0x43cf75 MOV %R11,0x30(%RSP) |
0x43cf7a MOV %RSI,0x28(%RSP) |
0x43cf7f MOV %RBX,0x20(%RSP) |
0x43cf84 MOV %R15,0x140(%RSP) |
0x43cf8c NOPL (%RAX) |
(223) 0x43cf90 CMP %EAX,%ECX |
(223) 0x43cf92 CMOVBE %ECX,%EAX |
(223) 0x43cf95 MOV 0x1b8(%RSP),%ECX |
(223) 0x43cf9c MOV %EAX,0x14c(%RSP) |
(223) 0x43cfa3 ADD %ECX,%EAX |
(223) 0x43cfa5 MOV %EAX,0x1bc(%RSP) |
(223) 0x43cfac CMP %EAX,%ECX |
(223) 0x43cfae JAE 43d8fe |
(223) 0x43cfb4 MOV 0x68(%RSP),%R14 |
(223) 0x43cfb9 MOV 0x140(%RSP),%RDI |
(223) 0x43cfc1 MOV 0x70(%RSP),%RAX |
(223) 0x43cfc6 MOV 0x58(%RSP),%R11 |
(223) 0x43cfcb MOV (%R14),%R8 |
(223) 0x43cfce MOV %RDI,%RCX |
(223) 0x43cfd1 MOV 0x60(%RSP),%R9 |
(223) 0x43cfd6 MOV %RDI,%RBX |
(223) 0x43cfd9 MOV 0x10(%RAX),%R13 |
(223) 0x43cfdd MOV (%RAX),%RSI |
(223) 0x43cfe0 IMUL %R8,%RCX |
(223) 0x43cfe4 MOV (%R11),%RAX |
(223) 0x43cfe7 MOV 0x10(%R11),%R15 |
(223) 0x43cfeb MOV %RDI,%R11 |
(223) 0x43cfee MOV (%R9),%RDX |
(223) 0x43cff1 MOV 0x10(%R9),%R10 |
(223) 0x43cff5 IMUL %RDI,%RSI |
(223) 0x43cff9 MOV %R13,0x90(%RSP) |
(223) 0x43d001 IMUL %RAX,%R11 |
(223) 0x43d005 MOV %R15,0x1b0(%RSP) |
(223) 0x43d00d MOV 0x48(%RSP),%R15 |
(223) 0x43d012 ADD %RCX,%R8 |
(223) 0x43d015 IMUL %RDX,%RBX |
(223) 0x43d019 MOV 0x10(%R14),%R12 |
(223) 0x43d01d MOV %R10,0xc0(%RSP) |
(223) 0x43d025 MOV %R8,0xb8(%RSP) |
(223) 0x43d02d MOV 0x50(%RSP),%R8 |
(223) 0x43d032 LEA (%RAX,%R11,1),%R14 |
(223) 0x43d036 MOV %RDI,%RAX |
(223) 0x43d039 MOV 0x10(%R15),%R10 |
(223) 0x43d03d MOV %RSI,0x88(%RSP) |
(223) 0x43d045 IMUL (%R8),%RDI |
(223) 0x43d049 MOV 0x10(%R8),%R9 |
(223) 0x43d04d ADD %RBX,%RDX |
(223) 0x43d050 MOV %R14,0xd0(%RSP) |
(223) 0x43d058 MOV %R10,0xe8(%RSP) |
(223) 0x43d060 MOV %R9,0xd8(%RSP) |
(223) 0x43d068 MOV %R12,0x1a8(%RSP) |
(223) 0x43d070 MOV %RCX,0x98(%RSP) |
(223) 0x43d078 MOV %RBX,0xa0(%RSP) |
(223) 0x43d080 MOV %R11,0xa8(%RSP) |
(223) 0x43d088 MOV %RDX,0xc8(%RSP) |
(223) 0x43d090 MOV %RDI,0xb0(%RSP) |
(223) 0x43d098 MOV (%R15),%RDX |
(223) 0x43d09b MOV 0x40(%RSP),%R9 |
(223) 0x43d0a0 MOV 0x38(%RSP),%R15 |
(223) 0x43d0a5 IMUL %RAX,%RDX |
(223) 0x43d0a9 MOV (%R9),%R8 |
(223) 0x43d0ac MOV 0x10(%R9),%R14 |
(223) 0x43d0b0 MOV 0x10(%R15),%R10 |
(223) 0x43d0b4 IMUL %RAX,%R8 |
(223) 0x43d0b8 MOV %R14,0xf8(%RSP) |
(223) 0x43d0c0 MOV 0x30(%RSP),%R14 |
(223) 0x43d0c5 MOV %RDX,0xe0(%RSP) |
(223) 0x43d0cd MOV (%R15),%RDX |
(223) 0x43d0d0 MOV 0x28(%RSP),%R15 |
(223) 0x43d0d5 MOV 0x10(%R14),%R9 |
(223) 0x43d0d9 MOV %R10,0x138(%RSP) |
(223) 0x43d0e1 MOV %R8,0xf0(%RSP) |
(223) 0x43d0e9 MOV (%R14),%R8 |
(223) 0x43d0ec IMUL %RAX,%RDX |
(223) 0x43d0f0 MOV 0x10(%R15),%R10 |
(223) 0x43d0f4 MOV (%R15),%R14 |
(223) 0x43d0f7 MOV %R9,0x128(%RSP) |
(223) 0x43d0ff IMUL %RAX,%R8 |
(223) 0x43d103 MOV 0x20(%RSP),%R15 |
(223) 0x43d108 IMUL %RAX,%R14 |
(223) 0x43d10c MOV %R10,0x118(%RSP) |
(223) 0x43d114 MOV 0x10(%R15),%R9 |
(223) 0x43d118 MOV %RDX,0x100(%RSP) |
(223) 0x43d120 MOV %R8,0x130(%RSP) |
(223) 0x43d128 MOV (%R15),%R8 |
(223) 0x43d12b MOV %R14,0x120(%RSP) |
(223) 0x43d133 IMUL %RAX,%R8 |
(223) 0x43d137 MOV 0x14c(%RSP),%EAX |
(223) 0x43d13e MOV %R9,0x108(%RSP) |
(223) 0x43d146 LEA -0x1(%RAX),%R10D |
(223) 0x43d14a MOV %R8,0x110(%RSP) |
(223) 0x43d152 CMP $0x6,%R10D |
(223) 0x43d156 JBE 43d960 |
(223) 0x43d15c MOVSXD 0x148(%RSP),%RAX |
(223) 0x43d164 MOV 0xb8(%RSP),%R9 |
(223) 0x43d16c MOV %R12,%R8 |
(223) 0x43d16f LEA (%R9,%RAX,1),%R10 |
(223) 0x43d173 ADD %RAX,%RSI |
(223) 0x43d176 LEA (%R11,%RAX,1),%R9 |
(223) 0x43d17a ADD %RAX,%RCX |
(223) 0x43d17d SAL $0x3,%RSI |
(223) 0x43d181 SAL $0x3,%R10 |
(223) 0x43d185 LEA (%R13,%RSI,1),%R15 |
(223) 0x43d18a LEA 0x8(%R13,%RSI,1),%R13 |
(223) 0x43d18f SAL $0x3,%R9 |
(223) 0x43d193 SAL $0x3,%RCX |
(223) 0x43d197 LEA 0x8(%R8,%R10,1),%RSI |
(223) 0x43d19c LEA (%R12,%R10,1),%RDX |
(223) 0x43d1a0 MOV 0xc8(%RSP),%R8 |
(223) 0x43d1a8 MOV 0xc0(%RSP),%R10 |
(223) 0x43d1b0 MOV %RSI,0x180(%RSP) |
(223) 0x43d1b8 MOV 0x1b0(%RSP),%RSI |
(223) 0x43d1c0 LEA (%R12,%RCX,1),%R14 |
(223) 0x43d1c4 LEA 0x8(%R12,%RCX,1),%R12 |
(223) 0x43d1c9 MOV %RDX,0x158(%RSP) |
(223) 0x43d1d1 LEA (%RBX,%RAX,1),%RCX |
(223) 0x43d1d5 LEA 0x8(%RSI,%R9,1),%RDX |
(223) 0x43d1da LEA (%RSI,%R9,1),%R11 |
(223) 0x43d1de MOV 0xd0(%RSP),%R9 |
(223) 0x43d1e6 MOV %RDX,0x188(%RSP) |
(223) 0x43d1ee LEA (%R10,%RCX,8),%RBX |
(223) 0x43d1f2 LEA (%R8,%RAX,1),%RCX |
(223) 0x43d1f6 LEA (%R9,%RAX,1),%RDX |
(223) 0x43d1fa LEA (%R10,%RCX,8),%R10 |
(223) 0x43d1fe MOV 0xe0(%RSP),%RCX |
(223) 0x43d206 SAL $0x3,%RDX |
(223) 0x43d20a LEA (%RDI,%RAX,1),%R8 |
(223) 0x43d20e MOV 0xd8(%RSP),%RDI |
(223) 0x43d216 LEA (%RSI,%RDX,1),%R9 |
(223) 0x43d21a LEA 0x8(%RSI,%RDX,1),%RSI |
(223) 0x43d21f MOV 0xe8(%RSP),%RDX |
(223) 0x43d227 ADD %RAX,%RCX |
(223) 0x43d22a MOV %RSI,0x190(%RSP) |
(223) 0x43d232 MOV 0xf0(%RSP),%RSI |
(223) 0x43d23a LEA (%RDI,%R8,8),%R8 |
(223) 0x43d23e LEA (%RDX,%RCX,8),%RDI |
(223) 0x43d242 MOV 0xf8(%RSP),%RCX |
(223) 0x43d24a LEA (%RSI,%RAX,1),%RDX |
(223) 0x43d24e MOV 0x100(%RSP),%RSI |
(223) 0x43d256 LEA (%RCX,%RDX,8),%RDX |
(223) 0x43d25a LEA (%RSI,%RAX,1),%RCX |
(223) 0x43d25e MOV 0x138(%RSP),%RSI |
(223) 0x43d266 LEA (%RSI,%RCX,8),%RCX |
(223) 0x43d26a MOV 0x130(%RSP),%RSI |
(223) 0x43d272 MOV %RCX,0x160(%RSP) |
(223) 0x43d27a LEA (%RSI,%RAX,1),%RCX |
(223) 0x43d27e MOV 0x128(%RSP),%RSI |
(223) 0x43d286 LEA (%RSI,%RCX,8),%RCX |
(223) 0x43d28a MOV 0x120(%RSP),%RSI |
(223) 0x43d292 MOV %RCX,0x198(%RSP) |
(223) 0x43d29a LEA (%RSI,%RAX,1),%RCX |
(223) 0x43d29e MOV 0x118(%RSP),%RSI |
(223) 0x43d2a6 LEA (%RSI,%RCX,8),%RCX |
(223) 0x43d2aa MOV 0x110(%RSP),%RSI |
(223) 0x43d2b2 MOV %RCX,0x1a0(%RSP) |
(223) 0x43d2ba MOV 0x108(%RSP),%RCX |
(223) 0x43d2c2 ADD %RSI,%RAX |
(223) 0x43d2c5 LEA (%RCX,%RAX,8),%RSI |
(223) 0x43d2c9 MOV 0x14c(%RSP),%ECX |
(223) 0x43d2d0 XOR %EAX,%EAX |
(223) 0x43d2d2 SHR $0x3,%ECX |
(223) 0x43d2d5 SAL $0x6,%RCX |
(223) 0x43d2d9 MOV %RCX,0x150(%RSP) |
(223) 0x43d2e1 NOPL (%RAX) |
(225) 0x43d2e8 MOV 0x158(%RSP),%RCX |
(225) 0x43d2f0 VMOVUPD (%RBX,%RAX,1),%ZMM9 |
(225) 0x43d2f7 VMOVUPD (%R13,%RAX,1),%ZMM12 |
(225) 0x43d2ff VMOVUPD (%RCX,%RAX,1),%ZMM7 |
(225) 0x43d306 MOV 0x188(%RSP),%RCX |
(225) 0x43d30e VADDPD %ZMM9,%ZMM9,%ZMM11 |
(225) 0x43d314 VMOVUPD (%R10,%RAX,1),%ZMM9 |
(225) 0x43d31b VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(225) 0x43d322 MOV 0x180(%RSP),%RCX |
(225) 0x43d32a VADDPD (%R14,%RAX,1),%ZMM7,%ZMM10 |
(225) 0x43d331 VADDPD %ZMM12,%ZMM12,%ZMM7 |
(225) 0x43d337 VMOVUPD (%RCX,%RAX,1),%ZMM13 |
(225) 0x43d33e MOV 0x190(%RSP),%RCX |
(225) 0x43d346 VADDPD (%R11,%RAX,1),%ZMM0,%ZMM8 |
(225) 0x43d34d VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(225) 0x43d354 VADDPD (%R12,%RAX,1),%ZMM13,%ZMM15 |
(225) 0x43d35b VMOVUPD (%R15,%RAX,1),%ZMM13 |
(225) 0x43d362 MOV 0x160(%RSP),%RCX |
(225) 0x43d36a VMULPD %ZMM11,%ZMM8,%ZMM14 |
(225) 0x43d370 VADDPD (%R9,%RAX,1),%ZMM0,%ZMM8 |
(225) 0x43d377 VADDPD %ZMM9,%ZMM9,%ZMM11 |
(225) 0x43d37d VMOVUPD (%RCX,%RAX,1),%ZMM9 |
(225) 0x43d384 MOV 0x198(%RSP),%RCX |
(225) 0x43d38c VMULPD %ZMM11,%ZMM8,%ZMM12 |
(225) 0x43d392 VADDPD (%RDI,%RAX,1),%ZMM9,%ZMM11 |
(225) 0x43d399 VMOVUPD (%R8,%RAX,1),%ZMM8 |
(225) 0x43d3a0 VFMADD132PD %ZMM15,%ZMM12,%ZMM7 |
(225) 0x43d3a6 VADDPD %ZMM13,%ZMM13,%ZMM15 |
(225) 0x43d3ac VDIVPD (%RDX,%RAX,1),%ZMM11,%ZMM12 |
(225) 0x43d3b3 VDIVPD %ZMM8,%ZMM4,%ZMM13 |
(225) 0x43d3b9 VFMADD231PD %ZMM10,%ZMM15,%ZMM14 |
(225) 0x43d3bf VMULPD %ZMM13,%ZMM12,%ZMM15 |
(225) 0x43d3c5 VSUBPD %ZMM14,%ZMM7,%ZMM10 |
(225) 0x43d3cb VMULPD %ZMM5,%ZMM10,%ZMM0 |
(225) 0x43d3d1 VADDPD %ZMM0,%ZMM8,%ZMM14 |
(225) 0x43d3d7 VFNMADD213PD (%RCX,%RAX,1),%ZMM15,%ZMM0 |
(225) 0x43d3de MOV 0x1a0(%RSP),%RCX |
(225) 0x43d3e6 VDIVPD %ZMM14,%ZMM8,%ZMM7 |
(225) 0x43d3ec VMOVUPD %ZMM0,(%RCX,%RAX,1) |
(225) 0x43d3f3 MOV 0x150(%RSP),%RCX |
(225) 0x43d3fb VMULPD (%RDX,%RAX,1),%ZMM7,%ZMM10 |
(225) 0x43d402 VMOVUPD %ZMM10,(%RSI,%RAX,1) |
(225) 0x43d409 ADD $0x40,%RAX |
(225) 0x43d40d CMP %RCX,%RAX |
(225) 0x43d410 JNE 43d2e8 |
(223) 0x43d416 MOV 0x14c(%RSP),%R15D |
(223) 0x43d41e MOV 0x148(%RSP),%R14D |
(223) 0x43d426 MOV %R15D,%EDX |
(223) 0x43d429 AND $-0x8,%EDX |
(223) 0x43d42c ADD %EDX,%R14D |
(223) 0x43d42f ADD %EDX,0x1b8(%RSP) |
(223) 0x43d436 MOV %R14D,0x190(%RSP) |
(223) 0x43d43e TEST $0x7,%R15B |
(223) 0x43d442 JE 43d8ee |
(223) 0x43d448 MOV 0x14c(%RSP),%R13D |
(223) 0x43d450 SUB %EDX,%R13D |
(223) 0x43d453 MOV %R13D,0x188(%RSP) |
(223) 0x43d45b DEC %R13D |
(223) 0x43d45e CMP $0x2,%R13D |
(223) 0x43d462 JBE 43d6b9 |
(223) 0x43d468 MOVSXD 0x148(%RSP),%RAX |
(223) 0x43d470 MOV 0xa8(%RSP),%R11 |
(223) 0x43d478 MOV 0xd0(%RSP),%R10 |
(223) 0x43d480 MOV 0xb0(%RSP),%R15 |
(223) 0x43d488 LEA (%R11,%RAX,1),%R9 |
(223) 0x43d48c MOV 0xf0(%RSP),%R11 |
(223) 0x43d494 MOV 0x88(%RSP),%R12 |
(223) 0x43d49c MOV 0xe0(%RSP),%R14 |
(223) 0x43d4a4 LEA (%R10,%RAX,1),%RSI |
(223) 0x43d4a8 LEA (%R15,%RAX,1),%R13 |
(223) 0x43d4ac MOV 0xf8(%RSP),%R15 |
(223) 0x43d4b4 LEA (%R11,%RAX,1),%R10 |
(223) 0x43d4b8 MOV 0xb8(%RSP),%RBX |
(223) 0x43d4c0 LEA (%R12,%RAX,1),%RCX |
(223) 0x43d4c4 MOV 0x98(%RSP),%RDI |
(223) 0x43d4cc ADD %RDX,%R10 |
(223) 0x43d4cf LEA (%R14,%RAX,1),%R12 |
(223) 0x43d4d3 MOV 0x100(%RSP),%R14 |
(223) 0x43d4db ADD %RDX,%R9 |
(223) 0x43d4de LEA (%R15,%R10,8),%R15 |
(223) 0x43d4e2 MOV 0x110(%RSP),%R10 |
(223) 0x43d4ea LEA (%RBX,%RAX,1),%R8 |
(223) 0x43d4ee ADD %RAX,%RDI |
(223) 0x43d4f1 LEA (%R12,%RDX,1),%RBX |
(223) 0x43d4f5 LEA (%R14,%RAX,1),%R12 |
(223) 0x43d4f9 ADD %RDX,%R8 |
(223) 0x43d4fc ADD %RDX,%RDI |
(223) 0x43d4ff LEA (%R10,%RAX,1),%R14 |
(223) 0x43d503 MOV %RBX,0x1a0(%RSP) |
(223) 0x43d50b MOV 0x120(%RSP),%R11 |
(223) 0x43d513 ADD %RDX,%RCX |
(223) 0x43d516 LEA (%R14,%RDX,1),%R10 |
(223) 0x43d51a MOV 0x1a8(%RSP),%R14 |
(223) 0x43d522 MOV 0x130(%RSP),%RBX |
(223) 0x43d52a ADD %RDX,%RSI |
(223) 0x43d52d MOV %R10,0x198(%RSP) |
(223) 0x43d535 MOV 0xa0(%RSP),%R10 |
(223) 0x43d53d ADD %RAX,%R11 |
(223) 0x43d540 ADD %RDX,%R13 |
(223) 0x43d543 VMOVUPD (%R14,%R8,8),%YMM0 |
(223) 0x43d549 LEA (%RBX,%RAX,1),%RBX |
(223) 0x43d54d ADD %RDX,%R12 |
(223) 0x43d550 ADD %RDX,%R11 |
(223) 0x43d553 ADD %RDX,%RBX |
(223) 0x43d556 VADDPD (%R14,%RDI,8),%YMM0,%YMM14 |
(223) 0x43d55c LEA (%R10,%RAX,1),%R14 |
(223) 0x43d560 MOV 0xc0(%RSP),%R10 |
(223) 0x43d568 ADD %RDX,%R14 |
(223) 0x43d56b VMOVUPD (%R10,%R14,8),%YMM8 |
(223) 0x43d571 MOV 0x1b0(%RSP),%R14 |
(223) 0x43d579 VMOVUPD (%R14,%R9,8),%YMM7 |
(223) 0x43d57f VADDPD %YMM8,%YMM8,%YMM9 |
(223) 0x43d584 VADDPD 0x8(%R14,%R9,8),%YMM7,%YMM11 |
(223) 0x43d58b MOV 0x1a8(%RSP),%R14 |
(223) 0x43d593 MOV 0x90(%RSP),%R9 |
(223) 0x43d59b VMOVUPD 0x8(%R14,%R8,8),%YMM15 |
(223) 0x43d5a2 VMOVUPD 0x8(%R9,%RCX,8),%YMM13 |
(223) 0x43d5a9 VMULPD %YMM11,%YMM9,%YMM12 |
(223) 0x43d5ae VADDPD 0x8(%R14,%RDI,8),%YMM15,%YMM10 |
(223) 0x43d5b5 MOV 0xc8(%RSP),%RDI |
(223) 0x43d5bd VADDPD %YMM13,%YMM13,%YMM0 |
(223) 0x43d5c2 ADD %RDI,%RAX |
(223) 0x43d5c5 ADD %RDX,%RAX |
(223) 0x43d5c8 VMOVUPD (%R10,%RAX,8),%YMM8 |
(223) 0x43d5ce MOV 0x1b0(%RSP),%RDX |
(223) 0x43d5d6 VMOVUPD (%R9,%RCX,8),%YMM15 |
(223) 0x43d5dc MOV 0xd8(%RSP),%RAX |
(223) 0x43d5e4 VMOVUPD (%RDX,%RSI,8),%YMM7 |
(223) 0x43d5e9 VADDPD %YMM8,%YMM8,%YMM9 |
(223) 0x43d5ee MOV 0x138(%RSP),%RCX |
(223) 0x43d5f6 MOV 0xe8(%RSP),%R8 |
(223) 0x43d5fe VADDPD 0x8(%RDX,%RSI,8),%YMM7,%YMM11 |
(223) 0x43d604 MOV 0x1a0(%RSP),%RSI |
(223) 0x43d60c VMOVAPD %YMM7,0x160(%RSP) |
(223) 0x43d615 VMULPD %YMM11,%YMM9,%YMM13 |
(223) 0x43d61a VMOVUPD (%RCX,%R12,8),%YMM9 |
(223) 0x43d620 MOV 0x118(%RSP),%R12 |
(223) 0x43d628 VADDPD (%R8,%RSI,8),%YMM9,%YMM7 |
(223) 0x43d62e VFMADD132PD %YMM10,%YMM13,%YMM0 |
(223) 0x43d633 VADDPD %YMM15,%YMM15,%YMM10 |
(223) 0x43d638 VDIVPD (%R15),%YMM7,%YMM13 |
(223) 0x43d63d VFMADD132PD %YMM14,%YMM12,%YMM10 |
(223) 0x43d642 VSUBPD %YMM10,%YMM0,%YMM14 |
(223) 0x43d647 VMOVUPD (%RAX,%R13,8),%YMM0 |
(223) 0x43d64d MOV 0x128(%RSP),%R13 |
(223) 0x43d655 VDIVPD %YMM0,%YMM6,%YMM15 |
(223) 0x43d659 VMULPD %YMM1,%YMM14,%YMM12 |
(223) 0x43d65d VADDPD %YMM0,%YMM12,%YMM8 |
(223) 0x43d661 VDIVPD %YMM8,%YMM0,%YMM11 |
(223) 0x43d666 VMULPD %YMM15,%YMM13,%YMM10 |
(223) 0x43d66b VFNMADD213PD (%R13,%RBX,8),%YMM10,%YMM12 |
(223) 0x43d672 MOV 0x198(%RSP),%RBX |
(223) 0x43d67a VMOVUPD %YMM12,(%R12,%R11,8) |
(223) 0x43d680 MOV 0x188(%RSP),%R11D |
(223) 0x43d688 VMULPD (%R15),%YMM11,%YMM14 |
(223) 0x43d68d MOV 0x108(%RSP),%R15 |
(223) 0x43d695 VMOVUPD %YMM14,(%R15,%RBX,8) |
(223) 0x43d69b TEST $0x3,%R11B |
(223) 0x43d69f JE 43d8ee |
(223) 0x43d6a5 AND $-0x4,%R11D |
(223) 0x43d6a9 ADD %R11D,0x1b8(%RSP) |
(223) 0x43d6b1 ADD %R11D,0x190(%RSP) |
(223) 0x43d6b9 MOV 0x1a8(%RSP),%RDX |
(223) 0x43d6c1 MOVSXD 0x190(%RSP),%R9 |
(223) 0x43d6c9 MOV 0x90(%RSP),%R14 |
(223) 0x43d6d1 MOV 0x88(%RSP),%RDI |
(223) 0x43d6d9 MOV 0x98(%RSP),%RAX |
(223) 0x43d6e1 MOV 0xb8(%RSP),%RCX |
(223) 0x43d6e9 MOV %R9,0x198(%RSP) |
(223) 0x43d6f1 MOV %R9,%R10 |
(223) 0x43d6f4 MOV 0x1b0(%RSP),%R12 |
(223) 0x43d6fc MOV 0xc0(%RSP),%R13 |
(223) 0x43d704 LEA (%R14,%RDI,8),%R9 |
(223) 0x43d708 MOV 0xa0(%RSP),%RSI |
(223) 0x43d710 MOV 0xa8(%RSP),%RBX |
(223) 0x43d718 LEA (%RDX,%RAX,8),%R8 |
(223) 0x43d71c LEA (%RDX,%RCX,8),%RDI |
(223) 0x43d720 MOV 0xc8(%RSP),%R11 |
(223) 0x43d728 MOV 0xd0(%RSP),%RDX |
(223) 0x43d730 MOV 0xd8(%RSP),%RAX |
(223) 0x43d738 LEA (%R13,%RSI,8),%R15 |
(223) 0x43d73d LEA (%R12,%RBX,8),%RSI |
(223) 0x43d741 MOV 0xe0(%RSP),%RBX |
(223) 0x43d749 LEA (%R13,%R11,8),%R14 |
(223) 0x43d74e LEA (%R12,%RDX,8),%RCX |
(223) 0x43d752 MOV 0xb0(%RSP),%R13 |
(223) 0x43d75a MOV 0xe8(%RSP),%R12 |
(223) 0x43d762 MOV 0xf8(%RSP),%R11 |
(223) 0x43d76a MOV 0xf0(%RSP),%RDX |
(223) 0x43d772 LEA (%RAX,%R13,8),%R13 |
(223) 0x43d776 LEA (%R12,%RBX,8),%R12 |
(223) 0x43d77a MOV 0x138(%RSP),%RAX |
(223) 0x43d782 MOV 0x100(%RSP),%RBX |
(223) 0x43d78a LEA (%R11,%RDX,8),%RDX |
(223) 0x43d78e LEA (%RAX,%RBX,8),%R11 |
(223) 0x43d792 MOV 0x128(%RSP),%RAX |
(223) 0x43d79a MOV 0x130(%RSP),%RBX |
(223) 0x43d7a2 MOV %R11,0x1b0(%RSP) |
(223) 0x43d7aa LEA (%RAX,%RBX,8),%R11 |
(223) 0x43d7ae MOV 0x118(%RSP),%RAX |
(223) 0x43d7b6 MOV 0x120(%RSP),%RBX |
(223) 0x43d7be MOV %R11,0x1a8(%RSP) |
(223) 0x43d7c6 LEA (%RAX,%RBX,8),%R11 |
(223) 0x43d7ca MOV 0x108(%RSP),%RAX |
(223) 0x43d7d2 MOV 0x110(%RSP),%RBX |
(223) 0x43d7da LEA (%RAX,%RBX,8),%RBX |
(223) 0x43d7de MOV 0x1b8(%RSP),%EAX |
(223) 0x43d7e5 SUB %R10D,%EAX |
(223) 0x43d7e8 MOV %EAX,0x1a0(%RSP) |
(223) 0x43d7ef MOV 0x198(%RSP),%RAX |
(223) 0x43d7f7 MOV %R11,0x198(%RSP) |
(223) 0x43d7ff NOP |
(224) 0x43d800 VMOVSD 0x8(%RSI,%RAX,8),%XMM0 |
(224) 0x43d806 VMOVSD (%R15,%RAX,8),%XMM9 |
(224) 0x43d80c VMOVSD 0x8(%R9,%RAX,8),%XMM15 |
(224) 0x43d813 VMOVSD (%RDI,%RAX,8),%XMM12 |
(224) 0x43d818 VADDSD (%RSI,%RAX,8),%XMM0,%XMM8 |
(224) 0x43d81d VADDSD %XMM9,%XMM9,%XMM7 |
(224) 0x43d822 VMOVSD 0x8(%RCX,%RAX,8),%XMM0 |
(224) 0x43d828 VMOVSD (%R14,%RAX,8),%XMM9 |
(224) 0x43d82e VMOVSD 0x8(%RDI,%RAX,8),%XMM14 |
(224) 0x43d834 VADDSD %XMM15,%XMM15,%XMM10 |
(224) 0x43d839 VADDSD (%R8,%RAX,8),%XMM12,%XMM11 |
(224) 0x43d83f MOV 0x1b0(%RSP),%R10 |
(224) 0x43d847 VMULSD %XMM7,%XMM8,%XMM13 |
(224) 0x43d84b VADDSD (%RCX,%RAX,8),%XMM0,%XMM8 |
(224) 0x43d850 VADDSD %XMM9,%XMM9,%XMM7 |
(224) 0x43d855 MOV 0x1a8(%RSP),%R11 |
(224) 0x43d85d VADDSD 0x8(%R8,%RAX,8),%XMM14,%XMM12 |
(224) 0x43d864 VMOVSD (%R9,%RAX,8),%XMM14 |
(224) 0x43d86a VMULSD %XMM7,%XMM8,%XMM15 |
(224) 0x43d86e VMOVSD (%R12,%RAX,8),%XMM8 |
(224) 0x43d874 VADDSD (%R10,%RAX,8),%XMM8,%XMM7 |
(224) 0x43d87a MOV 0x198(%RSP),%R10 |
(224) 0x43d882 VFMADD132SD %XMM12,%XMM15,%XMM10 |
(224) 0x43d887 VADDSD %XMM14,%XMM14,%XMM12 |
(224) 0x43d88c VDIVSD (%RDX,%RAX,8),%XMM7,%XMM15 |
(224) 0x43d891 VFMADD132SD %XMM11,%XMM13,%XMM12 |
(224) 0x43d896 VSUBSD %XMM12,%XMM10,%XMM11 |
(224) 0x43d89b VMOVSD (%R13,%RAX,8),%XMM10 |
(224) 0x43d8a2 VDIVSD %XMM10,%XMM3,%XMM14 |
(224) 0x43d8a7 VMULSD %XMM2,%XMM11,%XMM13 |
(224) 0x43d8ab VADDSD %XMM13,%XMM10,%XMM0 |
(224) 0x43d8b0 VDIVSD %XMM0,%XMM10,%XMM9 |
(224) 0x43d8b4 VMULSD %XMM14,%XMM15,%XMM12 |
(224) 0x43d8b9 VFNMADD213SD (%R11,%RAX,8),%XMM12,%XMM13 |
(224) 0x43d8bf MOV 0x1bc(%RSP),%R11D |
(224) 0x43d8c7 VMOVSD %XMM13,(%R10,%RAX,8) |
(224) 0x43d8cd MOV 0x1a0(%RSP),%R10D |
(224) 0x43d8d5 VMULSD (%RDX,%RAX,8),%XMM9,%XMM11 |
(224) 0x43d8da VMOVSD %XMM11,(%RBX,%RAX,8) |
(224) 0x43d8df INC %RAX |
(224) 0x43d8e2 ADD %EAX,%R10D |
(224) 0x43d8e5 CMP %R11D,%R10D |
(224) 0x43d8e8 JB 43d800 |
(223) 0x43d8ee MOV 0x1bc(%RSP),%R9D |
(223) 0x43d8f6 MOV %R9D,0x1b8(%RSP) |
(223) 0x43d8fe INCQ 0x140(%RSP) |
(223) 0x43d906 MOV 0x140(%RSP),%R8 |
(223) 0x43d90e ADD $0,%R8D |
(223) 0x43d912 CMP %R8D,0x80(%RSP) |
(223) 0x43d91a JLE 43d948 |
(223) 0x43d91c MOV 0x78(%RSP),%ECX |
(223) 0x43d920 MOV 0x1b8(%RSP),%EDI |
(223) 0x43d927 MOV 0x7c(%RSP),%R15D |
(223) 0x43d92c MOV 0x84(%RSP),%EAX |
(223) 0x43d933 SUB %EDI,%ECX |
(223) 0x43d935 MOV %R15D,0x148(%RSP) |
(223) 0x43d93d JMP 43cf90 |
0x43d942 NOPW (%RAX,%RAX,1) |
0x43d948 VZEROUPPER |
0x43d94b LEA -0x28(%RBP),%RSP |
0x43d94f POP %RBX |
0x43d950 POP %R12 |
0x43d952 POP %R13 |
0x43d954 POP %R14 |
0x43d956 POP %R15 |
0x43d958 POP %RBP |
0x43d959 RET |
0x43d95a NOPW (%RAX,%RAX,1) |
(223) 0x43d960 MOV 0x148(%RSP),%R13D |
(223) 0x43d968 XOR %EDX,%EDX |
(223) 0x43d96a MOV %R13D,0x190(%RSP) |
(223) 0x43d972 JMP 43d448 |
0x43d977 INC %ECX |
0x43d979 XOR %EDX,%EDX |
0x43d97b JMP 43ceb1 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.21 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.79 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 101 |
nb uops | 111 |
loop length | 407 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.20 | 8.00 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
cycles | 7.20 | 11.93 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 17.71-17.85 |
Stall cycles | 0.00 |
Front-end | 18.50 |
Dispatch | 13.00 |
DIV/SQRT | 12.00 |
Overall L1 | 18.50 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 9% |
load | 11% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RSI),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43d94b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43d94b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,0x84(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x84(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 43d977 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R9,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43d94b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x7c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x266ef(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x84(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x266cb(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VBROADCASTSD %XMM3,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R11,1),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %ESI,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %ESI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43ceb1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 101 |
nb uops | 111 |
loop length | 407 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.20 | 8.00 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
cycles | 7.20 | 11.93 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 17.71-17.85 |
Stall cycles | 0.00 |
Front-end | 18.50 |
Dispatch | 13.00 |
DIV/SQRT | 12.00 |
Overall L1 | 18.50 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 9% |
load | 11% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RSI),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43d94b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43d94b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,0x84(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x84(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 43d977 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R9,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43d94b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x7c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x266ef(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x84(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x266cb(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VBROADCASTSD %XMM3,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R11,1),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %ESI,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %ESI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43ceb1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D | 5.41 | 4.04 |
▼Loop 223 - PdV.cpp:51-63 - exec– | 0 | 0 |
○Loop 225 - PdV.cpp:51-63 - exec | 5.41 | 4.04 |
○Loop 224 - PdV.cpp:55-63 - exec | 0 | 0 |