Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:95-100 [...] | Coverage: 3.02% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:95-100 [...] | Coverage: 3.02% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 95 - 100 |
-------------------------------------------------------------------------------- |
95: #pragma omp parallel for simd collapse(2) |
96: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
97: for (int i = (x_min - 1 + 1); i < (x_max + 2 + 2); i++) { |
98: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
99: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
100: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i - 1, j + 0) + node_flux(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42e1b0 PUSH %RBP |
0x42e1b1 MOV %RSP,%RBP |
0x42e1b4 PUSH %R15 |
0x42e1b6 PUSH %R14 |
0x42e1b8 PUSH %R13 |
0x42e1ba PUSH %R12 |
0x42e1bc PUSH %RBX |
0x42e1bd AND $-0x40,%RSP |
0x42e1c1 ADD $-0x80,%RSP |
0x42e1c5 MOV 0x30(%RDI),%EAX |
0x42e1c8 MOV 0x34(%RDI),%EDX |
0x42e1cb MOV 0x28(%RDI),%ESI |
0x42e1ce MOV 0x2c(%RDI),%EBX |
0x42e1d1 ADD $0x3,%EDX |
0x42e1d4 LEA 0x1(%RAX),%R14D |
0x42e1d8 MOV %ESI,0x20(%RSP) |
0x42e1dc MOV %EDX,0x24(%RSP) |
0x42e1e0 CMP %EDX,%R14D |
0x42e1e3 JGE 42e963 |
0x42e1e9 LEA 0x4(%RBX),%R12D |
0x42e1ed MOV %EDX,%EBX |
0x42e1ef SUB %R14D,%EBX |
0x42e1f2 CMP %R12D,%ESI |
0x42e1f5 JGE 42e963 |
0x42e1fb MOV %R12D,%ECX |
0x42e1fe MOV %RDI,%R13 |
0x42e201 SUB %ESI,%ECX |
0x42e203 MOV %ECX,0x30(%RSP) |
0x42e207 CALL 404650 <omp_get_num_threads@plt> |
0x42e20c MOV %EAX,%R15D |
0x42e20f CALL 404540 <omp_get_thread_num@plt> |
0x42e214 XOR %EDX,%EDX |
0x42e216 MOV %EAX,%EDI |
0x42e218 MOV 0x30(%RSP),%EAX |
0x42e21c IMUL %EBX,%EAX |
0x42e21f DIV %R15D |
0x42e222 MOV %EAX,%R8D |
0x42e225 CMP %EDX,%EDI |
0x42e227 JB 42e98b |
0x42e22d IMUL %R8D,%EDI |
0x42e231 LEA (%RDI,%RDX,1),%EBX |
0x42e234 LEA (%R8,%RBX,1),%R9D |
0x42e238 MOV %R9D,0x1c(%RSP) |
0x42e23d CMP %R9D,%EBX |
0x42e240 JAE 42e963 |
0x42e246 MOV %EBX,%EAX |
0x42e248 XOR %EDX,%EDX |
0x42e24a MOV 0x20(%RSP),%R10D |
0x42e24f VMOVQ (%R13),%XMM21 |
0x42e256 DIVL 0x30(%RSP) |
0x42e25a VMOVQ 0x20(%R13),%XMM20 |
0x42e261 VMOVQ 0x10(%R13),%XMM19 |
0x42e268 MOV %R12D,%R11D |
0x42e26b VMOVQ 0x8(%R13),%XMM18 |
0x42e272 VMOVQ 0x18(%R13),%XMM17 |
0x42e279 VMOVSD 0x3253d(%RIP),%XMM22 |
0x42e283 VBROADCASTSD %XMM22,%YMM23 |
0x42e289 VBROADCASTSD %XMM22,%ZMM16 |
0x42e28f ADD %EDX,%R10D |
0x42e292 ADD %R14D,%EAX |
0x42e295 MOV %R8D,%EDX |
0x42e298 SUB %R10D,%R11D |
0x42e29b MOV %EAX,0x48(%RSP) |
0x42e29f CLTQ |
0x42e2a1 MOV %R10D,0x4c(%RSP) |
0x42e2a6 MOV %R11D,%ESI |
0x42e2a9 MOV %RAX,0x28(%RSP) |
0x42e2ae XCHG %AX,%AX |
(185) 0x42e2b0 CMP %ESI,%EDX |
(185) 0x42e2b2 CMOVA %ESI,%EDX |
(185) 0x42e2b5 LEA (%RBX,%RDX,1),%ESI |
(185) 0x42e2b8 MOV %ESI,0x34(%RSP) |
(185) 0x42e2bc CMP %ESI,%EBX |
(185) 0x42e2be JAE 42e933 |
(185) 0x42e2c4 MOV 0x48(%RSP),%R14D |
(185) 0x42e2c9 VMOVQ %XMM20,%RDI |
(185) 0x42e2cf MOV 0x28(%RSP),%RAX |
(185) 0x42e2d4 VMOVQ %XMM21,%R15 |
(185) 0x42e2da MOV (%RDI),%R10 |
(185) 0x42e2dd VMOVQ %XMM19,%R11 |
(185) 0x42e2e3 VMOVQ %XMM18,%RCX |
(185) 0x42e2e9 MOV 0x10(%R15),%R13 |
(185) 0x42e2ed MOV (%R15),%R15 |
(185) 0x42e2f0 VMOVQ 0x10(%R11),%XMM12 |
(185) 0x42e2f6 VMOVQ 0x10(%RCX),%XMM3 |
(185) 0x42e2fb LEA -0x1(%R14),%R12D |
(185) 0x42e2ff MOV 0x10(%RDI),%R14 |
(185) 0x42e303 MOV (%RCX),%RDI |
(185) 0x42e306 MOVSXD %R12D,%R9 |
(185) 0x42e309 MOV (%R11),%R12 |
(185) 0x42e30c MOV %R9,%R8 |
(185) 0x42e30f IMUL %R10,%R9 |
(185) 0x42e313 IMUL %RAX,%R10 |
(185) 0x42e317 IMUL %R15,%R8 |
(185) 0x42e31b IMUL %RAX,%R12 |
(185) 0x42e31f MOV %R9,0x60(%RSP) |
(185) 0x42e324 MOV %R10,0x78(%RSP) |
(185) 0x42e329 VMOVQ %XMM17,%R10 |
(185) 0x42e32f IMUL %RAX,%R15 |
(185) 0x42e333 IMUL %RAX,%RDI |
(185) 0x42e337 IMUL (%R10),%RAX |
(185) 0x42e33b VMOVQ 0x10(%R10),%XMM11 |
(185) 0x42e341 MOV %R8,0x58(%RSP) |
(185) 0x42e346 MOV %R12,0x40(%RSP) |
(185) 0x42e34b MOV %R15,0x68(%RSP) |
(185) 0x42e350 MOV %RDI,0x70(%RSP) |
(185) 0x42e355 MOV %RAX,0x50(%RSP) |
(185) 0x42e35a LEA -0x1(%RDX),%EAX |
(185) 0x42e35d CMP $0x6,%EAX |
(185) 0x42e360 JBE 42e980 |
(185) 0x42e366 MOVSXD 0x4c(%RSP),%RCX |
(185) 0x42e36b MOV 0x78(%RSP),%RDI |
(185) 0x42e370 ADD %RCX,%R9 |
(185) 0x42e373 LEA (%R15,%RCX,1),%RAX |
(185) 0x42e377 LEA (%RDI,%RCX,1),%RSI |
(185) 0x42e37b LEA (%R8,%RCX,1),%R10 |
(185) 0x42e37f SAL $0x3,%R9 |
(185) 0x42e383 VMOVQ %XMM3,%RDI |
(185) 0x42e388 SAL $0x3,%RAX |
(185) 0x42e38c SAL $0x3,%RSI |
(185) 0x42e390 LEA (%R14,%R9,1),%R11 |
(185) 0x42e394 LEA (%R13,%RAX,1),%R15 |
(185) 0x42e399 LEA (%R12,%RCX,1),%R12 |
(185) 0x42e39d VMOVQ %R11,%XMM8 |
(185) 0x42e3a2 VMOVQ %R15,%XMM9 |
(185) 0x42e3a7 LEA (%R14,%RSI,1),%R11 |
(185) 0x42e3ab LEA -0x8(%R14,%R9,1),%R15 |
(185) 0x42e3b0 LEA -0x8(%R13,%RAX,1),%R9 |
(185) 0x42e3b5 LEA -0x8(%R14,%RSI,1),%RAX |
(185) 0x42e3ba MOV 0x70(%RSP),%RSI |
(185) 0x42e3bf SAL $0x3,%R10 |
(185) 0x42e3c3 LEA (%R13,%R10,1),%R8 |
(185) 0x42e3c8 LEA -0x8(%R13,%R10,1),%R10 |
(185) 0x42e3cd ADD %RCX,%RSI |
(185) 0x42e3d0 VMOVQ %R10,%XMM2 |
(185) 0x42e3d5 VMOVQ %R8,%XMM7 |
(185) 0x42e3da SAL $0x3,%RSI |
(185) 0x42e3de VMOVQ %XMM12,%R8 |
(185) 0x42e3e3 LEA -0x8(%RDI,%RSI,1),%R10 |
(185) 0x42e3e8 ADD %RDI,%RSI |
(185) 0x42e3eb MOV 0x50(%RSP),%RDI |
(185) 0x42e3f0 LEA (%R8,%R12,8),%R12 |
(185) 0x42e3f4 MOV %EDX,%R8D |
(185) 0x42e3f7 VMOVQ %R10,%XMM1 |
(185) 0x42e3fc VMOVQ %XMM11,%R10 |
(185) 0x42e401 SHR $0x3,%R8D |
(185) 0x42e405 ADD %RDI,%RCX |
(185) 0x42e408 LEA (%R10,%RCX,8),%R10 |
(185) 0x42e40c MOV %R8,%RCX |
(185) 0x42e40f SAL $0x6,%RCX |
(185) 0x42e413 MOV %RCX,0x38(%RSP) |
(185) 0x42e418 XOR %ECX,%ECX |
(185) 0x42e41a AND $0x1,%R8D |
(185) 0x42e41e JE 42e4ab |
(185) 0x42e424 VMOVQ %XMM9,%RDI |
(185) 0x42e429 VMOVUPD (%R9),%ZMM5 |
(185) 0x42e42f VMULPD (%RAX),%ZMM5,%ZMM24 |
(185) 0x42e435 VMOVQ %XMM8,%R8 |
(185) 0x42e43a VMOVUPD (%RDI),%ZMM6 |
(185) 0x42e440 VMOVQ %XMM7,%RCX |
(185) 0x42e445 VMOVUPD (%R8),%ZMM4 |
(185) 0x42e44b VMULPD (%R11),%ZMM6,%ZMM0 |
(185) 0x42e451 VMOVQ %XMM2,%RDI |
(185) 0x42e456 VMOVQ %XMM1,%R8 |
(185) 0x42e45b VMOVUPD (%RDI),%ZMM10 |
(185) 0x42e461 VFMADD231PD (%RCX),%ZMM4,%ZMM0 |
(185) 0x42e467 VFMADD231PD (%R15),%ZMM10,%ZMM24 |
(185) 0x42e46d VADDPD %ZMM24,%ZMM0,%ZMM13 |
(185) 0x42e473 MOV 0x38(%RSP),%RDI |
(185) 0x42e478 MOV $0x40,%ECX |
(185) 0x42e47d VMULPD %ZMM16,%ZMM13,%ZMM14 |
(185) 0x42e483 VMOVUPD %ZMM14,(%R12) |
(185) 0x42e48a VMOVUPD (%RSI),%ZMM15 |
(185) 0x42e490 VSUBPD (%R8),%ZMM15,%ZMM25 |
(185) 0x42e496 VADDPD %ZMM14,%ZMM25,%ZMM6 |
(185) 0x42e49c VMOVUPD %ZMM6,(%R10) |
(185) 0x42e4a2 CMP %RDI,%RCX |
(185) 0x42e4a5 JE 42e5dc |
(185) 0x42e4ab VMOVQ %R13,%XMM10 |
(185) 0x42e4b0 MOV %EBX,0x18(%RSP) |
(185) 0x42e4b4 MOV %R10,%R13 |
(185) 0x42e4b7 MOV %EDX,0x14(%RSP) |
(185) 0x42e4bb MOV %RAX,%R10 |
(185) 0x42e4be VMOVQ %R14,%XMM4 |
(185) 0x42e4c3 MOV %RSI,%RAX |
(185) 0x42e4c6 VMOVQ %XMM8,%RDX |
(185) 0x42e4cb VMOVQ %XMM7,%RDI |
(185) 0x42e4d0 VMOVQ %XMM2,%RBX |
(185) 0x42e4d5 VMOVQ %XMM9,%RSI |
(186) 0x42e4da VMOVUPD (%RSI,%RCX,1),%ZMM7 |
(186) 0x42e4e1 VMOVUPD (%R9,%RCX,1),%ZMM2 |
(186) 0x42e4e8 VMOVUPD (%RDX,%RCX,1),%ZMM9 |
(186) 0x42e4ef VMOVQ %XMM1,%R14 |
(186) 0x42e4f4 VMULPD (%R11,%RCX,1),%ZMM7,%ZMM8 |
(186) 0x42e4fb VMULPD (%R10,%RCX,1),%ZMM2,%ZMM26 |
(186) 0x42e502 VMOVUPD (%RBX,%RCX,1),%ZMM0 |
(186) 0x42e509 VFMADD231PD (%RDI,%RCX,1),%ZMM9,%ZMM8 |
(186) 0x42e510 VFMADD231PD (%R15,%RCX,1),%ZMM0,%ZMM26 |
(186) 0x42e517 VADDPD %ZMM26,%ZMM8,%ZMM5 |
(186) 0x42e51d VMULPD %ZMM16,%ZMM5,%ZMM13 |
(186) 0x42e523 VMOVUPD %ZMM13,(%R12,%RCX,1) |
(186) 0x42e52a VMOVUPD (%RAX,%RCX,1),%ZMM14 |
(186) 0x42e531 VSUBPD (%R14,%RCX,1),%ZMM14,%ZMM27 |
(186) 0x42e538 VADDPD %ZMM13,%ZMM27,%ZMM15 |
(186) 0x42e53e VMOVUPD %ZMM15,(%R13,%RCX,1) |
(186) 0x42e546 VMOVUPD 0x40(%RSI,%RCX,1),%ZMM6 |
(186) 0x42e54e VMOVUPD 0x40(%R9,%RCX,1),%ZMM9 |
(186) 0x42e556 VMOVUPD 0x40(%RDX,%RCX,1),%ZMM7 |
(186) 0x42e55e VMULPD 0x40(%R11,%RCX,1),%ZMM6,%ZMM8 |
(186) 0x42e566 VMULPD 0x40(%R10,%RCX,1),%ZMM9,%ZMM28 |
(186) 0x42e56e VMOVUPD 0x40(%RBX,%RCX,1),%ZMM2 |
(186) 0x42e576 VFMADD231PD 0x40(%RDI,%RCX,1),%ZMM7,%ZMM8 |
(186) 0x42e57e VFMADD231PD 0x40(%R15,%RCX,1),%ZMM2,%ZMM28 |
(186) 0x42e586 VADDPD %ZMM28,%ZMM8,%ZMM0 |
(186) 0x42e58c VMULPD %ZMM16,%ZMM0,%ZMM5 |
(186) 0x42e592 VMOVUPD %ZMM5,0x40(%R12,%RCX,1) |
(186) 0x42e59a VMOVUPD 0x40(%RAX,%RCX,1),%ZMM13 |
(186) 0x42e5a2 VSUBPD 0x40(%R14,%RCX,1),%ZMM13,%ZMM29 |
(186) 0x42e5aa MOV 0x38(%RSP),%R14 |
(186) 0x42e5af VADDPD %ZMM5,%ZMM29,%ZMM14 |
(186) 0x42e5b5 VMOVUPD %ZMM14,0x40(%R13,%RCX,1) |
(186) 0x42e5bd SUB $-0x80,%RCX |
(186) 0x42e5c1 CMP %R14,%RCX |
(186) 0x42e5c4 JNE 42e4da |
(185) 0x42e5ca MOV 0x18(%RSP),%EBX |
(185) 0x42e5ce MOV 0x14(%RSP),%EDX |
(185) 0x42e5d2 VMOVQ %XMM4,%R14 |
(185) 0x42e5d7 VMOVQ %XMM10,%R13 |
(185) 0x42e5dc MOV 0x4c(%RSP),%R11D |
(185) 0x42e5e1 MOV %EDX,%ECX |
(185) 0x42e5e3 AND $-0x8,%ECX |
(185) 0x42e5e6 ADD %ECX,%EBX |
(185) 0x42e5e8 LEA (%RCX,%R11,1),%ESI |
(185) 0x42e5ec TEST $0x7,%DL |
(185) 0x42e5ef JE 42e92f |
(185) 0x42e5f5 SUB %ECX,%EDX |
(185) 0x42e5f7 LEA -0x1(%RDX),%R15D |
(185) 0x42e5fb CMP $0x2,%R15D |
(185) 0x42e5ff JBE 42e6de |
(185) 0x42e605 MOVSXD 0x4c(%RSP),%RAX |
(185) 0x42e60a MOV 0x78(%RSP),%R8 |
(185) 0x42e60f MOV 0x60(%RSP),%R12 |
(185) 0x42e614 MOV 0x68(%RSP),%RDI |
(185) 0x42e619 MOV 0x58(%RSP),%R9 |
(185) 0x42e61e MOV 0x40(%RSP),%R15 |
(185) 0x42e623 LEA (%R8,%RAX,1),%R8 |
(185) 0x42e627 LEA (%R12,%RAX,1),%R10 |
(185) 0x42e62b ADD %RCX,%R8 |
(185) 0x42e62e LEA (%RDI,%RAX,1),%R12 |
(185) 0x42e632 LEA (%R9,%RAX,1),%R11 |
(185) 0x42e636 ADD %RCX,%R10 |
(185) 0x42e639 SAL $0x3,%R8 |
(185) 0x42e63d ADD %RCX,%R12 |
(185) 0x42e640 ADD %RCX,%R11 |
(185) 0x42e643 VMOVUPD -0x8(%R14,%R8,1),%YMM15 |
(185) 0x42e64a MOV 0x70(%RSP),%R9 |
(185) 0x42e64f VMULPD -0x8(%R13,%R12,8),%YMM15,%YMM6 |
(185) 0x42e656 VMOVUPD (%R13,%R12,8),%YMM1 |
(185) 0x42e65d MOV 0x50(%RSP),%RDI |
(185) 0x42e662 LEA (%R15,%RAX,1),%R15 |
(185) 0x42e666 VMULPD (%R14,%R8,1),%YMM1,%YMM10 |
(185) 0x42e66c VMOVUPD (%R13,%R11,8),%YMM4 |
(185) 0x42e673 VMOVUPD -0x8(%R13,%R11,8),%YMM8 |
(185) 0x42e67a VFMADD231PD -0x8(%R14,%R10,8),%YMM8,%YMM6 |
(185) 0x42e681 VFMADD231PD (%R14,%R10,8),%YMM4,%YMM10 |
(185) 0x42e687 VADDPD %YMM6,%YMM10,%YMM7 |
(185) 0x42e68b ADD %RCX,%R15 |
(185) 0x42e68e VMOVQ %XMM3,%R11 |
(185) 0x42e693 VMOVQ %XMM11,%R10 |
(185) 0x42e698 LEA (%R9,%RAX,1),%R9 |
(185) 0x42e69c VMULPD %YMM23,%YMM7,%YMM9 |
(185) 0x42e6a2 ADD %RDI,%RAX |
(185) 0x42e6a5 ADD %RCX,%R9 |
(185) 0x42e6a8 ADD %RCX,%RAX |
(185) 0x42e6ab VMOVQ %XMM12,%RCX |
(185) 0x42e6b0 VMOVUPD %YMM9,(%RCX,%R15,8) |
(185) 0x42e6b6 VMOVUPD (%R11,%R9,8),%YMM2 |
(185) 0x42e6bc VSUBPD -0x8(%R11,%R9,8),%YMM2,%YMM0 |
(185) 0x42e6c3 VADDPD %YMM9,%YMM0,%YMM5 |
(185) 0x42e6c8 VMOVUPD %YMM5,(%R10,%RAX,8) |
(185) 0x42e6ce TEST $0x3,%DL |
(185) 0x42e6d1 JE 42e92f |
(185) 0x42e6d7 AND $-0x4,%EDX |
(185) 0x42e6da ADD %EDX,%EBX |
(185) 0x42e6dc ADD %EDX,%ESI |
(185) 0x42e6de MOV 0x60(%RSP),%R12 |
(185) 0x42e6e3 MOVSXD %ESI,%RAX |
(185) 0x42e6e6 MOV 0x58(%RSP),%RCX |
(185) 0x42e6eb LEA (%R12,%RAX,1),%R15 |
(185) 0x42e6ef LEA (%RCX,%RAX,1),%RDX |
(185) 0x42e6f3 LEA (%R14,%R15,8),%R9 |
(185) 0x42e6f7 MOV 0x68(%RSP),%R15 |
(185) 0x42e6fc LEA (%R13,%RDX,8),%R8 |
(185) 0x42e701 LEA -0x1(%RSI),%EDX |
(185) 0x42e704 MOVSXD %EDX,%RDX |
(185) 0x42e707 VMOVSD (%R9),%XMM1 |
(185) 0x42e70c ADD %RDX,%R12 |
(185) 0x42e70f ADD %RDX,%RCX |
(185) 0x42e712 VMOVSD (%R14,%R12,8),%XMM15 |
(185) 0x42e718 LEA (%R15,%RAX,1),%RDI |
(185) 0x42e71c ADD %RDX,%R15 |
(185) 0x42e71f LEA (%R13,%RDI,8),%R10 |
(185) 0x42e724 MOV 0x78(%RSP),%RDI |
(185) 0x42e729 MOV %RDI,%R11 |
(185) 0x42e72c ADD %RDX,%RDI |
(185) 0x42e72f VMOVSD (%R14,%RDI,8),%XMM10 |
(185) 0x42e735 VMULSD (%R13,%R15,8),%XMM10,%XMM4 |
(185) 0x42e73c VFMADD231SD (%R13,%RCX,8),%XMM15,%XMM4 |
(185) 0x42e743 MOV 0x40(%RSP),%RCX |
(185) 0x42e748 ADD %RAX,%R11 |
(185) 0x42e74b VMOVQ %XMM12,%R15 |
(185) 0x42e750 VMOVQ %XMM3,%RDI |
(185) 0x42e755 LEA (%R14,%R11,8),%R11 |
(185) 0x42e759 VMOVSD (%R11),%XMM13 |
(185) 0x42e75e VMULSD (%R10),%XMM13,%XMM14 |
(185) 0x42e763 VFMADD231SD (%R8),%XMM1,%XMM14 |
(185) 0x42e768 VADDSD %XMM4,%XMM14,%XMM6 |
(185) 0x42e76c VMULSD %XMM22,%XMM6,%XMM8 |
(185) 0x42e772 LEA (%RCX,%RAX,1),%R12 |
(185) 0x42e776 VMOVSD %XMM8,(%R15,%R12,8) |
(185) 0x42e77c MOV 0x70(%RSP),%R12 |
(185) 0x42e781 MOV 0x50(%RSP),%R15 |
(185) 0x42e786 LEA (%R12,%RAX,1),%RCX |
(185) 0x42e78a ADD %R15,%RAX |
(185) 0x42e78d MOV 0x34(%RSP),%R15D |
(185) 0x42e792 ADD %R12,%RDX |
(185) 0x42e795 LEA (%RDI,%RCX,8),%RCX |
(185) 0x42e799 VMOVSD (%RCX),%XMM7 |
(185) 0x42e79d VSUBSD (%RDI,%RDX,8),%XMM7,%XMM9 |
(185) 0x42e7a2 VMOVQ %XMM11,%RDX |
(185) 0x42e7a7 LEA 0x1(%RBX),%EDI |
(185) 0x42e7aa VADDSD %XMM8,%XMM9,%XMM2 |
(185) 0x42e7af VMOVSD %XMM2,(%RDX,%RAX,8) |
(185) 0x42e7b4 LEA 0x1(%RSI),%EAX |
(185) 0x42e7b7 CMP %R15D,%EDI |
(185) 0x42e7ba JAE 42e92f |
(185) 0x42e7c0 MOV 0x60(%RSP),%RDX |
(185) 0x42e7c5 CLTQ |
(185) 0x42e7c7 MOV 0x58(%RSP),%R12 |
(185) 0x42e7cc VMOVSD (%R11),%XMM10 |
(185) 0x42e7d1 VMULSD (%R10),%XMM10,%XMM4 |
(185) 0x42e7d6 MOV 0x40(%RSP),%R10 |
(185) 0x42e7db VMOVSD (%R9),%XMM15 |
(185) 0x42e7e0 VFMADD231SD (%R8),%XMM15,%XMM4 |
(185) 0x42e7e5 VMOVQ %XMM12,%R8 |
(185) 0x42e7ea ADD $0x2,%EBX |
(185) 0x42e7ed ADD $0x2,%ESI |
(185) 0x42e7f0 LEA (%RDX,%RAX,1),%RDI |
(185) 0x42e7f4 MOV 0x68(%RSP),%RDX |
(185) 0x42e7f9 ADD %RAX,%R12 |
(185) 0x42e7fc LEA (%R13,%R12,8),%R15 |
(185) 0x42e801 LEA (%R14,%RDI,8),%R12 |
(185) 0x42e805 LEA (%R10,%RAX,1),%R9 |
(185) 0x42e809 VMOVQ %R12,%XMM13 |
(185) 0x42e80e LEA (%RDX,%RAX,1),%RDI |
(185) 0x42e812 MOV 0x78(%RSP),%RDX |
(185) 0x42e817 LEA (%R13,%RDI,8),%R12 |
(185) 0x42e81c VMOVQ %R12,%XMM14 |
(185) 0x42e821 LEA (%RDX,%RAX,1),%RDI |
(185) 0x42e825 VMOVQ %XMM14,%RDX |
(185) 0x42e82a LEA (%R14,%RDI,8),%R12 |
(185) 0x42e82e VMOVQ %XMM13,%RDI |
(185) 0x42e833 VMOVSD (%R12),%XMM0 |
(185) 0x42e839 VMOVSD (%RDI),%XMM1 |
(185) 0x42e83d VMULSD (%RDX),%XMM0,%XMM5 |
(185) 0x42e841 VFMADD231SD (%R15),%XMM1,%XMM5 |
(185) 0x42e846 VADDSD %XMM5,%XMM4,%XMM6 |
(185) 0x42e84a VMULSD %XMM22,%XMM6,%XMM8 |
(185) 0x42e850 VMOVSD %XMM8,(%R8,%R9,8) |
(185) 0x42e856 MOV 0x70(%RSP),%R9 |
(185) 0x42e85b VMOVQ %XMM3,%RDX |
(185) 0x42e860 LEA (%R9,%RAX,1),%R11 |
(185) 0x42e864 LEA (%RDX,%R11,8),%R8 |
(185) 0x42e868 MOV 0x50(%RSP),%R11 |
(185) 0x42e86d VMOVSD (%R8),%XMM7 |
(185) 0x42e872 VSUBSD (%RCX),%XMM7,%XMM9 |
(185) 0x42e876 VMOVQ %XMM11,%RCX |
(185) 0x42e87b VADDSD %XMM8,%XMM9,%XMM2 |
(185) 0x42e880 ADD %R11,%RAX |
(185) 0x42e883 VMOVSD %XMM2,(%RCX,%RAX,8) |
(185) 0x42e888 MOV 0x34(%RSP),%EAX |
(185) 0x42e88c CMP %EAX,%EBX |
(185) 0x42e88e JAE 42e92f |
(185) 0x42e894 MOV 0x58(%RSP),%RBX |
(185) 0x42e899 MOV 0x68(%RSP),%RCX |
(185) 0x42e89e MOV 0x60(%RSP),%RDI |
(185) 0x42e8a3 MOVSXD %ESI,%RSI |
(185) 0x42e8a6 MOV 0x78(%RSP),%RDX |
(185) 0x42e8ab ADD %RSI,%R10 |
(185) 0x42e8ae ADD %RSI,%R9 |
(185) 0x42e8b1 ADD %RSI,%R11 |
(185) 0x42e8b4 ADD %RSI,%RBX |
(185) 0x42e8b7 ADD %RSI,%RCX |
(185) 0x42e8ba ADD %RSI,%RDI |
(185) 0x42e8bd VMOVSD (%R13,%RCX,8),%XMM0 |
(185) 0x42e8c4 VMOVSD (%R13,%RBX,8),%XMM5 |
(185) 0x42e8cb ADD %RSI,%RDX |
(185) 0x42e8ce VMOVQ %XMM14,%R13 |
(185) 0x42e8d3 VMULSD (%R14,%RDX,8),%XMM0,%XMM10 |
(185) 0x42e8d9 VMOVSD (%R13),%XMM14 |
(185) 0x42e8df VFMADD231SD (%R14,%RDI,8),%XMM5,%XMM10 |
(185) 0x42e8e5 VMULSD (%R12),%XMM14,%XMM1 |
(185) 0x42e8eb VMOVQ %XMM13,%R14 |
(185) 0x42e8f0 VMOVQ %XMM3,%R12 |
(185) 0x42e8f5 VMOVSD (%R15),%XMM13 |
(185) 0x42e8fa VMOVQ %XMM12,%R15 |
(185) 0x42e8ff VFMADD231SD (%R14),%XMM13,%XMM1 |
(185) 0x42e904 VADDSD %XMM1,%XMM10,%XMM4 |
(185) 0x42e908 VMULSD %XMM22,%XMM4,%XMM15 |
(185) 0x42e90e VMOVSD %XMM15,(%R15,%R10,8) |
(185) 0x42e914 VMOVSD (%R12,%R9,8),%XMM12 |
(185) 0x42e91a VSUBSD (%R8),%XMM12,%XMM3 |
(185) 0x42e91f VMOVQ %XMM11,%R8 |
(185) 0x42e924 VADDSD %XMM15,%XMM3,%XMM6 |
(185) 0x42e929 VMOVSD %XMM6,(%R8,%R11,8) |
(185) 0x42e92f MOV 0x34(%RSP),%EBX |
(185) 0x42e933 INCL 0x48(%RSP) |
(185) 0x42e937 MOV 0x48(%RSP),%EAX |
(185) 0x42e93b INCQ 0x28(%RSP) |
(185) 0x42e940 CMP %EAX,0x24(%RSP) |
(185) 0x42e944 JLE 42e960 |
(185) 0x42e946 MOV 0x1c(%RSP),%EDX |
(185) 0x42e94a MOV 0x20(%RSP),%EDI |
(185) 0x42e94e MOV 0x30(%RSP),%ESI |
(185) 0x42e952 SUB %EBX,%EDX |
(185) 0x42e954 MOV %EDI,0x4c(%RSP) |
(185) 0x42e958 JMP 42e2b0 |
0x42e95d NOPL (%RAX) |
0x42e960 VZEROUPPER |
0x42e963 LEA -0x28(%RBP),%RSP |
0x42e967 POP %RBX |
0x42e968 POP %R12 |
0x42e96a POP %R13 |
0x42e96c POP %R14 |
0x42e96e POP %R15 |
0x42e970 POP %RBP |
0x42e971 RET |
0x42e972 NOPW %CS:(%RAX,%RAX,1) |
0x42e97d NOPL (%RAX) |
(185) 0x42e980 MOV 0x4c(%RSP),%ESI |
(185) 0x42e984 XOR %ECX,%ECX |
(185) 0x42e986 JMP 42e5f5 |
0x42e98b INC %R8D |
0x42e98e XOR %EDX,%EDX |
0x42e990 JMP 42e22d |
0x42e995 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:95-100 |
Module | exec |
nb instructions | 84 |
nb uops | 83 |
loop length | 312 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 8 |
micro-operation queue | 13.83 cycles |
front end | 13.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 6.50 | 6.50 | 6.00 | 7.33 | 7.33 | 7.33 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.50 | 6.50 | 6.50 | 6.50 | 6.00 | 7.33 | 7.33 | 7.33 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.83 |
Dispatch | 7.33 |
DIV/SQRT | 12.00 |
Overall L1 | 13.83 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 8% |
load | 10% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 8% |
load | 10% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x3,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42e963 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA 0x4(%RBX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R14D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42e963 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R12D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %ESI,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x30(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R15D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42e98b <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7db> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R8D,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R8,%RBX,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42e963 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x20(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x30(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%R13),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R12D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x8(%R13),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x3253d(%RIP),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM22,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM22,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
ADD %EDX,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R14D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R10D,%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CLTQ | |||||||||||||||||
MOV %R10D,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R11D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42e22d <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_mom.cpp:95-100 |
Module | exec |
nb instructions | 84 |
nb uops | 83 |
loop length | 312 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 8 |
micro-operation queue | 13.83 cycles |
front end | 13.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 6.50 | 6.50 | 6.00 | 7.33 | 7.33 | 7.33 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.50 | 6.50 | 6.50 | 6.50 | 6.00 | 7.33 | 7.33 | 7.33 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.83 |
Dispatch | 7.33 |
DIV/SQRT | 12.00 |
Overall L1 | 13.83 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 8% |
load | 10% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 8% |
load | 10% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x3,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42e963 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA 0x4(%RBX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R14D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42e963 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R12D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %ESI,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x30(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R15D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42e98b <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7db> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R8D,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R8,%RBX,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42e963 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x20(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x30(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%R13),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R12D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x8(%R13),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x3253d(%RIP),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM22,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM22,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
ADD %EDX,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R14D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R10D,%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CLTQ | |||||||||||||||||
MOV %R10D,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R11D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42e22d <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5+0x7d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.5– | 3.02 | 1.52 |
▼Loop 185 - advec_mom.cpp:97-100 - exec– | 0 | 0 |
○Loop 186 - advec_mom.cpp:98-100 - exec | 3.01 | 1.52 |