Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.23% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.23% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 71 - 75 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for simd collapse(2) |
72: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
73: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
74: post_vol(i, j) = volume(i, j); |
75: pre_vol(i, j) = post_vol(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
0x42bfe0 PUSH %RBP |
0x42bfe1 MOV %RSP,%RBP |
0x42bfe4 PUSH %R15 |
0x42bfe6 PUSH %R14 |
0x42bfe8 PUSH %R13 |
0x42bfea PUSH %R12 |
0x42bfec PUSH %RBX |
0x42bfed AND $-0x40,%RSP |
0x42bff1 ADD $-0x80,%RSP |
0x42bff5 MOV 0x28(%RDI),%EAX |
0x42bff8 MOV 0x2c(%RDI),%EDX |
0x42bffb MOV 0x20(%RDI),%EBX |
0x42bffe MOV 0x24(%RDI),%ECX |
0x42c001 ADD $0x4,%EDX |
0x42c004 LEA -0x1(%RAX),%R15D |
0x42c008 LEA -0x1(%RBX),%ESI |
0x42c00b MOV %EDX,0x48(%RSP) |
0x42c00f MOV %ESI,0x44(%RSP) |
0x42c013 CMP %EDX,%R15D |
0x42c016 JGE 42c5c3 |
0x42c01c MOV %EDX,%EBX |
0x42c01e LEA 0x4(%RCX),%R14D |
0x42c022 SUB %R15D,%EBX |
0x42c025 CMP %R14D,%ESI |
0x42c028 JGE 42c5c3 |
0x42c02e MOV %RDI,%R13 |
0x42c031 MOV %R14D,%EDI |
0x42c034 SUB %ESI,%EDI |
0x42c036 MOV %EDI,0x4c(%RSP) |
0x42c03a CALL 404650 <omp_get_num_threads@plt> |
0x42c03f MOV %EAX,%R12D |
0x42c042 CALL 404540 <omp_get_thread_num@plt> |
0x42c047 XOR %EDX,%EDX |
0x42c049 MOV %EAX,%R8D |
0x42c04c MOV 0x4c(%RSP),%EAX |
0x42c050 IMUL %EBX,%EAX |
0x42c053 DIV %R12D |
0x42c056 MOV %EAX,%EDI |
0x42c058 CMP %EDX,%R8D |
0x42c05b JB 42c5e4 |
0x42c061 IMUL %EDI,%R8D |
0x42c065 LEA (%R8,%RDX,1),%R11D |
0x42c069 LEA (%RDI,%R11,1),%R9D |
0x42c06d MOV %R9D,0x40(%RSP) |
0x42c072 CMP %R9D,%R11D |
0x42c075 JAE 42c5c3 |
0x42c07b MOV %R11D,%EAX |
0x42c07e XOR %EDX,%EDX |
0x42c080 MOV 0x44(%RSP),%R10D |
0x42c085 MOV 0x18(%R13),%RSI |
0x42c089 DIVL 0x4c(%RSP) |
0x42c08d MOV 0x10(%R13),%RBX |
0x42c091 MOV %RSI,0x30(%RSP) |
0x42c096 MOV %RBX,0x20(%RSP) |
0x42c09b ADD %EDX,%R10D |
0x42c09e ADD %R15D,%EAX |
0x42c0a1 MOV %R14D,%EDX |
0x42c0a4 MOV 0x8(%R13),%R15 |
0x42c0a8 MOV (%R13),%R14 |
0x42c0ac MOV %R10D,0x74(%RSP) |
0x42c0b1 SUB %R10D,%EDX |
0x42c0b4 MOVSXD %EAX,%R12 |
0x42c0b7 MOV %R15,0x38(%RSP) |
0x42c0bc MOV %R14,0x28(%RSP) |
0x42c0c1 NOPL (%RAX) |
(184) 0x42c0c8 CMP %EDX,%EDI |
(184) 0x42c0ca CMOVBE %EDI,%EDX |
(184) 0x42c0cd LEA (%R11,%RDX,1),%ECX |
(184) 0x42c0d1 MOV %ECX,0x70(%RSP) |
(184) 0x42c0d5 CMP %ECX,%R11D |
(184) 0x42c0d8 JAE 42c596 |
(184) 0x42c0de MOV 0x38(%RSP),%R13 |
(184) 0x42c0e3 MOV 0x30(%RSP),%RDI |
(184) 0x42c0e8 MOV 0x20(%RSP),%R10 |
(184) 0x42c0ed MOV 0x28(%RSP),%RAX |
(184) 0x42c0f2 MOV (%R13),%R8 |
(184) 0x42c0f6 MOV (%RDI),%R9 |
(184) 0x42c0f9 MOV 0x10(%RDI),%R14 |
(184) 0x42c0fd MOV (%R10),%RDI |
(184) 0x42c100 IMUL %R12,%R8 |
(184) 0x42c104 MOV 0x10(%R13),%R15 |
(184) 0x42c108 MOV 0x10(%R10),%RSI |
(184) 0x42c10c IMUL %R12,%R9 |
(184) 0x42c110 MOV (%RAX),%R13 |
(184) 0x42c113 MOV 0x10(%RAX),%RBX |
(184) 0x42c117 LEA -0x1(%RDX),%EAX |
(184) 0x42c11a IMUL %R12,%RDI |
(184) 0x42c11e MOV %RSI,0x78(%RSP) |
(184) 0x42c123 MOV %R8,0x58(%RSP) |
(184) 0x42c128 IMUL %R12,%R13 |
(184) 0x42c12c MOV %R9,0x60(%RSP) |
(184) 0x42c131 MOV %RDI,0x68(%RSP) |
(184) 0x42c136 CMP $0x6,%EAX |
(184) 0x42c139 JBE 42c5d8 |
(184) 0x42c13f MOVSXD 0x74(%RSP),%RAX |
(184) 0x42c144 MOV 0x68(%RSP),%RSI |
(184) 0x42c149 LEA (%R9,%RAX,1),%RCX |
(184) 0x42c14d LEA (%R8,%RAX,1),%R8 |
(184) 0x42c151 LEA (%R14,%RCX,8),%R9 |
(184) 0x42c155 MOV 0x78(%RSP),%RCX |
(184) 0x42c15a LEA 0x1(%R13,%RAX,1),%RDI |
(184) 0x42c15f ADD %RSI,%RAX |
(184) 0x42c162 SAL $0x3,%RDI |
(184) 0x42c166 LEA (%R15,%R8,8),%R10 |
(184) 0x42c16a LEA (%RCX,%RAX,8),%RSI |
(184) 0x42c16e MOV %EDX,%ECX |
(184) 0x42c170 LEA (%RBX,%RDI,1),%R8 |
(184) 0x42c174 XOR %EAX,%EAX |
(184) 0x42c176 SHR $0x3,%ECX |
(184) 0x42c179 LEA -0x8(%RBX,%RDI,1),%RDI |
(184) 0x42c17e SAL $0x6,%RCX |
(184) 0x42c182 MOV %RCX,0x50(%RSP) |
(184) 0x42c187 SUB $0x40,%RCX |
(184) 0x42c18b SHR $0x6,%RCX |
(184) 0x42c18f INC %RCX |
(184) 0x42c192 AND $0x7,%ECX |
(184) 0x42c195 JE 42c2e3 |
(184) 0x42c19b CMP $0x1,%RCX |
(184) 0x42c19f JE 42c2b1 |
(184) 0x42c1a5 CMP $0x2,%RCX |
(184) 0x42c1a9 JE 42c28a |
(184) 0x42c1af CMP $0x3,%RCX |
(184) 0x42c1b3 JE 42c263 |
(184) 0x42c1b9 CMP $0x4,%RCX |
(184) 0x42c1bd JE 42c23c |
(184) 0x42c1bf CMP $0x5,%RCX |
(184) 0x42c1c3 JE 42c215 |
(184) 0x42c1c5 CMP $0x6,%RCX |
(184) 0x42c1c9 JE 42c1ee |
(184) 0x42c1cb VMOVUPD (%R10),%ZMM0 |
(184) 0x42c1d1 MOV $0x40,%EAX |
(184) 0x42c1d6 VMOVUPD %ZMM0,(%R9) |
(184) 0x42c1dc VADDPD (%R8),%ZMM0,%ZMM1 |
(184) 0x42c1e2 VSUBPD (%RDI),%ZMM1,%ZMM2 |
(184) 0x42c1e8 VMOVUPD %ZMM2,(%RSI) |
(184) 0x42c1ee VMOVUPD (%R10,%RAX,1),%ZMM3 |
(184) 0x42c1f5 VMOVUPD %ZMM3,(%R9,%RAX,1) |
(184) 0x42c1fc VADDPD (%R8,%RAX,1),%ZMM3,%ZMM4 |
(184) 0x42c203 VSUBPD (%RDI,%RAX,1),%ZMM4,%ZMM5 |
(184) 0x42c20a VMOVUPD %ZMM5,(%RSI,%RAX,1) |
(184) 0x42c211 ADD $0x40,%RAX |
(184) 0x42c215 VMOVUPD (%R10,%RAX,1),%ZMM6 |
(184) 0x42c21c VMOVUPD %ZMM6,(%R9,%RAX,1) |
(184) 0x42c223 VADDPD (%R8,%RAX,1),%ZMM6,%ZMM7 |
(184) 0x42c22a VSUBPD (%RDI,%RAX,1),%ZMM7,%ZMM8 |
(184) 0x42c231 VMOVUPD %ZMM8,(%RSI,%RAX,1) |
(184) 0x42c238 ADD $0x40,%RAX |
(184) 0x42c23c VMOVUPD (%R10,%RAX,1),%ZMM9 |
(184) 0x42c243 VMOVUPD %ZMM9,(%R9,%RAX,1) |
(184) 0x42c24a VADDPD (%R8,%RAX,1),%ZMM9,%ZMM10 |
(184) 0x42c251 VSUBPD (%RDI,%RAX,1),%ZMM10,%ZMM11 |
(184) 0x42c258 VMOVUPD %ZMM11,(%RSI,%RAX,1) |
(184) 0x42c25f ADD $0x40,%RAX |
(184) 0x42c263 VMOVUPD (%R10,%RAX,1),%ZMM12 |
(184) 0x42c26a VMOVUPD %ZMM12,(%R9,%RAX,1) |
(184) 0x42c271 VADDPD (%R8,%RAX,1),%ZMM12,%ZMM13 |
(184) 0x42c278 VSUBPD (%RDI,%RAX,1),%ZMM13,%ZMM14 |
(184) 0x42c27f VMOVUPD %ZMM14,(%RSI,%RAX,1) |
(184) 0x42c286 ADD $0x40,%RAX |
(184) 0x42c28a VMOVUPD (%R10,%RAX,1),%ZMM15 |
(184) 0x42c291 VMOVUPD %ZMM15,(%R9,%RAX,1) |
(184) 0x42c298 VADDPD (%R8,%RAX,1),%ZMM15,%ZMM0 |
(184) 0x42c29f VSUBPD (%RDI,%RAX,1),%ZMM0,%ZMM1 |
(184) 0x42c2a6 VMOVUPD %ZMM1,(%RSI,%RAX,1) |
(184) 0x42c2ad ADD $0x40,%RAX |
(184) 0x42c2b1 VMOVUPD (%R10,%RAX,1),%ZMM2 |
(184) 0x42c2b8 VMOVUPD %ZMM2,(%R9,%RAX,1) |
(184) 0x42c2bf VADDPD (%R8,%RAX,1),%ZMM2,%ZMM3 |
(184) 0x42c2c6 VSUBPD (%RDI,%RAX,1),%ZMM3,%ZMM4 |
(184) 0x42c2cd VMOVUPD %ZMM4,(%RSI,%RAX,1) |
(184) 0x42c2d4 ADD $0x40,%RAX |
(184) 0x42c2d8 CMP %RAX,0x50(%RSP) |
(184) 0x42c2dd JE 42c42f |
(185) 0x42c2e3 VMOVUPD (%R10,%RAX,1),%ZMM5 |
(185) 0x42c2ea VMOVUPD %ZMM5,(%R9,%RAX,1) |
(185) 0x42c2f1 VADDPD (%R8,%RAX,1),%ZMM5,%ZMM6 |
(185) 0x42c2f8 VSUBPD (%RDI,%RAX,1),%ZMM6,%ZMM7 |
(185) 0x42c2ff VMOVUPD %ZMM7,(%RSI,%RAX,1) |
(185) 0x42c306 VMOVUPD 0x40(%R10,%RAX,1),%ZMM8 |
(185) 0x42c30e VMOVUPD %ZMM8,0x40(%R9,%RAX,1) |
(185) 0x42c316 VADDPD 0x40(%R8,%RAX,1),%ZMM8,%ZMM9 |
(185) 0x42c31e VSUBPD 0x40(%RDI,%RAX,1),%ZMM9,%ZMM10 |
(185) 0x42c326 VMOVUPD %ZMM10,0x40(%RSI,%RAX,1) |
(185) 0x42c32e VMOVUPD 0x80(%R10,%RAX,1),%ZMM11 |
(185) 0x42c336 VMOVUPD %ZMM11,0x80(%R9,%RAX,1) |
(185) 0x42c33e VADDPD 0x80(%R8,%RAX,1),%ZMM11,%ZMM12 |
(185) 0x42c346 VSUBPD 0x80(%RDI,%RAX,1),%ZMM12,%ZMM13 |
(185) 0x42c34e VMOVUPD %ZMM13,0x80(%RSI,%RAX,1) |
(185) 0x42c356 VMOVUPD 0xc0(%R10,%RAX,1),%ZMM14 |
(185) 0x42c35e VMOVUPD %ZMM14,0xc0(%R9,%RAX,1) |
(185) 0x42c366 VADDPD 0xc0(%R8,%RAX,1),%ZMM14,%ZMM15 |
(185) 0x42c36e VSUBPD 0xc0(%RDI,%RAX,1),%ZMM15,%ZMM0 |
(185) 0x42c376 VMOVUPD %ZMM0,0xc0(%RSI,%RAX,1) |
(185) 0x42c37e VMOVUPD 0x100(%R10,%RAX,1),%ZMM1 |
(185) 0x42c386 VMOVUPD %ZMM1,0x100(%R9,%RAX,1) |
(185) 0x42c38e VADDPD 0x100(%R8,%RAX,1),%ZMM1,%ZMM2 |
(185) 0x42c396 VSUBPD 0x100(%RDI,%RAX,1),%ZMM2,%ZMM3 |
(185) 0x42c39e VMOVUPD %ZMM3,0x100(%RSI,%RAX,1) |
(185) 0x42c3a6 VMOVUPD 0x140(%R10,%RAX,1),%ZMM4 |
(185) 0x42c3ae VMOVUPD %ZMM4,0x140(%R9,%RAX,1) |
(185) 0x42c3b6 VADDPD 0x140(%R8,%RAX,1),%ZMM4,%ZMM5 |
(185) 0x42c3be VSUBPD 0x140(%RDI,%RAX,1),%ZMM5,%ZMM6 |
(185) 0x42c3c6 VMOVUPD %ZMM6,0x140(%RSI,%RAX,1) |
(185) 0x42c3ce VMOVUPD 0x180(%R10,%RAX,1),%ZMM7 |
(185) 0x42c3d6 VMOVUPD %ZMM7,0x180(%R9,%RAX,1) |
(185) 0x42c3de VADDPD 0x180(%R8,%RAX,1),%ZMM7,%ZMM8 |
(185) 0x42c3e6 VSUBPD 0x180(%RDI,%RAX,1),%ZMM8,%ZMM9 |
(185) 0x42c3ee VMOVUPD %ZMM9,0x180(%RSI,%RAX,1) |
(185) 0x42c3f6 VMOVUPD 0x1c0(%R10,%RAX,1),%ZMM10 |
(185) 0x42c3fe VMOVUPD %ZMM10,0x1c0(%R9,%RAX,1) |
(185) 0x42c406 VADDPD 0x1c0(%R8,%RAX,1),%ZMM10,%ZMM11 |
(185) 0x42c40e VSUBPD 0x1c0(%RDI,%RAX,1),%ZMM11,%ZMM12 |
(185) 0x42c416 VMOVUPD %ZMM12,0x1c0(%RSI,%RAX,1) |
(185) 0x42c41e ADD $0x200,%RAX |
(185) 0x42c424 CMP %RAX,0x50(%RSP) |
(185) 0x42c429 JNE 42c2e3 |
(184) 0x42c42f MOV 0x74(%RSP),%R10D |
(184) 0x42c434 MOV %EDX,%R9D |
(184) 0x42c437 AND $-0x8,%R9D |
(184) 0x42c43b ADD %R9D,%R11D |
(184) 0x42c43e LEA (%R9,%R10,1),%ESI |
(184) 0x42c442 TEST $0x7,%DL |
(184) 0x42c445 JE 42c591 |
(184) 0x42c44b SUB %R9D,%EDX |
(184) 0x42c44e LEA -0x1(%RDX),%R8D |
(184) 0x42c452 CMP $0x2,%R8D |
(184) 0x42c456 JBE 42c4c0 |
(184) 0x42c458 MOVSXD 0x74(%RSP),%RAX |
(184) 0x42c45d MOV 0x58(%RSP),%R10 |
(184) 0x42c462 MOV 0x60(%RSP),%R8 |
(184) 0x42c467 ADD %RAX,%R10 |
(184) 0x42c46a LEA (%R13,%RAX,1),%RDI |
(184) 0x42c46f ADD %R9,%R10 |
(184) 0x42c472 ADD %RAX,%R8 |
(184) 0x42c475 LEA 0x1(%R9,%RDI,1),%RCX |
(184) 0x42c47a MOV 0x68(%RSP),%RDI |
(184) 0x42c47f VMOVUPD (%R15,%R10,8),%YMM13 |
(184) 0x42c485 ADD %R9,%R8 |
(184) 0x42c488 ADD %RDI,%RAX |
(184) 0x42c48b VMOVUPD %YMM13,(%R14,%R8,8) |
(184) 0x42c491 ADD %R9,%RAX |
(184) 0x42c494 MOV 0x78(%RSP),%R9 |
(184) 0x42c499 VMOVUPD (%RBX,%RCX,8),%YMM14 |
(184) 0x42c49e VSUBPD -0x8(%RBX,%RCX,8),%YMM14,%YMM15 |
(184) 0x42c4a4 VADDPD %YMM13,%YMM15,%YMM0 |
(184) 0x42c4a9 VMOVUPD %YMM0,(%R9,%RAX,8) |
(184) 0x42c4af TEST $0x3,%DL |
(184) 0x42c4b2 JE 42c591 |
(184) 0x42c4b8 AND $-0x4,%EDX |
(184) 0x42c4bb ADD %EDX,%R11D |
(184) 0x42c4be ADD %EDX,%ESI |
(184) 0x42c4c0 MOV 0x58(%RSP),%R9 |
(184) 0x42c4c5 MOVSXD %ESI,%RDX |
(184) 0x42c4c8 MOV 0x60(%RSP),%R10 |
(184) 0x42c4cd LEA (%R9,%RDX,1),%RAX |
(184) 0x42c4d1 LEA (%R10,%RDX,1),%RCX |
(184) 0x42c4d5 VMOVSD (%R15,%RAX,8),%XMM1 |
(184) 0x42c4db LEA 0x1(%RSI),%EAX |
(184) 0x42c4de CLTQ |
(184) 0x42c4e0 LEA (%R13,%RAX,1),%R8 |
(184) 0x42c4e5 VMOVSD %XMM1,(%R14,%RCX,8) |
(184) 0x42c4eb LEA (%RBX,%R8,8),%RCX |
(184) 0x42c4ef MOV 0x68(%RSP),%R8 |
(184) 0x42c4f4 VMOVSD (%RCX),%XMM2 |
(184) 0x42c4f8 LEA (%R8,%RDX,1),%RDI |
(184) 0x42c4fc ADD %R13,%RDX |
(184) 0x42c4ff VSUBSD (%RBX,%RDX,8),%XMM2,%XMM3 |
(184) 0x42c504 MOV 0x78(%RSP),%RDX |
(184) 0x42c509 VADDSD %XMM1,%XMM3,%XMM4 |
(184) 0x42c50d VMOVSD %XMM4,(%RDX,%RDI,8) |
(184) 0x42c512 MOV 0x70(%RSP),%EDI |
(184) 0x42c516 LEA 0x1(%R11),%EDX |
(184) 0x42c51a CMP %EDI,%EDX |
(184) 0x42c51c JAE 42c591 |
(184) 0x42c51e LEA (%RAX,%R9,1),%RDX |
(184) 0x42c522 LEA (%RAX,%R10,1),%RDI |
(184) 0x42c526 ADD %R8,%RAX |
(184) 0x42c529 ADD $0x2,%R11D |
(184) 0x42c52d VMOVSD (%R15,%RDX,8),%XMM5 |
(184) 0x42c533 LEA 0x2(%RSI),%EDX |
(184) 0x42c536 MOVSXD %EDX,%RDX |
(184) 0x42c539 VMOVSD %XMM5,(%R14,%RDI,8) |
(184) 0x42c53f LEA (%R13,%RDX,1),%RDI |
(184) 0x42c544 LEA (%RBX,%RDI,8),%RDI |
(184) 0x42c548 VADDSD (%RDI),%XMM5,%XMM6 |
(184) 0x42c54c VSUBSD (%RCX),%XMM6,%XMM7 |
(184) 0x42c550 MOV 0x78(%RSP),%RCX |
(184) 0x42c555 VMOVSD %XMM7,(%RCX,%RAX,8) |
(184) 0x42c55a MOV 0x70(%RSP),%EAX |
(184) 0x42c55e CMP %EAX,%R11D |
(184) 0x42c561 JAE 42c591 |
(184) 0x42c563 ADD %RDX,%R9 |
(184) 0x42c566 ADD $0x3,%ESI |
(184) 0x42c569 ADD %RDX,%R10 |
(184) 0x42c56c ADD %RDX,%R8 |
(184) 0x42c56f VMOVSD (%R15,%R9,8),%XMM8 |
(184) 0x42c575 MOVSXD %ESI,%R11 |
(184) 0x42c578 ADD %R13,%R11 |
(184) 0x42c57b VMOVSD %XMM8,(%R14,%R10,8) |
(184) 0x42c581 VADDSD (%RBX,%R11,8),%XMM8,%XMM9 |
(184) 0x42c587 VSUBSD (%RDI),%XMM9,%XMM10 |
(184) 0x42c58b VMOVSD %XMM10,(%RCX,%R8,8) |
(184) 0x42c591 MOV 0x70(%RSP),%R11D |
(184) 0x42c596 INC %R12 |
(184) 0x42c599 LEA (%R12),%R15D |
(184) 0x42c59d CMP %R15D,0x48(%RSP) |
(184) 0x42c5a2 JLE 42c5c0 |
(184) 0x42c5a4 MOV 0x40(%RSP),%EDI |
(184) 0x42c5a8 MOV 0x44(%RSP),%R14D |
(184) 0x42c5ad MOV 0x4c(%RSP),%EDX |
(184) 0x42c5b1 MOV %R14D,0x74(%RSP) |
(184) 0x42c5b6 SUB %R11D,%EDI |
(184) 0x42c5b9 JMP 42c0c8 |
0x42c5be XCHG %AX,%AX |
0x42c5c0 VZEROUPPER |
0x42c5c3 LEA -0x28(%RBP),%RSP |
0x42c5c7 POP %RBX |
0x42c5c8 POP %R12 |
0x42c5ca POP %R13 |
0x42c5cc POP %R14 |
0x42c5ce POP %R15 |
0x42c5d0 POP %RBP |
0x42c5d1 RET |
0x42c5d2 NOPW (%RAX,%RAX,1) |
(184) 0x42c5d8 MOV 0x74(%RSP),%ESI |
(184) 0x42c5dc XOR %R9D,%R9D |
(184) 0x42c5df JMP 42c44b |
0x42c5e4 INC %EDI |
0x42c5e6 XOR %EDX,%EDX |
0x42c5e8 JMP 42c061 |
0x42c5ed NOPL (%RAX) |
Path / |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.24-14.38 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA -0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42c5c3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42c5c3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x4c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42c5e4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%R11,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42c5c3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x44(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x4c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R10D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42c061 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.24-14.38 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA -0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42c5c3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42c5c3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x4c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42c5e4 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%R11,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42c5c3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x44(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x4c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R10D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42c061 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3– | 2.23 | 0.75 |
▼Loop 184 - advec_mom.cpp:73-75 - exec– | 0.01 | 0 |
○Loop 185 - advec_mom.cpp:74-75 - exec | 2.22 | 0.74 |