Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.06% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.06% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 172 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42f3a0 PUSH %RBP |
0x42f3a1 MOV %RSP,%RBP |
0x42f3a4 PUSH %R15 |
0x42f3a6 PUSH %R14 |
0x42f3a8 PUSH %R13 |
0x42f3aa PUSH %R12 |
0x42f3ac PUSH %RBX |
0x42f3ad AND $-0x40,%RSP |
0x42f3b1 ADD $-0x80,%RSP |
0x42f3b5 MOV 0x34(%RDI),%EAX |
0x42f3b8 MOV 0x28(%RDI),%EBX |
0x42f3bb MOV 0x30(%RDI),%R14D |
0x42f3bf MOV 0x2c(%RDI),%EDX |
0x42f3c2 ADD $0x4,%EAX |
0x42f3c5 LEA 0x1(%RBX),%ECX |
0x42f3c8 MOV %EAX,0x1c(%RSP) |
0x42f3cc MOV %ECX,0x18(%RSP) |
0x42f3d0 CMP %EAX,%R14D |
0x42f3d3 JGE 42fb53 |
0x42f3d9 MOV %EAX,%EBX |
0x42f3db LEA 0x3(%RDX),%R15D |
0x42f3df SUB %R14D,%EBX |
0x42f3e2 CMP %R15D,%ECX |
0x42f3e5 JGE 42fb53 |
0x42f3eb MOV %R15D,%ESI |
0x42f3ee MOV %RDI,%R13 |
0x42f3f1 SUB %ECX,%ESI |
0x42f3f3 MOV %ESI,0x28(%RSP) |
0x42f3f7 CALL 404650 <omp_get_num_threads@plt> |
0x42f3fc MOV %EAX,%R12D |
0x42f3ff CALL 404540 <omp_get_thread_num@plt> |
0x42f404 XOR %EDX,%EDX |
0x42f406 MOV %EAX,%EDI |
0x42f408 MOV 0x28(%RSP),%EAX |
0x42f40c IMUL %EBX,%EAX |
0x42f40f DIV %R12D |
0x42f412 MOV %EAX,%ECX |
0x42f414 CMP %EDX,%EDI |
0x42f416 JB 42fb81 |
0x42f41c IMUL %ECX,%EDI |
0x42f41f LEA (%RDI,%RDX,1),%R11D |
0x42f423 LEA (%RCX,%R11,1),%R8D |
0x42f427 MOV %R8D,0x14(%RSP) |
0x42f42c CMP %R8D,%R11D |
0x42f42f JAE 42fb53 |
0x42f435 MOV %R11D,%EAX |
0x42f438 XOR %EDX,%EDX |
0x42f43a MOV 0x18(%RSP),%R9D |
0x42f43f VMOVQ (%R13),%XMM21 |
0x42f446 DIVL 0x28(%RSP) |
0x42f44a VMOVQ 0x20(%R13),%XMM20 |
0x42f451 VMOVQ 0x10(%R13),%XMM19 |
0x42f458 MOV %R15D,%EDI |
0x42f45b VMOVQ 0x8(%R13),%XMM18 |
0x42f462 VMOVQ 0x18(%R13),%XMM17 |
0x42f469 VMOVSD 0x3134d(%RIP),%XMM22 |
0x42f473 VBROADCASTSD %XMM22,%YMM23 |
0x42f479 VBROADCASTSD %XMM22,%ZMM16 |
0x42f47f ADD %EDX,%R9D |
0x42f482 ADD %R14D,%EAX |
0x42f485 MOVSXD %EAX,%R10 |
0x42f488 SUB %R9D,%EDI |
0x42f48b MOV %R9D,0x44(%RSP) |
0x42f490 MOV %EAX,0x40(%RSP) |
0x42f494 MOV %R10,0x20(%RSP) |
0x42f499 NOPL (%RAX) |
(191) 0x42f4a0 CMP %EDI,%ECX |
(191) 0x42f4a2 CMOVA %EDI,%ECX |
(191) 0x42f4a5 LEA (%R11,%RCX,1),%R14D |
(191) 0x42f4a9 MOV %R14D,0x2c(%RSP) |
(191) 0x42f4ae CMP %R14D,%R11D |
(191) 0x42f4b1 JAE 42fb19 |
(191) 0x42f4b7 MOV 0x40(%RSP),%R15D |
(191) 0x42f4bc VMOVQ %XMM18,%R9 |
(191) 0x42f4c2 VMOVQ %XMM21,%R13 |
(191) 0x42f4c8 MOV 0x20(%RSP),%RAX |
(191) 0x42f4cd VMOVQ %XMM20,%RSI |
(191) 0x42f4d3 VMOVQ %XMM19,%RDX |
(191) 0x42f4d9 VMOVQ 0x10(%R9),%XMM2 |
(191) 0x42f4df MOV 0x10(%RSI),%R12 |
(191) 0x42f4e3 MOV (%RDX),%R10 |
(191) 0x42f4e6 MOV (%RSI),%RSI |
(191) 0x42f4e9 VMOVQ 0x10(%RDX),%XMM11 |
(191) 0x42f4ee LEA -0x1(%R15),%EBX |
(191) 0x42f4f2 MOV (%R9),%R15 |
(191) 0x42f4f5 MOVSXD %EBX,%RDI |
(191) 0x42f4f8 MOV 0x10(%R13),%RBX |
(191) 0x42f4fc MOV (%R13),%R13 |
(191) 0x42f500 MOV %RDI,%R8 |
(191) 0x42f503 MOV %RDI,%R14 |
(191) 0x42f506 IMUL %RSI,%R14 |
(191) 0x42f50a IMUL %RAX,%R10 |
(191) 0x42f50e IMUL %R15,%RDI |
(191) 0x42f512 IMUL %R13,%R8 |
(191) 0x42f516 MOV %R14,0x58(%RSP) |
(191) 0x42f51b IMUL %RAX,%R13 |
(191) 0x42f51f MOV %R10,0x30(%RSP) |
(191) 0x42f524 MOV %RDI,0x70(%RSP) |
(191) 0x42f529 VMOVQ %XMM17,%RDI |
(191) 0x42f52f IMUL %RAX,%RSI |
(191) 0x42f533 IMUL %RAX,%R15 |
(191) 0x42f537 IMUL (%RDI),%RAX |
(191) 0x42f53b VMOVQ 0x10(%RDI),%XMM10 |
(191) 0x42f540 MOV %R8,0x38(%RSP) |
(191) 0x42f545 MOV %R13,0x60(%RSP) |
(191) 0x42f54a MOV %RSI,0x68(%RSP) |
(191) 0x42f54f MOV %R15,0x78(%RSP) |
(191) 0x42f554 MOV %RAX,0x48(%RSP) |
(191) 0x42f559 LEA -0x1(%RCX),%EAX |
(191) 0x42f55c CMP $0x6,%EAX |
(191) 0x42f55f JBE 42fb70 |
(191) 0x42f565 MOVSXD 0x44(%RSP),%RDX |
(191) 0x42f56a LEA (%R14,%RDX,1),%RAX |
(191) 0x42f56e LEA (%R13,%RDX,1),%RDI |
(191) 0x42f573 LEA (%R8,%RDX,1),%R9 |
(191) 0x42f577 ADD %RDX,%RSI |
(191) 0x42f57a SAL $0x3,%RAX |
(191) 0x42f57e SAL $0x3,%RDI |
(191) 0x42f582 SAL $0x3,%R9 |
(191) 0x42f586 SAL $0x3,%RSI |
(191) 0x42f58a LEA (%R12,%RAX,1),%R14 |
(191) 0x42f58e LEA (%RBX,%RDI,1),%R13 |
(191) 0x42f592 LEA (%RBX,%R9,1),%R15 |
(191) 0x42f596 LEA -0x8(%RBX,%RDI,1),%RDI |
(191) 0x42f59b VMOVQ %R14,%XMM7 |
(191) 0x42f5a0 VMOVQ %R13,%XMM8 |
(191) 0x42f5a5 LEA -0x8(%R12,%RAX,1),%R14 |
(191) 0x42f5aa LEA (%R12,%RSI,1),%R13 |
(191) 0x42f5ae LEA -0x8(%R12,%RSI,1),%RAX |
(191) 0x42f5b3 LEA (%R10,%RDX,1),%RSI |
(191) 0x42f5b7 VMOVQ %XMM11,%R10 |
(191) 0x42f5bc VMOVQ %R15,%XMM6 |
(191) 0x42f5c1 LEA (%R10,%RSI,8),%R8 |
(191) 0x42f5c5 LEA -0x8(%RBX,%R9,1),%R15 |
(191) 0x42f5ca MOV 0x70(%RSP),%R9 |
(191) 0x42f5cf VMOVQ %XMM2,%R10 |
(191) 0x42f5d4 VMOVQ %R8,%XMM1 |
(191) 0x42f5d9 MOV 0x78(%RSP),%R8 |
(191) 0x42f5de LEA (%R9,%RDX,1),%RSI |
(191) 0x42f5e2 LEA (%R10,%RSI,8),%R9 |
(191) 0x42f5e6 LEA (%R8,%RDX,1),%RSI |
(191) 0x42f5ea MOV 0x48(%RSP),%R8 |
(191) 0x42f5ef LEA (%R10,%RSI,8),%R10 |
(191) 0x42f5f3 VMOVQ %XMM10,%RSI |
(191) 0x42f5f8 ADD %R8,%RDX |
(191) 0x42f5fb LEA (%RSI,%RDX,8),%R8 |
(191) 0x42f5ff MOV %ECX,%ESI |
(191) 0x42f601 SHR $0x3,%ESI |
(191) 0x42f604 MOV %RSI,%RDX |
(191) 0x42f607 SAL $0x6,%RDX |
(191) 0x42f60b MOV %RDX,0x50(%RSP) |
(191) 0x42f610 XOR %EDX,%EDX |
(191) 0x42f612 AND $0x1,%ESI |
(191) 0x42f615 JE 42f69d |
(191) 0x42f61b VMOVQ %XMM8,%RSI |
(191) 0x42f620 VMOVUPD (%RDI),%ZMM4 |
(191) 0x42f626 VMULPD (%RAX),%ZMM4,%ZMM24 |
(191) 0x42f62c VMOVQ %XMM6,%RDX |
(191) 0x42f631 VMOVUPD (%RSI),%ZMM5 |
(191) 0x42f637 VMOVQ %XMM7,%RSI |
(191) 0x42f63c VMOVUPD (%R15),%ZMM9 |
(191) 0x42f642 VMULPD (%R13),%ZMM5,%ZMM0 |
(191) 0x42f649 VMOVUPD (%RSI),%ZMM3 |
(191) 0x42f64f VFMADD231PD (%R14),%ZMM9,%ZMM24 |
(191) 0x42f655 VMOVQ %XMM1,%RSI |
(191) 0x42f65a VFMADD231PD (%RDX),%ZMM3,%ZMM0 |
(191) 0x42f660 VADDPD %ZMM24,%ZMM0,%ZMM12 |
(191) 0x42f666 MOV $0x40,%EDX |
(191) 0x42f66b VMULPD %ZMM16,%ZMM12,%ZMM13 |
(191) 0x42f671 VMOVUPD %ZMM13,(%RSI) |
(191) 0x42f677 VMOVUPD (%R10),%ZMM14 |
(191) 0x42f67d VSUBPD (%R9),%ZMM14,%ZMM25 |
(191) 0x42f683 MOV 0x50(%RSP),%RSI |
(191) 0x42f688 VADDPD %ZMM13,%ZMM25,%ZMM15 |
(191) 0x42f68e VMOVUPD %ZMM15,(%R8) |
(191) 0x42f694 CMP %RSI,%RDX |
(191) 0x42f697 JE 42f7b8 |
(191) 0x42f69d MOV %R11D,0x10(%RSP) |
(191) 0x42f6a2 MOV %ECX,0xc(%RSP) |
(191) 0x42f6a6 VMOVQ %RBX,%XMM4 |
(191) 0x42f6ab VMOVQ %XMM8,%RCX |
(191) 0x42f6b0 VMOVQ %XMM7,%R11 |
(192) 0x42f6b5 VMOVUPD (%RCX,%RDX,1),%ZMM7 |
(192) 0x42f6bc VMOVUPD (%RDI,%RDX,1),%ZMM0 |
(192) 0x42f6c3 VMOVQ %XMM6,%RBX |
(192) 0x42f6c8 VMOVUPD (%R11,%RDX,1),%ZMM5 |
(192) 0x42f6cf VMULPD (%R13,%RDX,1),%ZMM7,%ZMM8 |
(192) 0x42f6d7 VMULPD (%RAX,%RDX,1),%ZMM0,%ZMM26 |
(192) 0x42f6de VMOVUPD (%R15,%RDX,1),%ZMM3 |
(192) 0x42f6e5 VFMADD231PD (%RBX,%RDX,1),%ZMM5,%ZMM8 |
(192) 0x42f6ec VFMADD231PD (%R14,%RDX,1),%ZMM3,%ZMM26 |
(192) 0x42f6f3 VMOVQ %XMM1,%RSI |
(192) 0x42f6f8 VADDPD %ZMM26,%ZMM8,%ZMM9 |
(192) 0x42f6fe VMULPD %ZMM16,%ZMM9,%ZMM12 |
(192) 0x42f704 VMOVUPD %ZMM12,(%RSI,%RDX,1) |
(192) 0x42f70b VMOVUPD (%R10,%RDX,1),%ZMM13 |
(192) 0x42f712 VSUBPD (%R9,%RDX,1),%ZMM13,%ZMM27 |
(192) 0x42f719 VADDPD %ZMM12,%ZMM27,%ZMM14 |
(192) 0x42f71f VMOVUPD %ZMM14,(%R8,%RDX,1) |
(192) 0x42f726 VMOVUPD 0x40(%RCX,%RDX,1),%ZMM15 |
(192) 0x42f72e VMOVUPD 0x40(%RDI,%RDX,1),%ZMM5 |
(192) 0x42f736 VMOVUPD 0x40(%R11,%RDX,1),%ZMM7 |
(192) 0x42f73e VMULPD 0x40(%R13,%RDX,1),%ZMM15,%ZMM8 |
(192) 0x42f746 VMULPD 0x40(%RAX,%RDX,1),%ZMM5,%ZMM28 |
(192) 0x42f74e VMOVUPD 0x40(%R15,%RDX,1),%ZMM0 |
(192) 0x42f756 VFMADD231PD 0x40(%RBX,%RDX,1),%ZMM7,%ZMM8 |
(192) 0x42f75e VFMADD231PD 0x40(%R14,%RDX,1),%ZMM0,%ZMM28 |
(192) 0x42f766 VADDPD %ZMM28,%ZMM8,%ZMM3 |
(192) 0x42f76c MOV 0x50(%RSP),%RBX |
(192) 0x42f771 VMULPD %ZMM16,%ZMM3,%ZMM9 |
(192) 0x42f777 VMOVUPD %ZMM9,0x40(%RSI,%RDX,1) |
(192) 0x42f77f VMOVUPD 0x40(%R10,%RDX,1),%ZMM12 |
(192) 0x42f787 VSUBPD 0x40(%R9,%RDX,1),%ZMM12,%ZMM29 |
(192) 0x42f78f VADDPD %ZMM9,%ZMM29,%ZMM13 |
(192) 0x42f795 VMOVUPD %ZMM13,0x40(%R8,%RDX,1) |
(192) 0x42f79d SUB $-0x80,%RDX |
(192) 0x42f7a1 CMP %RBX,%RDX |
(192) 0x42f7a4 JNE 42f6b5 |
(191) 0x42f7aa MOV 0x10(%RSP),%R11D |
(191) 0x42f7af MOV 0xc(%RSP),%ECX |
(191) 0x42f7b3 VMOVQ %XMM4,%RBX |
(191) 0x42f7b8 MOV 0x44(%RSP),%R13D |
(191) 0x42f7bd MOV %ECX,%EDX |
(191) 0x42f7bf AND $-0x8,%EDX |
(191) 0x42f7c2 ADD %EDX,%R11D |
(191) 0x42f7c5 ADD %EDX,%R13D |
(191) 0x42f7c8 MOV %R13D,0x50(%RSP) |
(191) 0x42f7cd TEST $0x7,%CL |
(191) 0x42f7d0 JE 42fb14 |
(191) 0x42f7d6 SUB %EDX,%ECX |
(191) 0x42f7d8 LEA -0x1(%RCX),%R15D |
(191) 0x42f7dc CMP $0x2,%R15D |
(191) 0x42f7e0 JBE 42f8cb |
(191) 0x42f7e6 MOVSXD 0x44(%RSP),%RAX |
(191) 0x42f7eb MOV 0x58(%RSP),%RDI |
(191) 0x42f7f0 MOV 0x60(%RSP),%R10 |
(191) 0x42f7f5 MOV 0x38(%RSP),%R14 |
(191) 0x42f7fa MOV 0x30(%RSP),%RSI |
(191) 0x42f7ff MOV 0x70(%RSP),%R13 |
(191) 0x42f804 LEA (%RDI,%RAX,1),%R8 |
(191) 0x42f808 MOV 0x68(%RSP),%RDI |
(191) 0x42f80d LEA (%R10,%RAX,1),%R10 |
(191) 0x42f811 LEA (%R14,%RAX,1),%R9 |
(191) 0x42f815 ADD %RDX,%R10 |
(191) 0x42f818 ADD %RDX,%R8 |
(191) 0x42f81b MOV 0x78(%RSP),%R14 |
(191) 0x42f820 ADD %RDX,%R9 |
(191) 0x42f823 VMOVUPD (%RBX,%R10,8),%YMM6 |
(191) 0x42f829 LEA (%RSI,%RAX,1),%R15 |
(191) 0x42f82d MOV 0x48(%RSP),%RSI |
(191) 0x42f832 VMOVUPD (%RBX,%R9,8),%YMM1 |
(191) 0x42f838 VMOVUPD -0x8(%RBX,%R9,8),%YMM8 |
(191) 0x42f83f LEA (%R13,%RAX,1),%R13 |
(191) 0x42f844 ADD %RDX,%R15 |
(191) 0x42f847 ADD %RDX,%R13 |
(191) 0x42f84a VMOVQ %XMM2,%R9 |
(191) 0x42f84f ADD %RAX,%RDI |
(191) 0x42f852 ADD %RDX,%RDI |
(191) 0x42f855 LEA (%R14,%RAX,1),%R14 |
(191) 0x42f859 SAL $0x3,%RDI |
(191) 0x42f85d VMULPD (%R12,%RDI,1),%YMM6,%YMM14 |
(191) 0x42f863 VMOVUPD -0x8(%R12,%RDI,1),%YMM4 |
(191) 0x42f86a ADD %RSI,%RAX |
(191) 0x42f86d ADD %RDX,%R14 |
(191) 0x42f870 VMULPD -0x8(%RBX,%R10,8),%YMM4,%YMM15 |
(191) 0x42f877 VFMADD231PD (%R12,%R8,8),%YMM1,%YMM14 |
(191) 0x42f87d ADD %RDX,%RAX |
(191) 0x42f880 VMOVQ %XMM11,%RDX |
(191) 0x42f885 VFMADD231PD -0x8(%R12,%R8,8),%YMM8,%YMM15 |
(191) 0x42f88c VADDPD %YMM15,%YMM14,%YMM7 |
(191) 0x42f891 VMOVQ %XMM10,%R8 |
(191) 0x42f896 VMULPD %YMM23,%YMM7,%YMM5 |
(191) 0x42f89c VMOVUPD %YMM5,(%RDX,%R15,8) |
(191) 0x42f8a2 VMOVUPD (%R9,%R14,8),%YMM0 |
(191) 0x42f8a8 VSUBPD (%R9,%R13,8),%YMM0,%YMM3 |
(191) 0x42f8ae VADDPD %YMM5,%YMM3,%YMM9 |
(191) 0x42f8b2 VMOVUPD %YMM9,(%R8,%RAX,8) |
(191) 0x42f8b8 TEST $0x3,%CL |
(191) 0x42f8bb JE 42fb14 |
(191) 0x42f8c1 AND $-0x4,%ECX |
(191) 0x42f8c4 ADD %ECX,0x50(%RSP) |
(191) 0x42f8c8 ADD %ECX,%R11D |
(191) 0x42f8cb MOVSXD 0x50(%RSP),%RAX |
(191) 0x42f8d0 MOV 0x68(%RSP),%RSI |
(191) 0x42f8d5 MOV 0x60(%RSP),%R14 |
(191) 0x42f8da MOV 0x38(%RSP),%R15 |
(191) 0x42f8df MOV 0x58(%RSP),%R13 |
(191) 0x42f8e4 LEA (%RSI,%RAX,1),%RDX |
(191) 0x42f8e8 LEA -0x1(%RAX),%R10D |
(191) 0x42f8ec ADD %RAX,%R14 |
(191) 0x42f8ef LEA (%R12,%RDX,8),%R9 |
(191) 0x42f8f3 MOVSXD %R10D,%RDX |
(191) 0x42f8f6 LEA (%RBX,%R14,8),%R8 |
(191) 0x42f8fa LEA (%R13,%RAX,1),%RDI |
(191) 0x42f8ff LEA (%RDX,%RSI,1),%R14 |
(191) 0x42f903 MOV 0x60(%RSP),%RSI |
(191) 0x42f908 LEA (%RDX,%R13,1),%R13 |
(191) 0x42f90c LEA (%RDX,%R15,1),%R10 |
(191) 0x42f910 VMOVSD (%R12,%R14,8),%XMM14 |
(191) 0x42f916 LEA (%R15,%RAX,1),%RCX |
(191) 0x42f91a VMOVSD (%R12,%R13,8),%XMM4 |
(191) 0x42f920 LEA (%R12,%RDI,8),%RDI |
(191) 0x42f924 VMOVQ %XMM11,%R13 |
(191) 0x42f929 LEA (%RBX,%RCX,8),%RCX |
(191) 0x42f92d VMOVSD (%R9),%XMM12 |
(191) 0x42f932 VMOVSD (%RDI),%XMM6 |
(191) 0x42f936 VMULSD (%R8),%XMM12,%XMM13 |
(191) 0x42f93b MOV 0x48(%RSP),%R14 |
(191) 0x42f940 VFMADD231SD (%RCX),%XMM6,%XMM13 |
(191) 0x42f945 ADD %RSI,%RDX |
(191) 0x42f948 VMULSD (%RBX,%RDX,8),%XMM14,%XMM1 |
(191) 0x42f94d MOV 0x30(%RSP),%RDX |
(191) 0x42f952 VFMADD231SD (%RBX,%R10,8),%XMM4,%XMM1 |
(191) 0x42f958 VADDSD %XMM1,%XMM13,%XMM15 |
(191) 0x42f95c VMULSD %XMM22,%XMM15,%XMM8 |
(191) 0x42f962 LEA (%RDX,%RAX,1),%R10 |
(191) 0x42f966 LEA (%R14,%RAX,1),%RDX |
(191) 0x42f96a VMOVQ %XMM2,%R14 |
(191) 0x42f96f VMOVSD %XMM8,(%R13,%R10,8) |
(191) 0x42f976 MOV 0x70(%RSP),%R13 |
(191) 0x42f97b MOV 0x78(%RSP),%R10 |
(191) 0x42f980 ADD %RAX,%R10 |
(191) 0x42f983 ADD %R13,%RAX |
(191) 0x42f986 LEA 0x1(%R11),%R13D |
(191) 0x42f98a VMOVSD (%R14,%R10,8),%XMM7 |
(191) 0x42f990 VSUBSD (%R14,%RAX,8),%XMM7,%XMM5 |
(191) 0x42f996 VMOVQ %XMM10,%RAX |
(191) 0x42f99b VADDSD %XMM8,%XMM5,%XMM0 |
(191) 0x42f9a0 MOV 0x2c(%RSP),%R14D |
(191) 0x42f9a5 VMOVSD %XMM0,(%RAX,%RDX,8) |
(191) 0x42f9aa MOV 0x50(%RSP),%EDX |
(191) 0x42f9ae LEA 0x1(%RDX),%EAX |
(191) 0x42f9b1 CMP %R14D,%R13D |
(191) 0x42f9b4 JAE 42fb14 |
(191) 0x42f9ba MOV 0x58(%RSP),%R10 |
(191) 0x42f9bf MOV 0x68(%RSP),%R14 |
(191) 0x42f9c4 VMOVSD (%R9),%XMM13 |
(191) 0x42f9c9 MOV 0x30(%RSP),%R9 |
(191) 0x42f9ce CLTQ |
(191) 0x42f9d0 VMULSD (%R8),%XMM13,%XMM14 |
(191) 0x42f9d5 VMOVSD (%RDI),%XMM6 |
(191) 0x42f9d9 VMOVQ %XMM11,%RDI |
(191) 0x42f9de LEA (%R15,%RAX,1),%R15 |
(191) 0x42f9e2 LEA (%RSI,%RAX,1),%RSI |
(191) 0x42f9e6 VFMADD231SD (%RCX),%XMM6,%XMM14 |
(191) 0x42f9eb MOV 0x48(%RSP),%R8 |
(191) 0x42f9f0 LEA (%RBX,%R15,8),%R13 |
(191) 0x42f9f4 LEA (%RBX,%RSI,8),%R15 |
(191) 0x42f9f8 ADD $0x2,%R11D |
(191) 0x42f9fc LEA (%R10,%RAX,1),%RDX |
(191) 0x42fa00 LEA (%R9,%RAX,1),%RCX |
(191) 0x42fa04 LEA (%R12,%RDX,8),%R10 |
(191) 0x42fa08 LEA (%R14,%RAX,1),%RDX |
(191) 0x42fa0c LEA (%R12,%RDX,8),%R14 |
(191) 0x42fa10 VMOVSD (%R10),%XMM12 |
(191) 0x42fa15 MOV 0x78(%RSP),%RDX |
(191) 0x42fa1a LEA (%R8,%RAX,1),%RSI |
(191) 0x42fa1e VMOVSD (%R14),%XMM3 |
(191) 0x42fa23 VMULSD (%R15),%XMM3,%XMM9 |
(191) 0x42fa28 VFMADD231SD (%R13),%XMM12,%XMM9 |
(191) 0x42fa2e VADDSD %XMM9,%XMM14,%XMM1 |
(191) 0x42fa33 VMULSD %XMM22,%XMM1,%XMM15 |
(191) 0x42fa39 VMOVSD %XMM15,(%RDI,%RCX,8) |
(191) 0x42fa3e MOV 0x70(%RSP),%RCX |
(191) 0x42fa43 VMOVQ %XMM2,%RDI |
(191) 0x42fa48 ADD %RAX,%RDX |
(191) 0x42fa4b VMOVSD (%RDI,%RDX,8),%XMM4 |
(191) 0x42fa50 ADD %RCX,%RAX |
(191) 0x42fa53 MOV 0x2c(%RSP),%ECX |
(191) 0x42fa57 VSUBSD (%RDI,%RAX,8),%XMM4,%XMM8 |
(191) 0x42fa5c VMOVQ %XMM10,%RAX |
(191) 0x42fa61 VADDSD %XMM15,%XMM8,%XMM7 |
(191) 0x42fa66 VMOVSD %XMM7,(%RAX,%RSI,8) |
(191) 0x42fa6b MOV 0x50(%RSP),%ESI |
(191) 0x42fa6f ADD $0x2,%ESI |
(191) 0x42fa72 CMP %ECX,%R11D |
(191) 0x42fa75 JAE 42fb14 |
(191) 0x42fa7b MOVSXD %ESI,%R11 |
(191) 0x42fa7e MOV 0x60(%RSP),%RDX |
(191) 0x42fa83 MOV 0x68(%RSP),%RSI |
(191) 0x42fa88 MOV 0x38(%RSP),%RDI |
(191) 0x42fa8d MOV 0x58(%RSP),%RAX |
(191) 0x42fa92 VMOVSD (%R13),%XMM13 |
(191) 0x42fa98 MOV 0x78(%RSP),%R13 |
(191) 0x42fa9d VMOVSD (%R15),%XMM9 |
(191) 0x42faa2 ADD %R11,%R9 |
(191) 0x42faa5 VMULSD (%R14),%XMM9,%XMM12 |
(191) 0x42faaa ADD %R11,%R8 |
(191) 0x42faad VFMADD231SD (%R10),%XMM13,%XMM12 |
(191) 0x42fab2 VMOVQ %XMM10,%R10 |
(191) 0x42fab7 ADD %R11,%RSI |
(191) 0x42faba ADD %R11,%RDX |
(191) 0x42fabd ADD %R11,%RDI |
(191) 0x42fac0 VMOVSD (%RBX,%RDX,8),%XMM5 |
(191) 0x42fac5 VMULSD (%R12,%RSI,8),%XMM5,%XMM0 |
(191) 0x42facb MOV 0x70(%RSP),%RSI |
(191) 0x42fad0 ADD %R11,%RAX |
(191) 0x42fad3 VMOVSD (%RBX,%RDI,8),%XMM3 |
(191) 0x42fad8 ADD %R11,%R13 |
(191) 0x42fadb VFMADD231SD (%R12,%RAX,8),%XMM3,%XMM0 |
(191) 0x42fae1 VMOVQ %XMM2,%RBX |
(191) 0x42fae6 VMOVQ %XMM11,%R12 |
(191) 0x42faeb VADDSD %XMM12,%XMM0,%XMM14 |
(191) 0x42faf0 VMULSD %XMM22,%XMM14,%XMM6 |
(191) 0x42faf6 VMOVSD %XMM6,(%R12,%R9,8) |
(191) 0x42fafc VMOVSD (%RBX,%R13,8),%XMM11 |
(191) 0x42fb02 ADD %R11,%RSI |
(191) 0x42fb05 VSUBSD (%RBX,%RSI,8),%XMM11,%XMM2 |
(191) 0x42fb0a VADDSD %XMM6,%XMM2,%XMM1 |
(191) 0x42fb0e VMOVSD %XMM1,(%R10,%R8,8) |
(191) 0x42fb14 MOV 0x2c(%RSP),%R11D |
(191) 0x42fb19 INCL 0x40(%RSP) |
(191) 0x42fb1d MOV 0x40(%RSP),%R15D |
(191) 0x42fb22 INCQ 0x20(%RSP) |
(191) 0x42fb27 CMP %R15D,0x1c(%RSP) |
(191) 0x42fb2c JLE 42fb50 |
(191) 0x42fb2e MOV 0x14(%RSP),%ECX |
(191) 0x42fb32 MOV 0x18(%RSP),%R14D |
(191) 0x42fb37 MOV 0x28(%RSP),%EDI |
(191) 0x42fb3b SUB %R11D,%ECX |
(191) 0x42fb3e MOV %R14D,0x44(%RSP) |
(191) 0x42fb43 JMP 42f4a0 |
0x42fb48 NOPL (%RAX,%RAX,1) |
0x42fb50 VZEROUPPER |
0x42fb53 LEA -0x28(%RBP),%RSP |
0x42fb57 POP %RBX |
0x42fb58 POP %R12 |
0x42fb5a POP %R13 |
0x42fb5c POP %R14 |
0x42fb5e POP %R15 |
0x42fb60 POP %RBP |
0x42fb61 RET |
0x42fb62 NOPW %CS:(%RAX,%RAX,1) |
0x42fb6d NOPL (%RAX) |
(191) 0x42fb70 MOV 0x44(%RSP),%R8D |
(191) 0x42fb75 XOR %EDX,%EDX |
(191) 0x42fb77 MOV %R8D,0x50(%RSP) |
(191) 0x42fb7c JMP 42f7d6 |
0x42fb81 INC %ECX |
0x42fb83 XOR %EDX,%EDX |
0x42fb85 JMP 42f41c |
0x42fb8a NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 82 |
nb uops | 82 |
loop length | 311 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 8 |
micro-operation queue | 13.67 cycles |
front end | 13.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.75 | 6.75 | 6.75 | 6.75 | 6.00 | 7.33 | 7.33 | 7.33 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.75 | 6.75 | 6.75 | 6.75 | 6.00 | 7.33 | 7.33 | 7.33 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.67 |
Dispatch | 7.33 |
DIV/SQRT | 12.00 |
Overall L1 | 13.67 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 10% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 10% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RDI),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RDI),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RBX),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %ECX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EAX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42fb53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15D,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42fb53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %ECX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x28(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42fb81 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7e1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R8D,%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42fb53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x18(%RSP),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x28(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%R13),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x8(%R13),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x3134d(%RIP),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM22,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM22,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
ADD %EDX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R14D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %EAX,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R9D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42f41c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 82 |
nb uops | 82 |
loop length | 311 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 8 |
micro-operation queue | 13.67 cycles |
front end | 13.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.75 | 6.75 | 6.75 | 6.75 | 6.00 | 7.33 | 7.33 | 7.33 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.75 | 6.75 | 6.75 | 6.75 | 6.00 | 7.33 | 7.33 | 7.33 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.67 |
Dispatch | 7.33 |
DIV/SQRT | 12.00 |
Overall L1 | 13.67 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 10% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 10% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RDI),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RDI),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RBX),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %ECX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EAX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42fb53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15D,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42fb53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %ECX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x28(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42fb81 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7e1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R8D,%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42fb53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7b3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x18(%RSP),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x28(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%R13),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x8(%R13),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x3134d(%RIP),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM22,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM22,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
ADD %EDX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R14D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %EAX,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R9D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42f41c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x7c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9– | 3.06 | 1.54 |
▼Loop 191 - advec_mom.cpp:169-172 - exec– | 0 | 0 |
○Loop 192 - advec_mom.cpp:170-172 - exec | 3.05 | 1.54 |