Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage: 3.63% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage: 3.63% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 218 - 221 |
-------------------------------------------------------------------------------- |
218: #pragma omp parallel for simd collapse(2) |
219: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
220: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
221: vel1(i, j) = (vel1(i, j) * node_mass_pre(i, j) + mom_flux(i + 0, j - 1) - mom_flux(i, j)) / node_mass_post(i, j); |
0x42e670 PUSH %RBP |
0x42e671 MOV %RSP,%RBP |
0x42e674 PUSH %R15 |
0x42e676 PUSH %R14 |
0x42e678 PUSH %R13 |
0x42e67a PUSH %R12 |
0x42e67c PUSH %RBX |
0x42e67d AND $-0x40,%RSP |
0x42e681 ADD $-0x80,%RSP |
0x42e685 MOV 0x28(%RDI),%EAX |
0x42e688 MOV 0x2c(%RDI),%EDX |
0x42e68b MOV 0x20(%RDI),%ESI |
0x42e68e MOV 0x24(%RDI),%EBX |
0x42e691 ADD $0x3,%EDX |
0x42e694 LEA 0x1(%RAX),%R15D |
0x42e698 INC %ESI |
0x42e69a MOV %EDX,0x3c(%RSP) |
0x42e69e MOV %ESI,0x38(%RSP) |
0x42e6a2 CMP %EDX,%R15D |
0x42e6a5 JGE 42eb73 |
0x42e6ab LEA 0x3(%RBX),%R14D |
0x42e6af MOV %EDX,%EBX |
0x42e6b1 SUB %R15D,%EBX |
0x42e6b4 CMP %R14D,%ESI |
0x42e6b7 JGE 42eb73 |
0x42e6bd MOV %R14D,%ECX |
0x42e6c0 MOV %RDI,%R12 |
0x42e6c3 SUB %ESI,%ECX |
0x42e6c5 MOV %ECX,0x58(%RSP) |
0x42e6c9 CALL 404650 <omp_get_num_threads@plt> |
0x42e6ce MOV %EAX,%R13D |
0x42e6d1 CALL 404540 <omp_get_thread_num@plt> |
0x42e6d6 XOR %EDX,%EDX |
0x42e6d8 MOV %EAX,%EDI |
0x42e6da MOV 0x58(%RSP),%EAX |
0x42e6de IMUL %EBX,%EAX |
0x42e6e1 DIV %R13D |
0x42e6e4 MOV %EAX,%ECX |
0x42e6e6 CMP %EDX,%EDI |
0x42e6e8 JB 42eb93 |
0x42e6ee IMUL %ECX,%EDI |
0x42e6f1 LEA (%RDI,%RDX,1),%R11D |
0x42e6f5 LEA (%RCX,%R11,1),%R8D |
0x42e6f9 MOV %R8D,0x34(%RSP) |
0x42e6fe CMP %R8D,%R11D |
0x42e701 JAE 42eb73 |
0x42e707 MOV %R11D,%EAX |
0x42e70a XOR %EDX,%EDX |
0x42e70c MOV 0x38(%RSP),%R9D |
0x42e711 MOV (%R12),%R10 |
0x42e715 DIVL 0x58(%RSP) |
0x42e719 MOV 0x18(%R12),%RSI |
0x42e71e MOV %R10,0x28(%RSP) |
0x42e723 MOV %RSI,0x18(%RSP) |
0x42e728 ADD %EDX,%R9D |
0x42e72b ADD %R15D,%EAX |
0x42e72e MOV %R14D,%EDX |
0x42e731 MOV 0x10(%R12),%R15 |
0x42e736 MOV 0x8(%R12),%R14 |
0x42e73b MOV %R9D,0x74(%RSP) |
0x42e740 SUB %R9D,%EDX |
0x42e743 MOVSXD %EAX,%R12 |
0x42e746 MOV %EAX,0x70(%RSP) |
0x42e74a MOV %R15,0x20(%RSP) |
0x42e74f MOV %R14,0x10(%RSP) |
0x42e754 NOPL (%RAX) |
(196) 0x42e758 CMP %EDX,%ECX |
(196) 0x42e75a CMOVBE %ECX,%EDX |
(196) 0x42e75d LEA (%R11,%RDX,1),%EBX |
(196) 0x42e761 MOV %EBX,0x5c(%RSP) |
(196) 0x42e765 CMP %EBX,%R11D |
(196) 0x42e768 JAE 42eb44 |
(196) 0x42e76e MOV 0x20(%RSP),%RDI |
(196) 0x42e773 MOV 0x28(%RSP),%RCX |
(196) 0x42e778 MOV 0x18(%RSP),%R8 |
(196) 0x42e77d MOV 0x70(%RSP),%EAX |
(196) 0x42e781 MOV (%RDI),%RBX |
(196) 0x42e784 MOV 0x10(%RDI),%R14 |
(196) 0x42e788 MOV 0x10(%RSP),%RDI |
(196) 0x42e78d MOV (%R8),%R9 |
(196) 0x42e790 DEC %EAX |
(196) 0x42e792 MOV (%RCX),%R13 |
(196) 0x42e795 MOV 0x10(%R8),%RSI |
(196) 0x42e799 MOVSXD %EAX,%R10 |
(196) 0x42e79c IMUL %R12,%RBX |
(196) 0x42e7a0 MOV (%RDI),%R8 |
(196) 0x42e7a3 IMUL %R9,%R10 |
(196) 0x42e7a7 MOV 0x10(%RCX),%R15 |
(196) 0x42e7ab LEA -0x1(%RDX),%EAX |
(196) 0x42e7ae IMUL %R12,%R13 |
(196) 0x42e7b2 MOV 0x10(%RDI),%RCX |
(196) 0x42e7b6 IMUL %R12,%R9 |
(196) 0x42e7ba MOV %RBX,0x48(%RSP) |
(196) 0x42e7bf IMUL %R12,%R8 |
(196) 0x42e7c3 MOV %R10,0x50(%RSP) |
(196) 0x42e7c8 MOV %R13,0x40(%RSP) |
(196) 0x42e7cd MOV %R9,0x68(%RSP) |
(196) 0x42e7d2 MOV %RCX,0x78(%RSP) |
(196) 0x42e7d7 MOV %R8,0x60(%RSP) |
(196) 0x42e7dc CMP $0x6,%EAX |
(196) 0x42e7df JBE 42eb88 |
(196) 0x42e7e5 MOVSXD 0x74(%RSP),%RAX |
(196) 0x42e7ea LEA (%RAX,%R13,1),%R13 |
(196) 0x42e7ee ADD %RAX,%RBX |
(196) 0x42e7f1 LEA (%RAX,%R9,1),%R9 |
(196) 0x42e7f5 LEA (%R15,%R13,8),%RCX |
(196) 0x42e7f9 LEA (%R14,%RBX,8),%R13 |
(196) 0x42e7fd LEA (%RAX,%R10,1),%RBX |
(196) 0x42e801 ADD %R8,%RAX |
(196) 0x42e804 MOV 0x78(%RSP),%R8 |
(196) 0x42e809 LEA (%RSI,%RBX,8),%R10 |
(196) 0x42e80d LEA (%RSI,%R9,8),%RBX |
(196) 0x42e811 LEA (%R8,%RAX,8),%R9 |
(196) 0x42e815 MOV %EDX,%R8D |
(196) 0x42e818 XOR %EAX,%EAX |
(196) 0x42e81a SHR $0x3,%R8D |
(196) 0x42e81e SAL $0x6,%R8 |
(196) 0x42e822 LEA -0x40(%R8),%RDI |
(196) 0x42e826 SHR $0x6,%RDI |
(196) 0x42e82a INC %RDI |
(196) 0x42e82d AND $0x3,%EDI |
(196) 0x42e830 JE 42e8d3 |
(196) 0x42e836 CMP $0x1,%RDI |
(196) 0x42e83a JE 42e89b |
(196) 0x42e83c CMP $0x2,%RDI |
(196) 0x42e840 JE 42e86c |
(196) 0x42e842 VMOVUPD (%RCX),%ZMM0 |
(196) 0x42e848 VMOVUPD (%RBX),%ZMM3 |
(196) 0x42e84e MOV $0x40,%EAX |
(196) 0x42e853 VFMSUB132PD (%R13),%ZMM3,%ZMM0 |
(196) 0x42e85a VADDPD (%R10),%ZMM0,%ZMM1 |
(196) 0x42e860 VDIVPD (%R9),%ZMM1,%ZMM2 |
(196) 0x42e866 VMOVUPD %ZMM2,(%RCX) |
(196) 0x42e86c VMOVUPD (%RCX,%RAX,1),%ZMM4 |
(196) 0x42e873 VMOVUPD (%RBX,%RAX,1),%ZMM5 |
(196) 0x42e87a VFMSUB132PD (%R13,%RAX,1),%ZMM5,%ZMM4 |
(196) 0x42e882 VADDPD (%R10,%RAX,1),%ZMM4,%ZMM6 |
(196) 0x42e889 VDIVPD (%R9,%RAX,1),%ZMM6,%ZMM7 |
(196) 0x42e890 VMOVUPD %ZMM7,(%RCX,%RAX,1) |
(196) 0x42e897 ADD $0x40,%RAX |
(196) 0x42e89b VMOVUPD (%RCX,%RAX,1),%ZMM8 |
(196) 0x42e8a2 VMOVUPD (%RBX,%RAX,1),%ZMM9 |
(196) 0x42e8a9 VFMSUB132PD (%R13,%RAX,1),%ZMM9,%ZMM8 |
(196) 0x42e8b1 VADDPD (%R10,%RAX,1),%ZMM8,%ZMM10 |
(196) 0x42e8b8 VDIVPD (%R9,%RAX,1),%ZMM10,%ZMM11 |
(196) 0x42e8bf VMOVUPD %ZMM11,(%RCX,%RAX,1) |
(196) 0x42e8c6 ADD $0x40,%RAX |
(196) 0x42e8ca CMP %RAX,%R8 |
(196) 0x42e8cd JE 42e99d |
(197) 0x42e8d3 VMOVUPD (%RBX,%RAX,1),%ZMM13 |
(197) 0x42e8da VMOVUPD (%RCX,%RAX,1),%ZMM12 |
(197) 0x42e8e1 VMOVUPD 0x40(%RCX,%RAX,1),%ZMM0 |
(197) 0x42e8e9 VMOVUPD 0x80(%RCX,%RAX,1),%ZMM5 |
(197) 0x42e8f1 VFMSUB132PD (%R13,%RAX,1),%ZMM13,%ZMM12 |
(197) 0x42e8f9 VMOVUPD 0xc0(%RCX,%RAX,1),%ZMM8 |
(197) 0x42e901 VADDPD (%R10,%RAX,1),%ZMM12,%ZMM14 |
(197) 0x42e908 VDIVPD (%R9,%RAX,1),%ZMM14,%ZMM15 |
(197) 0x42e90f VMOVUPD %ZMM15,(%RCX,%RAX,1) |
(197) 0x42e916 VMOVUPD 0x40(%RBX,%RAX,1),%ZMM3 |
(197) 0x42e91e VFMSUB132PD 0x40(%R13,%RAX,1),%ZMM3,%ZMM0 |
(197) 0x42e926 VADDPD 0x40(%R10,%RAX,1),%ZMM0,%ZMM1 |
(197) 0x42e92e VDIVPD 0x40(%R9,%RAX,1),%ZMM1,%ZMM2 |
(197) 0x42e936 VMOVUPD %ZMM2,0x40(%RCX,%RAX,1) |
(197) 0x42e93e VMOVUPD 0x80(%RBX,%RAX,1),%ZMM4 |
(197) 0x42e946 VFMSUB132PD 0x80(%R13,%RAX,1),%ZMM4,%ZMM5 |
(197) 0x42e94e VADDPD 0x80(%R10,%RAX,1),%ZMM5,%ZMM6 |
(197) 0x42e956 VDIVPD 0x80(%R9,%RAX,1),%ZMM6,%ZMM7 |
(197) 0x42e95e VMOVUPD %ZMM7,0x80(%RCX,%RAX,1) |
(197) 0x42e966 VMOVUPD 0xc0(%RBX,%RAX,1),%ZMM9 |
(197) 0x42e96e VFMSUB132PD 0xc0(%R13,%RAX,1),%ZMM9,%ZMM8 |
(197) 0x42e976 VADDPD 0xc0(%R10,%RAX,1),%ZMM8,%ZMM10 |
(197) 0x42e97e VDIVPD 0xc0(%R9,%RAX,1),%ZMM10,%ZMM11 |
(197) 0x42e986 VMOVUPD %ZMM11,0xc0(%RCX,%RAX,1) |
(197) 0x42e98e ADD $0x100,%RAX |
(197) 0x42e994 CMP %RAX,%R8 |
(197) 0x42e997 JNE 42e8d3 |
(196) 0x42e99d MOV 0x74(%RSP),%R13D |
(196) 0x42e9a2 MOV %EDX,%ECX |
(196) 0x42e9a4 AND $-0x8,%ECX |
(196) 0x42e9a7 ADD %ECX,%R11D |
(196) 0x42e9aa LEA (%RCX,%R13,1),%EDI |
(196) 0x42e9ae TEST $0x7,%DL |
(196) 0x42e9b1 JE 42eb3f |
(196) 0x42e9b7 SUB %ECX,%EDX |
(196) 0x42e9b9 LEA -0x1(%RDX),%R10D |
(196) 0x42e9bd CMP $0x2,%R10D |
(196) 0x42e9c1 JBE 42ea3a |
(196) 0x42e9c3 MOVSXD 0x74(%RSP),%RAX |
(196) 0x42e9c8 MOV 0x40(%RSP),%RBX |
(196) 0x42e9cd MOV 0x60(%RSP),%R8 |
(196) 0x42e9d2 MOV 0x48(%RSP),%R10 |
(196) 0x42e9d7 LEA (%RBX,%RAX,1),%R9 |
(196) 0x42e9db MOV 0x68(%RSP),%R13 |
(196) 0x42e9e0 ADD %RCX,%R9 |
(196) 0x42e9e3 LEA (%R8,%RAX,1),%R8 |
(196) 0x42e9e7 ADD %RAX,%R10 |
(196) 0x42e9ea LEA (%R15,%R9,8),%RBX |
(196) 0x42e9ee MOV 0x50(%RSP),%R9 |
(196) 0x42e9f3 ADD %RCX,%R10 |
(196) 0x42e9f6 ADD %RCX,%R8 |
(196) 0x42e9f9 VMOVUPD (%RBX),%YMM12 |
(196) 0x42e9fd ADD %RAX,%R9 |
(196) 0x42ea00 ADD %R13,%RAX |
(196) 0x42ea03 ADD %RCX,%RAX |
(196) 0x42ea06 ADD %RCX,%R9 |
(196) 0x42ea09 MOV 0x78(%RSP),%RCX |
(196) 0x42ea0e VMOVUPD (%RSI,%RAX,8),%YMM13 |
(196) 0x42ea13 VFMSUB132PD (%R14,%R10,8),%YMM13,%YMM12 |
(196) 0x42ea19 VADDPD (%RSI,%R9,8),%YMM12,%YMM14 |
(196) 0x42ea1f VDIVPD (%RCX,%R8,8),%YMM14,%YMM15 |
(196) 0x42ea25 VMOVUPD %YMM15,(%RBX) |
(196) 0x42ea29 TEST $0x3,%DL |
(196) 0x42ea2c JE 42eb3f |
(196) 0x42ea32 AND $-0x4,%EDX |
(196) 0x42ea35 ADD %EDX,%R11D |
(196) 0x42ea38 ADD %EDX,%EDI |
(196) 0x42ea3a MOV 0x48(%RSP),%R13 |
(196) 0x42ea3f MOV 0x68(%RSP),%R8 |
(196) 0x42ea44 MOVSXD %EDI,%RAX |
(196) 0x42ea47 MOV 0x40(%RSP),%RBX |
(196) 0x42ea4c MOV 0x50(%RSP),%R10 |
(196) 0x42ea51 LEA (%R13,%RAX,1),%R9 |
(196) 0x42ea56 ADD %RAX,%R8 |
(196) 0x42ea59 VMOVSD (%R14,%R9,8),%XMM0 |
(196) 0x42ea5f VMOVSD (%RSI,%R8,8),%XMM3 |
(196) 0x42ea65 LEA (%RBX,%RAX,1),%RDX |
(196) 0x42ea69 LEA (%R10,%RAX,1),%RCX |
(196) 0x42ea6d LEA (%R15,%RDX,8),%RDX |
(196) 0x42ea71 MOV 0x78(%RSP),%R9 |
(196) 0x42ea76 MOV 0x5c(%RSP),%R8D |
(196) 0x42ea7b VFMSUB132SD (%RDX),%XMM3,%XMM0 |
(196) 0x42ea80 VADDSD (%RSI,%RCX,8),%XMM0,%XMM1 |
(196) 0x42ea85 MOV 0x60(%RSP),%RCX |
(196) 0x42ea8a ADD %RCX,%RAX |
(196) 0x42ea8d VDIVSD (%R9,%RAX,8),%XMM1,%XMM2 |
(196) 0x42ea93 VMOVSD %XMM2,(%RDX) |
(196) 0x42ea97 LEA 0x1(%R11),%EDX |
(196) 0x42ea9b LEA 0x1(%RDI),%EAX |
(196) 0x42ea9e CMP %R8D,%EDX |
(196) 0x42eaa1 JAE 42eb3f |
(196) 0x42eaa7 MOV 0x68(%RSP),%R8 |
(196) 0x42eaac CLTQ |
(196) 0x42eaae ADD $0x2,%R11D |
(196) 0x42eab2 ADD $0x2,%EDI |
(196) 0x42eab5 LEA (%R13,%RAX,1),%R9 |
(196) 0x42eaba LEA (%RBX,%RAX,1),%RCX |
(196) 0x42eabe ADD %RAX,%R8 |
(196) 0x42eac1 VMOVSD (%R14,%R9,8),%XMM5 |
(196) 0x42eac7 LEA (%R15,%RCX,8),%RDX |
(196) 0x42eacb LEA (%R10,%RAX,1),%RCX |
(196) 0x42eacf VMOVSD (%RSI,%R8,8),%XMM4 |
(196) 0x42ead5 MOV 0x60(%RSP),%R9 |
(196) 0x42eada VFMSUB132SD (%RDX),%XMM4,%XMM5 |
(196) 0x42eadf ADD %R9,%RAX |
(196) 0x42eae2 VADDSD (%RSI,%RCX,8),%XMM5,%XMM6 |
(196) 0x42eae7 MOV 0x78(%RSP),%RCX |
(196) 0x42eaec VDIVSD (%RCX,%RAX,8),%XMM6,%XMM7 |
(196) 0x42eaf1 MOV 0x5c(%RSP),%EAX |
(196) 0x42eaf5 VMOVSD %XMM7,(%RDX) |
(196) 0x42eaf9 CMP %EAX,%R11D |
(196) 0x42eafc JAE 42eb3f |
(196) 0x42eafe MOVSXD %EDI,%R11 |
(196) 0x42eb01 MOV 0x68(%RSP),%RDI |
(196) 0x42eb06 ADD %R11,%RBX |
(196) 0x42eb09 ADD %R11,%R13 |
(196) 0x42eb0c ADD %R11,%R10 |
(196) 0x42eb0f ADD %R11,%R9 |
(196) 0x42eb12 LEA (%R15,%RBX,8),%R15 |
(196) 0x42eb16 ADD %R11,%RDI |
(196) 0x42eb19 VMOVSD (%R15),%XMM8 |
(196) 0x42eb1e VMOVSD (%RSI,%RDI,8),%XMM9 |
(196) 0x42eb23 VFMSUB132SD (%R14,%R13,8),%XMM9,%XMM8 |
(196) 0x42eb29 MOV 0x78(%RSP),%R14 |
(196) 0x42eb2e VADDSD (%RSI,%R10,8),%XMM8,%XMM10 |
(196) 0x42eb34 VDIVSD (%R14,%R9,8),%XMM10,%XMM11 |
(196) 0x42eb3a VMOVSD %XMM11,(%R15) |
(196) 0x42eb3f MOV 0x5c(%RSP),%R11D |
(196) 0x42eb44 INCL 0x70(%RSP) |
(196) 0x42eb48 INC %R12 |
(196) 0x42eb4b MOV 0x70(%RSP),%ESI |
(196) 0x42eb4f CMP %ESI,0x3c(%RSP) |
(196) 0x42eb53 JLE 42eb70 |
(196) 0x42eb55 MOV 0x34(%RSP),%ECX |
(196) 0x42eb59 MOV 0x38(%RSP),%R8D |
(196) 0x42eb5e MOV 0x58(%RSP),%EDX |
(196) 0x42eb62 MOV %R8D,0x74(%RSP) |
(196) 0x42eb67 SUB %R11D,%ECX |
(196) 0x42eb6a JMP 42e758 |
0x42eb6f NOP |
0x42eb70 VZEROUPPER |
0x42eb73 LEA -0x28(%RBP),%RSP |
0x42eb77 POP %RBX |
0x42eb78 POP %R12 |
0x42eb7a POP %R13 |
0x42eb7c POP %R14 |
0x42eb7e POP %R15 |
0x42eb80 POP %RBP |
0x42eb81 RET |
0x42eb82 NOPW (%RAX,%RAX,1) |
(196) 0x42eb88 MOV 0x74(%RSP),%EDI |
(196) 0x42eb8c XOR %ECX,%ECX |
(196) 0x42eb8e JMP 42e9b7 |
0x42eb93 INC %ECX |
0x42eb95 XOR %EDX,%EDX |
0x42eb97 JMP 42e6ee |
0x42eb9c NOPL (%RAX) |
Path / |
Source file and lines | advec_mom.cpp:218-221 |
Module | exec |
nb instructions | 81 |
nb uops | 91 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.17 cycles |
front end | 15.17 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.43-15.14 |
Stall cycles | 0.00-0.37 |
Front-end | 15.17 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 9% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42eb73 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42eb73 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42eb93 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x523> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42eb73 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42e6ee <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:218-221 |
Module | exec |
nb instructions | 81 |
nb uops | 91 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.17 cycles |
front end | 15.17 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.43-15.14 |
Stall cycles | 0.00-0.37 |
Front-end | 15.17 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 9% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42eb73 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42eb73 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42eb93 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x523> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42eb73 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42e6ee <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11– | 3.63 | 1.21 |
▼Loop 196 - advec_mom.cpp:220-221 - exec– | 0.01 | 0 |
○Loop 197 - advec_mom.cpp:221-221 - exec | 3.62 | 1.21 |