Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:44-48 [...] | Coverage: 2.99% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:44-48 [...] | Coverage: 2.99% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 44 - 48 |
-------------------------------------------------------------------------------- |
44: #pragma omp parallel for simd collapse(2) |
45: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
46: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
47: post_vol(i, j) = volume(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j); |
48: pre_vol(i, j) = post_vol(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42c830 PUSH %RBP |
0x42c831 MOV %RSP,%RBP |
0x42c834 PUSH %R15 |
0x42c836 PUSH %R14 |
0x42c838 PUSH %R13 |
0x42c83a PUSH %R12 |
0x42c83c PUSH %RBX |
0x42c83d MOV %RDI,%R14 |
0x42c840 AND $-0x40,%RSP |
0x42c844 ADD $-0x80,%RSP |
0x42c848 MOV 0x30(%RDI),%EAX |
0x42c84b MOV 0x34(%R14),%ECX |
0x42c84f MOV 0x28(%R14),%ESI |
0x42c853 MOV 0x2c(%R14),%EBX |
0x42c857 ADD $0x4,%ECX |
0x42c85a LEA -0x1(%RSI),%EDX |
0x42c85d LEA -0x1(%RAX),%EDI |
0x42c860 MOV %ECX,0x3c(%RSP) |
0x42c864 MOV %EDX,0x38(%RSP) |
0x42c868 CMP %ECX,%EDI |
0x42c86a JGE 42ce53 |
0x42c870 LEA 0x4(%RBX),%R13D |
0x42c874 MOV %ECX,%EBX |
0x42c876 MOV %EDI,0x78(%RSP) |
0x42c87a SUB %EDI,%EBX |
0x42c87c CMP %R13D,%EDX |
0x42c87f JGE 42ce53 |
0x42c885 MOV %R13D,%R15D |
0x42c888 SUB %EDX,%R15D |
0x42c88b MOV %R15D,0x34(%RSP) |
0x42c890 CALL 404650 <omp_get_num_threads@plt> |
0x42c895 MOV %EAX,%R12D |
0x42c898 CALL 404540 <omp_get_thread_num@plt> |
0x42c89d XOR %EDX,%EDX |
0x42c89f MOV 0x78(%RSP),%R10D |
0x42c8a4 MOV %EAX,%R8D |
0x42c8a7 MOV %R15D,%EAX |
0x42c8aa IMUL %EBX,%EAX |
0x42c8ad DIV %R12D |
0x42c8b0 CMP %EDX,%R8D |
0x42c8b3 MOV %EAX,%R9D |
0x42c8b6 JB 42ce8c |
0x42c8bc IMUL %R9D,%R8D |
0x42c8c0 LEA (%R8,%RDX,1),%R15D |
0x42c8c4 LEA (%R9,%R15,1),%R11D |
0x42c8c8 MOV %R11D,0x30(%RSP) |
0x42c8cd CMP %R11D,%R15D |
0x42c8d0 JAE 42ce53 |
0x42c8d6 MOV %R15D,%EAX |
0x42c8d9 XOR %EDX,%EDX |
0x42c8db MOV 0x38(%RSP),%EDI |
0x42c8df VMOVQ 0x10(%R14),%XMM12 |
0x42c8e5 DIVL 0x34(%RSP) |
0x42c8e9 VMOVQ 0x8(%R14),%XMM11 |
0x42c8ef VMOVQ 0x20(%R14),%XMM10 |
0x42c8f5 VMOVQ (%R14),%XMM9 |
0x42c8fa VMOVQ 0x18(%R14),%XMM8 |
0x42c900 ADD %EDI,%EDX |
0x42c902 LEA (%RAX,%R10,1),%ECX |
0x42c906 SUB %EDX,%R13D |
0x42c909 MOV %EDX,0x74(%RSP) |
0x42c90d MOVSXD %ECX,%RSI |
0x42c910 MOV %R9D,%EDX |
0x42c913 NOPW %CS:(%RAX,%RAX,1) |
0x42c91e XCHG %AX,%AX |
(177) 0x42c920 CMP %R13D,%EDX |
(177) 0x42c923 CMOVA %R13D,%EDX |
(177) 0x42c927 LEA (%R15,%RDX,1),%R13D |
(177) 0x42c92b MOV %R13D,0x70(%RSP) |
(177) 0x42c930 CMP %R13D,%R15D |
(177) 0x42c933 JAE 42ce70 |
(177) 0x42c939 VMOVQ %XMM11,%R8 |
(177) 0x42c93e LEA 0x1(%RSI),%RDI |
(177) 0x42c942 VMOVQ %XMM12,%RBX |
(177) 0x42c947 MOV (%R8),%R9 |
(177) 0x42c94a MOV %RDI,0x40(%RSP) |
(177) 0x42c94f VMOVQ %XMM10,%R10 |
(177) 0x42c954 VMOVQ %XMM9,%RCX |
(177) 0x42c959 MOV (%RBX),%R12 |
(177) 0x42c95c VMOVQ 0x10(%RBX),%XMM1 |
(177) 0x42c961 MOV (%R10),%RBX |
(177) 0x42c964 VMOVQ %XMM8,%R13 |
(177) 0x42c969 VMOVQ 0x10(%R10),%XMM16 |
(177) 0x42c970 VMOVQ 0x10(%R13),%XMM3 |
(177) 0x42c976 MOV 0x10(%R8),%R11 |
(177) 0x42c97a MOV 0x10(%RCX),%R14 |
(177) 0x42c97e IMUL %R9,%RDI |
(177) 0x42c982 IMUL %RSI,%R12 |
(177) 0x42c986 IMUL %RSI,%RBX |
(177) 0x42c98a MOV %RDI,%RAX |
(177) 0x42c98d MOV %RDI,0x50(%RSP) |
(177) 0x42c992 SUB %R9,%RAX |
(177) 0x42c995 MOV (%RCX),%R9 |
(177) 0x42c998 MOV %R12,0x48(%RSP) |
(177) 0x42c99d MOV %RAX,0x58(%RSP) |
(177) 0x42c9a2 MOV %RBX,0x60(%RSP) |
(177) 0x42c9a7 IMUL %RSI,%R9 |
(177) 0x42c9ab IMUL (%R13),%RSI |
(177) 0x42c9b0 MOV %R9,0x68(%RSP) |
(177) 0x42c9b5 MOV %RSI,0x78(%RSP) |
(177) 0x42c9ba LEA -0x1(%RDX),%ESI |
(177) 0x42c9bd CMP $0x6,%ESI |
(177) 0x42c9c0 JBE 42ce80 |
(177) 0x42c9c6 MOVSXD 0x74(%RSP),%R10 |
(177) 0x42c9cb VMOVQ %XMM1,%R8 |
(177) 0x42c9d0 LEA 0x1(%R9,%R10,1),%R9 |
(177) 0x42c9d5 ADD %R10,%R12 |
(177) 0x42c9d8 LEA (%RDI,%R10,1),%RCX |
(177) 0x42c9dc LEA (%RAX,%R10,1),%RAX |
(177) 0x42c9e0 SAL $0x3,%R9 |
(177) 0x42c9e4 VMOVQ %XMM16,%RDI |
(177) 0x42c9ea LEA (%R8,%R12,8),%R13 |
(177) 0x42c9ee LEA (%R11,%RCX,8),%R8 |
(177) 0x42c9f2 MOV 0x78(%RSP),%RCX |
(177) 0x42c9f7 LEA (%R11,%RAX,8),%RSI |
(177) 0x42c9fb LEA (%R14,%R9,1),%R12 |
(177) 0x42c9ff LEA -0x8(%R14,%R9,1),%RAX |
(177) 0x42ca04 MOV %EDX,%R9D |
(177) 0x42ca07 SHR $0x3,%R9D |
(177) 0x42ca0b LEA (%RBX,%R10,1),%RBX |
(177) 0x42ca0f SAL $0x6,%R9 |
(177) 0x42ca13 ADD %RCX,%R10 |
(177) 0x42ca16 LEA (%RDI,%RBX,8),%RBX |
(177) 0x42ca1a VMOVQ %XMM3,%RDI |
(177) 0x42ca1f XOR %ECX,%ECX |
(177) 0x42ca21 LEA (%RDI,%R10,8),%R10 |
(177) 0x42ca25 LEA -0x40(%R9),%RDI |
(177) 0x42ca29 SHR $0x6,%RDI |
(177) 0x42ca2d INC %RDI |
(177) 0x42ca30 AND $0x3,%EDI |
(177) 0x42ca33 JE 42cafd |
(177) 0x42ca39 CMP $0x1,%RDI |
(177) 0x42ca3d JE 42cab8 |
(177) 0x42ca3f CMP $0x2,%RDI |
(177) 0x42ca43 JE 42ca7c |
(177) 0x42ca45 VMOVUPD (%R13),%ZMM7 |
(177) 0x42ca4c VADDPD (%R8),%ZMM7,%ZMM0 |
(177) 0x42ca52 MOV $0x40,%ECX |
(177) 0x42ca57 VSUBPD (%RSI),%ZMM0,%ZMM4 |
(177) 0x42ca5d VMOVUPD %ZMM4,(%RBX) |
(177) 0x42ca63 VMOVUPD (%R12),%ZMM2 |
(177) 0x42ca6a VSUBPD (%RAX),%ZMM2,%ZMM5 |
(177) 0x42ca70 VADDPD %ZMM4,%ZMM5,%ZMM6 |
(177) 0x42ca76 VMOVUPD %ZMM6,(%R10) |
(177) 0x42ca7c VMOVUPD (%R13,%RCX,1),%ZMM13 |
(177) 0x42ca84 VADDPD (%R8,%RCX,1),%ZMM13,%ZMM14 |
(177) 0x42ca8b VSUBPD (%RSI,%RCX,1),%ZMM14,%ZMM15 |
(177) 0x42ca92 VMOVUPD %ZMM15,(%RBX,%RCX,1) |
(177) 0x42ca99 VMOVUPD (%R12,%RCX,1),%ZMM7 |
(177) 0x42caa0 VSUBPD (%RAX,%RCX,1),%ZMM7,%ZMM0 |
(177) 0x42caa7 VADDPD %ZMM15,%ZMM0,%ZMM4 |
(177) 0x42caad VMOVUPD %ZMM4,(%R10,%RCX,1) |
(177) 0x42cab4 ADD $0x40,%RCX |
(177) 0x42cab8 VMOVUPD (%R13,%RCX,1),%ZMM2 |
(177) 0x42cac0 VADDPD (%R8,%RCX,1),%ZMM2,%ZMM5 |
(177) 0x42cac7 VSUBPD (%RSI,%RCX,1),%ZMM5,%ZMM6 |
(177) 0x42cace VMOVUPD %ZMM6,(%RBX,%RCX,1) |
(177) 0x42cad5 VMOVUPD (%R12,%RCX,1),%ZMM13 |
(177) 0x42cadc VSUBPD (%RAX,%RCX,1),%ZMM13,%ZMM14 |
(177) 0x42cae3 VADDPD %ZMM6,%ZMM14,%ZMM15 |
(177) 0x42cae9 VMOVUPD %ZMM15,(%R10,%RCX,1) |
(177) 0x42caf0 ADD $0x40,%RCX |
(177) 0x42caf4 CMP %RCX,%R9 |
(177) 0x42caf7 JE 42cbff |
(178) 0x42cafd VMOVUPD (%R13,%RCX,1),%ZMM7 |
(178) 0x42cb05 VADDPD (%R8,%RCX,1),%ZMM7,%ZMM0 |
(178) 0x42cb0c VSUBPD (%RSI,%RCX,1),%ZMM0,%ZMM4 |
(178) 0x42cb13 VMOVUPD %ZMM4,(%RBX,%RCX,1) |
(178) 0x42cb1a VMOVUPD (%R12,%RCX,1),%ZMM2 |
(178) 0x42cb21 VSUBPD (%RAX,%RCX,1),%ZMM2,%ZMM5 |
(178) 0x42cb28 VADDPD %ZMM4,%ZMM5,%ZMM6 |
(178) 0x42cb2e VMOVUPD %ZMM6,(%R10,%RCX,1) |
(178) 0x42cb35 VMOVUPD 0x40(%R13,%RCX,1),%ZMM13 |
(178) 0x42cb3d VADDPD 0x40(%R8,%RCX,1),%ZMM13,%ZMM14 |
(178) 0x42cb45 VSUBPD 0x40(%RSI,%RCX,1),%ZMM14,%ZMM15 |
(178) 0x42cb4d VMOVUPD %ZMM15,0x40(%RBX,%RCX,1) |
(178) 0x42cb55 VMOVUPD 0x40(%R12,%RCX,1),%ZMM7 |
(178) 0x42cb5d VSUBPD 0x40(%RAX,%RCX,1),%ZMM7,%ZMM0 |
(178) 0x42cb65 VADDPD %ZMM15,%ZMM0,%ZMM4 |
(178) 0x42cb6b VMOVUPD %ZMM4,0x40(%R10,%RCX,1) |
(178) 0x42cb73 VMOVUPD 0x80(%R13,%RCX,1),%ZMM2 |
(178) 0x42cb7b VADDPD 0x80(%R8,%RCX,1),%ZMM2,%ZMM5 |
(178) 0x42cb83 VSUBPD 0x80(%RSI,%RCX,1),%ZMM5,%ZMM6 |
(178) 0x42cb8b VMOVUPD %ZMM6,0x80(%RBX,%RCX,1) |
(178) 0x42cb93 VMOVUPD 0x80(%R12,%RCX,1),%ZMM13 |
(178) 0x42cb9b VSUBPD 0x80(%RAX,%RCX,1),%ZMM13,%ZMM14 |
(178) 0x42cba3 VADDPD %ZMM6,%ZMM14,%ZMM15 |
(178) 0x42cba9 VMOVUPD %ZMM15,0x80(%R10,%RCX,1) |
(178) 0x42cbb1 VMOVUPD 0xc0(%R13,%RCX,1),%ZMM7 |
(178) 0x42cbb9 VADDPD 0xc0(%R8,%RCX,1),%ZMM7,%ZMM0 |
(178) 0x42cbc1 VSUBPD 0xc0(%RSI,%RCX,1),%ZMM0,%ZMM4 |
(178) 0x42cbc9 VMOVUPD %ZMM4,0xc0(%RBX,%RCX,1) |
(178) 0x42cbd1 VMOVUPD 0xc0(%R12,%RCX,1),%ZMM2 |
(178) 0x42cbd9 VSUBPD 0xc0(%RAX,%RCX,1),%ZMM2,%ZMM5 |
(178) 0x42cbe1 VADDPD %ZMM4,%ZMM5,%ZMM6 |
(178) 0x42cbe7 VMOVUPD %ZMM6,0xc0(%R10,%RCX,1) |
(178) 0x42cbef ADD $0x100,%RCX |
(178) 0x42cbf6 CMP %RCX,%R9 |
(178) 0x42cbf9 JNE 42cafd |
(177) 0x42cbff MOV 0x74(%RSP),%R13D |
(177) 0x42cc04 MOV %EDX,%R12D |
(177) 0x42cc07 AND $-0x8,%R12D |
(177) 0x42cc0b ADD %R12D,%R15D |
(177) 0x42cc0e LEA (%R12,%R13,1),%EDI |
(177) 0x42cc12 TEST $0x7,%DL |
(177) 0x42cc15 JE 42ce21 |
(177) 0x42cc1b SUB %R12D,%EDX |
(177) 0x42cc1e LEA -0x1(%RDX),%R8D |
(177) 0x42cc22 CMP $0x2,%R8D |
(177) 0x42cc26 JBE 42cccc |
(177) 0x42cc2c MOVSXD 0x74(%RSP),%RAX |
(177) 0x42cc31 MOV 0x68(%RSP),%RSI |
(177) 0x42cc36 MOV 0x48(%RSP),%R10 |
(177) 0x42cc3b MOV 0x50(%RSP),%R13 |
(177) 0x42cc40 LEA (%RSI,%RAX,1),%RBX |
(177) 0x42cc44 LEA (%R10,%RAX,1),%R9 |
(177) 0x42cc48 VMOVQ %XMM1,%RSI |
(177) 0x42cc4d LEA 0x1(%R12,%RBX,1),%RCX |
(177) 0x42cc52 MOV 0x58(%RSP),%RBX |
(177) 0x42cc57 LEA (%R13,%RAX,1),%R8 |
(177) 0x42cc5c ADD %R12,%R9 |
(177) 0x42cc5f ADD %R12,%R8 |
(177) 0x42cc62 VMOVUPD (%R11,%R8,8),%YMM13 |
(177) 0x42cc68 VADDPD (%RSI,%R9,8),%YMM13,%YMM14 |
(177) 0x42cc6e MOV 0x60(%RSP),%R9 |
(177) 0x42cc73 VMOVQ %XMM16,%R8 |
(177) 0x42cc79 LEA (%RBX,%RAX,1),%R10 |
(177) 0x42cc7d ADD %R12,%R10 |
(177) 0x42cc80 VSUBPD (%R11,%R10,8),%YMM14,%YMM15 |
(177) 0x42cc86 LEA (%R9,%RAX,1),%R13 |
(177) 0x42cc8a ADD %R12,%R13 |
(177) 0x42cc8d VMOVUPD %YMM15,(%R8,%R13,8) |
(177) 0x42cc93 VMOVUPD (%R14,%RCX,8),%YMM7 |
(177) 0x42cc99 VSUBPD -0x8(%R14,%RCX,8),%YMM7,%YMM0 |
(177) 0x42cca0 MOV 0x78(%RSP),%RCX |
(177) 0x42cca5 ADD %RCX,%RAX |
(177) 0x42cca8 ADD %R12,%RAX |
(177) 0x42ccab VMOVQ %XMM3,%R12 |
(177) 0x42ccb0 VADDPD %YMM15,%YMM0,%YMM4 |
(177) 0x42ccb5 VMOVUPD %YMM4,(%R12,%RAX,8) |
(177) 0x42ccbb TEST $0x3,%DL |
(177) 0x42ccbe JE 42ce21 |
(177) 0x42ccc4 AND $-0x4,%EDX |
(177) 0x42ccc7 ADD %EDX,%R15D |
(177) 0x42ccca ADD %EDX,%EDI |
(177) 0x42cccc MOV 0x50(%RSP),%RBX |
(177) 0x42ccd1 MOV 0x48(%RSP),%R8 |
(177) 0x42ccd6 MOVSXD %EDI,%RDX |
(177) 0x42ccd9 VMOVQ %XMM1,%R10 |
(177) 0x42ccde MOV 0x60(%RSP),%R13 |
(177) 0x42cce3 VMOVQ %XMM16,%R12 |
(177) 0x42cce9 LEA (%RBX,%RDX,1),%RSI |
(177) 0x42cced LEA (%R8,%RDX,1),%RAX |
(177) 0x42ccf1 VMOVSD (%R11,%RSI,8),%XMM2 |
(177) 0x42ccf7 VADDSD (%R10,%RAX,8),%XMM2,%XMM5 |
(177) 0x42ccfd MOV 0x58(%RSP),%R10 |
(177) 0x42cd02 LEA (%R13,%RDX,1),%RCX |
(177) 0x42cd07 MOV 0x68(%RSP),%RSI |
(177) 0x42cd0c LEA 0x1(%RDI),%EAX |
(177) 0x42cd0f CLTQ |
(177) 0x42cd11 LEA (%R10,%RDX,1),%R9 |
(177) 0x42cd15 VSUBSD (%R11,%R9,8),%XMM5,%XMM6 |
(177) 0x42cd1b VMOVSD %XMM6,(%R12,%RCX,8) |
(177) 0x42cd21 MOV 0x78(%RSP),%R12 |
(177) 0x42cd26 LEA (%RSI,%RAX,1),%R9 |
(177) 0x42cd2a LEA (%R14,%R9,8),%RCX |
(177) 0x42cd2e VMOVSD (%RCX),%XMM13 |
(177) 0x42cd32 LEA (%R12,%RDX,1),%R9 |
(177) 0x42cd36 ADD %RSI,%RDX |
(177) 0x42cd39 VSUBSD (%R14,%RDX,8),%XMM13,%XMM14 |
(177) 0x42cd3f VMOVQ %XMM3,%RDX |
(177) 0x42cd44 VADDSD %XMM6,%XMM14,%XMM15 |
(177) 0x42cd48 VMOVSD %XMM15,(%RDX,%R9,8) |
(177) 0x42cd4e MOV 0x70(%RSP),%R9D |
(177) 0x42cd53 LEA 0x1(%R15),%EDX |
(177) 0x42cd57 CMP %R9D,%EDX |
(177) 0x42cd5a JAE 42ce21 |
(177) 0x42cd60 LEA (%RAX,%RBX,1),%R9 |
(177) 0x42cd64 LEA (%RAX,%R8,1),%RDX |
(177) 0x42cd68 ADD $0x2,%R15D |
(177) 0x42cd6c VMOVSD (%R11,%R9,8),%XMM7 |
(177) 0x42cd72 VMOVQ %XMM1,%R9 |
(177) 0x42cd77 VADDSD (%R9,%RDX,8),%XMM7,%XMM0 |
(177) 0x42cd7d LEA (%RAX,%R10,1),%RDX |
(177) 0x42cd81 VMOVQ %XMM16,%R9 |
(177) 0x42cd87 VSUBSD (%R11,%RDX,8),%XMM0,%XMM4 |
(177) 0x42cd8d LEA (%RAX,%R13,1),%RDX |
(177) 0x42cd91 ADD %R12,%RAX |
(177) 0x42cd94 VMOVSD %XMM4,(%R9,%RDX,8) |
(177) 0x42cd9a LEA 0x2(%RDI),%EDX |
(177) 0x42cd9d MOVSXD %EDX,%RDX |
(177) 0x42cda0 LEA (%RSI,%RDX,1),%R9 |
(177) 0x42cda4 LEA (%R14,%R9,8),%R9 |
(177) 0x42cda8 VMOVSD (%R9),%XMM2 |
(177) 0x42cdad VSUBSD (%RCX),%XMM2,%XMM5 |
(177) 0x42cdb1 VMOVQ %XMM3,%RCX |
(177) 0x42cdb6 VADDSD %XMM4,%XMM5,%XMM6 |
(177) 0x42cdba VMOVSD %XMM6,(%RCX,%RAX,8) |
(177) 0x42cdbf MOV 0x70(%RSP),%EAX |
(177) 0x42cdc3 CMP %EAX,%R15D |
(177) 0x42cdc6 JAE 42ce21 |
(177) 0x42cdc8 MOV 0x78(%RSP),%R12 |
(177) 0x42cdcd ADD %RDX,%R8 |
(177) 0x42cdd0 ADD $0x3,%EDI |
(177) 0x42cdd3 ADD %RDX,%RBX |
(177) 0x42cdd6 MOV %R8,%R15 |
(177) 0x42cdd9 ADD %RDX,%R10 |
(177) 0x42cddc VMOVQ %XMM1,%R8 |
(177) 0x42cde1 MOVSXD %EDI,%RDI |
(177) 0x42cde4 VMOVSD (%R8,%R15,8),%XMM1 |
(177) 0x42cdea ADD %RDX,%R13 |
(177) 0x42cded VADDSD (%R11,%RBX,8),%XMM1,%XMM13 |
(177) 0x42cdf3 ADD %RSI,%RDI |
(177) 0x42cdf6 VSUBSD (%R11,%R10,8),%XMM13,%XMM14 |
(177) 0x42cdfc VMOVQ %XMM16,%R11 |
(177) 0x42ce02 VMOVSD %XMM14,(%R11,%R13,8) |
(177) 0x42ce08 VMOVSD (%R14,%RDI,8),%XMM15 |
(177) 0x42ce0e VSUBSD (%R9),%XMM15,%XMM7 |
(177) 0x42ce13 VADDSD %XMM14,%XMM7,%XMM0 |
(177) 0x42ce18 ADD %RDX,%R12 |
(177) 0x42ce1b VMOVSD %XMM0,(%RCX,%R12,8) |
(177) 0x42ce21 MOV 0x70(%RSP),%R15D |
(177) 0x42ce26 MOV 0x40(%RSP),%RSI |
(177) 0x42ce2b LEA (%RSI),%EDX |
(177) 0x42ce2d CMP %EDX,0x3c(%RSP) |
(177) 0x42ce31 JLE 42ce50 |
(177) 0x42ce33 MOV 0x30(%RSP),%EDX |
(177) 0x42ce37 MOV 0x38(%RSP),%R9D |
(177) 0x42ce3c MOV 0x34(%RSP),%R13D |
(177) 0x42ce41 SUB %R15D,%EDX |
(177) 0x42ce44 MOV %R9D,0x74(%RSP) |
(177) 0x42ce49 JMP 42c920 |
0x42ce4e XCHG %AX,%AX |
0x42ce50 VZEROUPPER |
0x42ce53 LEA -0x28(%RBP),%RSP |
0x42ce57 POP %RBX |
0x42ce58 POP %R12 |
0x42ce5a POP %R13 |
0x42ce5c POP %R14 |
0x42ce5e POP %R15 |
0x42ce60 POP %RBP |
0x42ce61 RET |
0x42ce62 NOPW %CS:(%RAX,%RAX,1) |
0x42ce6d NOPL (%RAX) |
(177) 0x42ce70 LEA 0x1(%RSI),%R14 |
(177) 0x42ce74 MOV %R14,0x40(%RSP) |
(177) 0x42ce79 JMP 42ce26 |
0x42ce7b NOPL (%RAX,%RAX,1) |
(177) 0x42ce80 MOV 0x74(%RSP),%EDI |
(177) 0x42ce84 XOR %R12D,%R12D |
(177) 0x42ce87 JMP 42cc1b |
0x42ce8c INC %R9D |
0x42ce8f XOR %EDX,%EDX |
0x42ce91 JMP 42c8bc |
0x42ce96 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:44-48 |
Module | exec |
nb instructions | 82 |
nb uops | 80 |
loop length | 299 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 13.33 cycles |
front end | 13.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.33 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%R14),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%R14),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RSI),%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %ECX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42ce53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA 0x4(%RBX),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EDI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R13D,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42ce53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x78(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JB 42ce8c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x65c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%R15,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R11D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42ce53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x38(%RSP),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ 0x10(%R14),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x34(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x8(%R14),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x20(%R14),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ (%R14),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R14),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDI,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R10,1),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42c8bc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x8c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_mom.cpp:44-48 |
Module | exec |
nb instructions | 82 |
nb uops | 80 |
loop length | 299 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 13.33 cycles |
front end | 13.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.33 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%R14),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%R14),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RSI),%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %ECX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42ce53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA 0x4(%RBX),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EDI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R13D,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42ce53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x78(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JB 42ce8c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x65c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%R15,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R11D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42ce53 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x38(%RSP),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ 0x10(%R14),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x34(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x8(%R14),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x20(%R14),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ (%R14),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R14),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDI,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R10,1),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42c8bc <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0+0x8c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.0– | 2.99 | 1.51 |
▼Loop 177 - advec_mom.cpp:44-48 - exec– | 0 | 0.01 |
○Loop 178 - advec_mom.cpp:47-48 - exec | 2.99 | 1.5 |