Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:117-125 [...] | Coverage: 3.4% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:117-125 [...] | Coverage: 3.4% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 117 - 125 |
-------------------------------------------------------------------------------- |
117: #pragma omp parallel for simd collapse(2) |
118: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
119: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
120: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
121: double post_mass_s = pre_mass_s + mass_flux_x(i, j) - mass_flux_x(i + 1, j + 0); |
122: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 1, j + 0)) / post_mass_s; |
123: double advec_vol_s = pre_vol(i, j) + vol_flux_x(i, j) - vol_flux_x(i + 1, j + 0); |
124: density1(i, j) = post_mass_s / advec_vol_s; |
125: energy1(i, j) = post_ener_s; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x427960 PUSH %RBP |
0x427961 MOV %RSP,%RBP |
0x427964 PUSH %R15 |
0x427966 PUSH %R14 |
0x427968 PUSH %R13 |
0x42796a PUSH %R12 |
0x42796c PUSH %RBX |
0x42796d MOV %RDI,%R13 |
0x427970 AND $-0x40,%RSP |
0x427974 ADD $-0x80,%RSP |
0x427978 MOV 0x38(%RDI),%EAX |
0x42797b MOV 0x3c(%RDI),%EDX |
0x42797e MOV 0x30(%RDI),%EBX |
0x427981 MOV 0x34(%R13),%ECX |
0x427985 ADD $0x2,%EDX |
0x427988 LEA 0x1(%RBX),%EDI |
0x42798b LEA 0x1(%RAX),%R15D |
0x42798f MOV %EDX,0x38(%RSP) |
0x427993 MOV %EDI,0x34(%RSP) |
0x427997 CMP %EDX,%R15D |
0x42799a JGE 428033 |
0x4279a0 MOV %EDX,%EBX |
0x4279a2 LEA 0x2(%RCX),%R14D |
0x4279a6 SUB %R15D,%EBX |
0x4279a9 CMP %R14D,%EDI |
0x4279ac JGE 428033 |
0x4279b2 MOV %R14D,%ESI |
0x4279b5 SUB %EDI,%ESI |
0x4279b7 MOV %ESI,0x3c(%RSP) |
0x4279bb CALL 404650 <omp_get_num_threads@plt> |
0x4279c0 MOV %EAX,%R12D |
0x4279c3 CALL 404540 <omp_get_thread_num@plt> |
0x4279c8 XOR %EDX,%EDX |
0x4279ca MOV %EAX,%R8D |
0x4279cd MOV 0x3c(%RSP),%EAX |
0x4279d1 IMUL %EBX,%EAX |
0x4279d4 DIV %R12D |
0x4279d7 MOV %EAX,%R9D |
0x4279da CMP %EDX,%R8D |
0x4279dd JB 42805b |
0x4279e3 IMUL %R9D,%R8D |
0x4279e7 LEA (%R8,%RDX,1),%EBX |
0x4279eb LEA (%R9,%RBX,1),%R10D |
0x4279ef MOV %R10D,0x30(%RSP) |
0x4279f4 CMP %R10D,%EBX |
0x4279f7 JAE 428033 |
0x4279fd MOV %EBX,%EAX |
0x4279ff XOR %EDX,%EDX |
0x427a01 MOV 0x34(%RSP),%R11D |
0x427a06 VMOVQ (%R13),%XMM19 |
0x427a0d DIVL 0x3c(%RSP) |
0x427a11 VMOVQ 0x20(%R13),%XMM18 |
0x427a18 VMOVQ 0x10(%R13),%XMM17 |
0x427a1f MOV %R14D,%ECX |
0x427a22 VMOVQ 0x8(%R13),%XMM16 |
0x427a29 VMOVQ 0x28(%R13),%XMM15 |
0x427a2f VMOVQ 0x18(%R13),%XMM14 |
0x427a35 ADD %EDX,%R11D |
0x427a38 LEA (%RAX,%R15,1),%R15D |
0x427a3c MOV %R9D,%EDX |
0x427a3f SUB %R11D,%ECX |
0x427a42 MOVSXD %R15D,%R15 |
0x427a45 MOV %R11D,0x64(%RSP) |
0x427a4a MOV %ECX,%R14D |
0x427a4d NOPL (%RAX) |
(163) 0x427a50 CMP %R14D,%EDX |
(163) 0x427a53 CMOVA %R14D,%EDX |
(163) 0x427a57 LEA (%RBX,%RDX,1),%R13D |
(163) 0x427a5b MOV %R13D,0x60(%RSP) |
(163) 0x427a60 CMP %R13D,%EBX |
(163) 0x427a63 JAE 428003 |
(163) 0x427a69 VMOVQ %XMM19,%RDI |
(163) 0x427a6f VMOVQ %XMM18,%RSI |
(163) 0x427a75 VMOVQ %XMM17,%R8 |
(163) 0x427a7b VMOVQ %XMM16,%RAX |
(163) 0x427a81 MOV (%RDI),%R9 |
(163) 0x427a84 VMOVQ 0x10(%RDI),%XMM22 |
(163) 0x427a8b VMOVQ %XMM15,%RCX |
(163) 0x427a90 MOV (%RSI),%RDI |
(163) 0x427a93 VMOVQ 0x10(%RSI),%XMM13 |
(163) 0x427a98 MOV (%R8),%R11 |
(163) 0x427a9b VMOVQ %XMM14,%RSI |
(163) 0x427aa0 MOV (%RCX),%R13 |
(163) 0x427aa3 VMOVQ 0x10(%RAX),%XMM12 |
(163) 0x427aa8 MOV 0x10(%R8),%R10 |
(163) 0x427aac MOV (%RAX),%R8 |
(163) 0x427aaf MOV (%RSI),%RAX |
(163) 0x427ab2 MOV 0x10(%RCX),%R12 |
(163) 0x427ab6 LEA -0x1(%RDX),%ECX |
(163) 0x427ab9 MOV 0x10(%RSI),%R14 |
(163) 0x427abd IMUL %R15,%R9 |
(163) 0x427ac1 IMUL %R15,%RDI |
(163) 0x427ac5 IMUL %R15,%R11 |
(163) 0x427ac9 IMUL %R15,%R8 |
(163) 0x427acd MOV %R9,0x48(%RSP) |
(163) 0x427ad2 IMUL %R15,%R13 |
(163) 0x427ad6 MOV %RDI,0x50(%RSP) |
(163) 0x427adb IMUL %R15,%RAX |
(163) 0x427adf MOV %R11,0x68(%RSP) |
(163) 0x427ae4 MOV %R8,0x58(%RSP) |
(163) 0x427ae9 MOV %R13,0x70(%RSP) |
(163) 0x427aee MOV %RAX,0x78(%RSP) |
(163) 0x427af3 CMP $0x6,%ECX |
(163) 0x427af6 JBE 428050 |
(163) 0x427afc MOVSXD 0x64(%RSP),%RCX |
(163) 0x427b01 VMOVQ %XMM22,%RSI |
(163) 0x427b07 VMOVQ %XMM13,%RAX |
(163) 0x427b0c LEA (%R11,%RCX,1),%R11 |
(163) 0x427b10 LEA (%R9,%RCX,1),%R9 |
(163) 0x427b14 LEA (%RDI,%RCX,1),%RDI |
(163) 0x427b18 LEA (%R8,%RCX,1),%R8 |
(163) 0x427b1c SAL $0x3,%R11 |
(163) 0x427b20 LEA (%RSI,%R9,8),%R9 |
(163) 0x427b24 LEA (%R10,%R11,1),%RSI |
(163) 0x427b28 LEA (%R13,%RCX,1),%R13 |
(163) 0x427b2d LEA (%RAX,%RDI,8),%RDI |
(163) 0x427b31 VMOVQ %RSI,%XMM9 |
(163) 0x427b36 MOV 0x78(%RSP),%RSI |
(163) 0x427b3b LEA 0x8(%R10,%R11,1),%RAX |
(163) 0x427b40 VMOVQ %XMM12,%R11 |
(163) 0x427b45 LEA (%R11,%R8,8),%R8 |
(163) 0x427b49 VMOVQ %RAX,%XMM10 |
(163) 0x427b4e SAL $0x3,%R13 |
(163) 0x427b52 LEA (%R12,%R13,1),%RAX |
(163) 0x427b56 LEA 0x8(%R12,%R13,1),%R13 |
(163) 0x427b5b ADD %RSI,%RCX |
(163) 0x427b5e MOV %EDX,%ESI |
(163) 0x427b60 SAL $0x3,%RCX |
(163) 0x427b64 SHR $0x3,%ESI |
(163) 0x427b67 LEA (%R14,%RCX,1),%R11 |
(163) 0x427b6b LEA 0x8(%R14,%RCX,1),%RCX |
(163) 0x427b70 VMOVQ %R11,%XMM3 |
(163) 0x427b75 MOV %RSI,%R11 |
(163) 0x427b78 VMOVQ %RCX,%XMM2 |
(163) 0x427b7d XOR %ECX,%ECX |
(163) 0x427b7f SAL $0x6,%R11 |
(163) 0x427b83 AND $0x1,%ESI |
(163) 0x427b86 MOV %R11,0x40(%RSP) |
(163) 0x427b8b JE 427c07 |
(163) 0x427b8d VMOVUPD (%R9),%ZMM7 |
(163) 0x427b93 VMULPD (%RDI),%ZMM7,%ZMM0 |
(163) 0x427b99 VMOVQ %XMM10,%RCX |
(163) 0x427b9e VMOVQ %XMM3,%R11 |
(163) 0x427ba3 VMOVUPD (%R11),%ZMM1 |
(163) 0x427ba9 VADDPD (%RDI),%ZMM1,%ZMM4 |
(163) 0x427baf VMOVQ %XMM9,%RSI |
(163) 0x427bb4 VMOVUPD (%RAX),%ZMM6 |
(163) 0x427bba VSUBPD (%RCX),%ZMM0,%ZMM21 |
(163) 0x427bc0 CMPQ $0x40,0x40(%RSP) |
(163) 0x427bc6 VMOVQ %XMM2,%RCX |
(163) 0x427bcb VSUBPD (%R13),%ZMM6,%ZMM20 |
(163) 0x427bd2 VADDPD (%RSI),%ZMM21,%ZMM23 |
(163) 0x427bd8 VSUBPD (%RCX),%ZMM4,%ZMM5 |
(163) 0x427bde VFMADD231PD (%R8),%ZMM0,%ZMM20 |
(163) 0x427be4 MOV $0x40,%ECX |
(163) 0x427be9 VDIVPD %ZMM23,%ZMM20,%ZMM11 |
(163) 0x427bef VDIVPD %ZMM5,%ZMM23,%ZMM8 |
(163) 0x427bf5 VMOVUPD %ZMM8,(%R9) |
(163) 0x427bfb VMOVUPD %ZMM11,(%R8) |
(163) 0x427c01 JE 427d11 |
(163) 0x427c07 MOV %EBX,0x2c(%RSP) |
(163) 0x427c0b MOV %EDX,0x28(%RSP) |
(163) 0x427c0f MOV %R15,0x20(%RSP) |
(163) 0x427c14 VMOVQ %XMM9,%RDX |
(163) 0x427c19 VMOVQ %XMM10,%RSI |
(163) 0x427c1e VMOVQ %XMM3,%RBX |
(163) 0x427c23 VMOVQ %XMM2,%R15 |
(164) 0x427c28 VMOVUPD (%R9,%RCX,1),%ZMM9 |
(164) 0x427c2f VMULPD (%RDI,%RCX,1),%ZMM9,%ZMM10 |
(164) 0x427c36 VMOVUPD (%RBX,%RCX,1),%ZMM2 |
(164) 0x427c3d VADDPD (%RDI,%RCX,1),%ZMM2,%ZMM7 |
(164) 0x427c44 VMOVUPD (%RAX,%RCX,1),%ZMM3 |
(164) 0x427c4b VSUBPD (%R13,%RCX,1),%ZMM3,%ZMM26 |
(164) 0x427c53 VSUBPD (%RSI,%RCX,1),%ZMM10,%ZMM24 |
(164) 0x427c5a VSUBPD (%R15,%RCX,1),%ZMM7,%ZMM0 |
(164) 0x427c61 VFMADD231PD (%R8,%RCX,1),%ZMM10,%ZMM26 |
(164) 0x427c68 VADDPD (%RDX,%RCX,1),%ZMM24,%ZMM25 |
(164) 0x427c6f VDIVPD %ZMM25,%ZMM26,%ZMM1 |
(164) 0x427c75 VDIVPD %ZMM0,%ZMM25,%ZMM6 |
(164) 0x427c7b VMOVUPD %ZMM6,(%R9,%RCX,1) |
(164) 0x427c82 VMOVUPD %ZMM1,(%R8,%RCX,1) |
(164) 0x427c89 VMOVUPD 0x40(%R9,%RCX,1),%ZMM4 |
(164) 0x427c91 VMULPD 0x40(%RDI,%RCX,1),%ZMM4,%ZMM1 |
(164) 0x427c99 VMOVUPD 0x40(%RBX,%RCX,1),%ZMM8 |
(164) 0x427ca1 VADDPD 0x40(%RDI,%RCX,1),%ZMM8,%ZMM11 |
(164) 0x427ca9 VMOVUPD 0x40(%RAX,%RCX,1),%ZMM5 |
(164) 0x427cb1 VSUBPD 0x40(%R13,%RCX,1),%ZMM5,%ZMM29 |
(164) 0x427cb9 VSUBPD 0x40(%RSI,%RCX,1),%ZMM1,%ZMM27 |
(164) 0x427cc1 VSUBPD 0x40(%R15,%RCX,1),%ZMM11,%ZMM9 |
(164) 0x427cc9 VFMADD132PD 0x40(%R8,%RCX,1),%ZMM29,%ZMM1 |
(164) 0x427cd1 VADDPD 0x40(%RDX,%RCX,1),%ZMM27,%ZMM28 |
(164) 0x427cd9 VDIVPD %ZMM9,%ZMM28,%ZMM10 |
(164) 0x427cdf VDIVPD %ZMM28,%ZMM1,%ZMM3 |
(164) 0x427ce5 VMOVUPD %ZMM10,0x40(%R9,%RCX,1) |
(164) 0x427ced VMOVUPD %ZMM3,0x40(%R8,%RCX,1) |
(164) 0x427cf5 SUB $-0x80,%RCX |
(164) 0x427cf9 CMP %RCX,0x40(%RSP) |
(164) 0x427cfe JNE 427c28 |
(163) 0x427d04 MOV 0x2c(%RSP),%EBX |
(163) 0x427d08 MOV 0x28(%RSP),%EDX |
(163) 0x427d0c MOV 0x20(%RSP),%R15 |
(163) 0x427d11 MOV 0x64(%RSP),%R9D |
(163) 0x427d16 MOV %EDX,%ECX |
(163) 0x427d18 AND $-0x8,%ECX |
(163) 0x427d1b ADD %ECX,%EBX |
(163) 0x427d1d LEA (%RCX,%R9,1),%EDI |
(163) 0x427d21 TEST $0x7,%DL |
(163) 0x427d24 JE 427fff |
(163) 0x427d2a SUB %ECX,%EDX |
(163) 0x427d2c LEA -0x1(%RDX),%R8D |
(163) 0x427d30 MOV %EDX,0x40(%RSP) |
(163) 0x427d34 CMP $0x2,%R8D |
(163) 0x427d38 JBE 427e04 |
(163) 0x427d3e MOVSXD 0x64(%RSP),%RAX |
(163) 0x427d43 MOV 0x48(%RSP),%R13 |
(163) 0x427d48 MOV 0x58(%RSP),%R8 |
(163) 0x427d4d VMOVQ %XMM22,%R11 |
(163) 0x427d53 MOV 0x68(%RSP),%RSI |
(163) 0x427d58 ADD %RAX,%R13 |
(163) 0x427d5b ADD %RAX,%R8 |
(163) 0x427d5e ADD %RCX,%R13 |
(163) 0x427d61 LEA (%RSI,%RAX,1),%R9 |
(163) 0x427d65 ADD %RCX,%R8 |
(163) 0x427d68 MOV 0x70(%RSP),%RSI |
(163) 0x427d6d LEA (%R11,%R13,8),%RDX |
(163) 0x427d71 MOV 0x78(%RSP),%R11 |
(163) 0x427d76 VMOVQ %XMM12,%R13 |
(163) 0x427d7b ADD %RCX,%R9 |
(163) 0x427d7e LEA (%R13,%R8,8),%R13 |
(163) 0x427d83 ADD %RAX,%RSI |
(163) 0x427d86 LEA (%R11,%RAX,1),%R8 |
(163) 0x427d8a MOV 0x50(%RSP),%R11 |
(163) 0x427d8f ADD %RCX,%RSI |
(163) 0x427d92 ADD %RCX,%R8 |
(163) 0x427d95 SAL $0x3,%RSI |
(163) 0x427d99 VMOVUPD (%R12,%RSI,1),%YMM4 |
(163) 0x427d9f VMOVUPD 0x8(%R12,%RSI,1),%YMM5 |
(163) 0x427da6 ADD %R11,%RAX |
(163) 0x427da9 ADD %RCX,%RAX |
(163) 0x427dac VMOVQ %XMM13,%RCX |
(163) 0x427db1 VMOVUPD (%RCX,%RAX,8),%YMM2 |
(163) 0x427db6 VMULPD (%RDX),%YMM2,%YMM7 |
(163) 0x427dba VADDPD (%R14,%R8,8),%YMM2,%YMM1 |
(163) 0x427dc0 VADDPD (%R10,%R9,8),%YMM7,%YMM0 |
(163) 0x427dc6 VSUBPD 0x8(%R14,%R8,8),%YMM1,%YMM8 |
(163) 0x427dcd VFMSUB132PD (%R13),%YMM5,%YMM7 |
(163) 0x427dd3 MOV 0x40(%RSP),%EAX |
(163) 0x427dd7 VSUBPD 0x8(%R10,%R9,8),%YMM0,%YMM6 |
(163) 0x427dde VADDPD %YMM7,%YMM4,%YMM9 |
(163) 0x427de2 VDIVPD %YMM8,%YMM6,%YMM11 |
(163) 0x427de7 VDIVPD %YMM6,%YMM9,%YMM10 |
(163) 0x427deb VMOVUPD %YMM11,(%RDX) |
(163) 0x427def VMOVUPD %YMM10,(%R13) |
(163) 0x427df5 TEST $0x3,%AL |
(163) 0x427df7 JE 427fff |
(163) 0x427dfd AND $-0x4,%EAX |
(163) 0x427e00 ADD %EAX,%EBX |
(163) 0x427e02 ADD %EAX,%EDI |
(163) 0x427e04 MOV 0x50(%RSP),%R8 |
(163) 0x427e09 MOV 0x48(%RSP),%R9 |
(163) 0x427e0e MOVSXD %EDI,%RDX |
(163) 0x427e11 VMOVQ %XMM13,%R11 |
(163) 0x427e16 LEA 0x1(%RDI),%EAX |
(163) 0x427e19 VMOVQ %XMM22,%RSI |
(163) 0x427e1f CLTQ |
(163) 0x427e21 ADD %RDX,%R8 |
(163) 0x427e24 LEA (%R9,%RDX,1),%R13 |
(163) 0x427e28 MOV 0x68(%RSP),%R9 |
(163) 0x427e2d VMOVSD (%R11,%R8,8),%XMM3 |
(163) 0x427e33 MOV 0x58(%RSP),%R11 |
(163) 0x427e38 LEA (%RSI,%R13,8),%RSI |
(163) 0x427e3c VMULSD (%RSI),%XMM3,%XMM0 |
(163) 0x427e40 LEA (%R9,%RDX,1),%R8 |
(163) 0x427e44 LEA (%RAX,%R9,1),%RCX |
(163) 0x427e48 VMOVQ %XMM12,%R9 |
(163) 0x427e4d ADD %RDX,%R11 |
(163) 0x427e50 LEA (%R10,%RCX,8),%R13 |
(163) 0x427e54 VADDSD (%R10,%R8,8),%XMM0,%XMM2 |
(163) 0x427e5a LEA (%R9,%R11,8),%RCX |
(163) 0x427e5e MOV 0x70(%RSP),%R11 |
(163) 0x427e63 VSUBSD (%R13),%XMM2,%XMM6 |
(163) 0x427e69 LEA (%R11,%RDX,1),%R8 |
(163) 0x427e6d LEA (%R11,%RAX,1),%R9 |
(163) 0x427e71 MOV 0x78(%RSP),%R11 |
(163) 0x427e76 VMOVSD (%R12,%R8,8),%XMM7 |
(163) 0x427e7c LEA (%R12,%R9,8),%R9 |
(163) 0x427e80 VMOVSD (%R9),%XMM4 |
(163) 0x427e85 VFMSUB132SD (%RCX),%XMM4,%XMM0 |
(163) 0x427e8a VADDSD %XMM7,%XMM0,%XMM11 |
(163) 0x427e8e VDIVSD %XMM6,%XMM11,%XMM9 |
(163) 0x427e92 MOV %R11,%R8 |
(163) 0x427e95 ADD %R11,%RDX |
(163) 0x427e98 ADD %RAX,%R8 |
(163) 0x427e9b VADDSD (%R14,%RDX,8),%XMM3,%XMM5 |
(163) 0x427ea1 LEA 0x1(%RBX),%EDX |
(163) 0x427ea4 LEA (%R14,%R8,8),%R8 |
(163) 0x427ea8 VSUBSD (%R8),%XMM5,%XMM1 |
(163) 0x427ead VDIVSD %XMM1,%XMM6,%XMM8 |
(163) 0x427eb1 VMOVSD %XMM8,(%RSI) |
(163) 0x427eb5 MOV 0x60(%RSP),%ESI |
(163) 0x427eb9 VMOVSD %XMM9,(%RCX) |
(163) 0x427ebd CMP %ESI,%EDX |
(163) 0x427ebf JAE 427fff |
(163) 0x427ec5 MOV 0x48(%RSP),%R11 |
(163) 0x427eca VMOVQ %XMM22,%RDX |
(163) 0x427ed0 VMOVQ %XMM13,%RSI |
(163) 0x427ed5 VMOVSD (%R9),%XMM7 |
(163) 0x427eda MOV 0x78(%RSP),%R9 |
(163) 0x427edf ADD $0x2,%EBX |
(163) 0x427ee2 MOV %R11,%RCX |
(163) 0x427ee5 ADD %RAX,%RCX |
(163) 0x427ee8 LEA (%RDX,%RCX,8),%RCX |
(163) 0x427eec MOV 0x50(%RSP),%RDX |
(163) 0x427ef1 ADD %RAX,%RDX |
(163) 0x427ef4 VMOVSD (%RSI,%RDX,8),%XMM10 |
(163) 0x427ef9 MOV 0x68(%RSP),%RSI |
(163) 0x427efe LEA 0x2(%RDI),%EDX |
(163) 0x427f01 VMULSD (%RCX),%XMM10,%XMM3 |
(163) 0x427f05 MOVSXD %EDX,%RDX |
(163) 0x427f08 VADDSD (%R8),%XMM10,%XMM5 |
(163) 0x427f0d MOV 0x60(%RSP),%R8D |
(163) 0x427f12 ADD %RDX,%RSI |
(163) 0x427f15 LEA (%R10,%RSI,8),%RSI |
(163) 0x427f19 VSUBSD (%RSI),%XMM3,%XMM0 |
(163) 0x427f1d VADDSD (%R13),%XMM0,%XMM6 |
(163) 0x427f23 MOV 0x58(%RSP),%R13 |
(163) 0x427f28 VMOVQ %RSI,%XMM2 |
(163) 0x427f2d VMOVQ %XMM12,%RSI |
(163) 0x427f32 ADD %R13,%RAX |
(163) 0x427f35 MOV 0x70(%RSP),%R13 |
(163) 0x427f3a LEA (%RSI,%RAX,8),%RAX |
(163) 0x427f3e LEA (%R9,%RDX,1),%RSI |
(163) 0x427f42 LEA (%R14,%RSI,8),%RSI |
(163) 0x427f46 VSUBSD (%RSI),%XMM5,%XMM1 |
(163) 0x427f4a VDIVSD %XMM1,%XMM6,%XMM8 |
(163) 0x427f4e ADD %RDX,%R13 |
(163) 0x427f51 LEA (%R12,%R13,8),%R13 |
(163) 0x427f55 VSUBSD (%R13),%XMM7,%XMM4 |
(163) 0x427f5b VFMADD132SD (%RAX),%XMM4,%XMM3 |
(163) 0x427f60 VMOVSD %XMM8,(%RCX) |
(163) 0x427f64 VDIVSD %XMM6,%XMM3,%XMM11 |
(163) 0x427f68 VMOVSD %XMM11,(%RAX) |
(163) 0x427f6c CMP %R8D,%EBX |
(163) 0x427f6f JAE 427fff |
(163) 0x427f75 ADD %RDX,%R11 |
(163) 0x427f78 VMOVQ %XMM22,%RBX |
(163) 0x427f7e MOV 0x50(%RSP),%RAX |
(163) 0x427f83 ADD $0x3,%EDI |
(163) 0x427f86 LEA (%RBX,%R11,8),%RCX |
(163) 0x427f8a MOV 0x68(%RSP),%R11 |
(163) 0x427f8f MOVSXD %EDI,%RDI |
(163) 0x427f92 VMOVQ %XMM13,%R8 |
(163) 0x427f97 MOV 0x58(%RSP),%RBX |
(163) 0x427f9c ADD %RDI,%R9 |
(163) 0x427f9f ADD %RDX,%RAX |
(163) 0x427fa2 ADD %RDI,%R11 |
(163) 0x427fa5 VMOVSD (%R8,%RAX,8),%XMM13 |
(163) 0x427fab VMULSD (%RCX),%XMM13,%XMM9 |
(163) 0x427faf VMOVQ %XMM12,%RAX |
(163) 0x427fb4 VSUBSD (%R10,%R11,8),%XMM9,%XMM10 |
(163) 0x427fba MOV 0x70(%RSP),%R11 |
(163) 0x427fbf ADD %RDX,%RBX |
(163) 0x427fc2 VMOVQ %XMM2,%R10 |
(163) 0x427fc7 LEA (%RAX,%RBX,8),%R8 |
(163) 0x427fcb VADDSD (%R10),%XMM10,%XMM3 |
(163) 0x427fd0 VMOVSD (%R13),%XMM12 |
(163) 0x427fd6 VADDSD (%RSI),%XMM13,%XMM0 |
(163) 0x427fda VSUBSD (%R14,%R9,8),%XMM0,%XMM6 |
(163) 0x427fe0 VDIVSD %XMM6,%XMM3,%XMM7 |
(163) 0x427fe4 ADD %RDI,%R11 |
(163) 0x427fe7 VSUBSD (%R12,%R11,8),%XMM12,%XMM2 |
(163) 0x427fed VFMADD132SD (%R8),%XMM2,%XMM9 |
(163) 0x427ff2 VMOVSD %XMM7,(%RCX) |
(163) 0x427ff6 VDIVSD %XMM3,%XMM9,%XMM4 |
(163) 0x427ffa VMOVSD %XMM4,(%R8) |
(163) 0x427fff MOV 0x60(%RSP),%EBX |
(163) 0x428003 INC %R15 |
(163) 0x428006 LEA (%R15),%R12D |
(163) 0x428009 CMP %R12D,0x38(%RSP) |
(163) 0x42800e JLE 428030 |
(163) 0x428010 MOV 0x30(%RSP),%EDX |
(163) 0x428014 MOV 0x34(%RSP),%R13D |
(163) 0x428019 MOV 0x3c(%RSP),%R14D |
(163) 0x42801e SUB %EBX,%EDX |
(163) 0x428020 MOV %R13D,0x64(%RSP) |
(163) 0x428025 JMP 427a50 |
0x42802a NOPW (%RAX,%RAX,1) |
0x428030 VZEROUPPER |
0x428033 LEA -0x28(%RBP),%RSP |
0x428037 POP %RBX |
0x428038 POP %R12 |
0x42803a POP %R13 |
0x42803c POP %R14 |
0x42803e POP %R15 |
0x428040 POP %RBP |
0x428041 RET |
0x428042 NOPW %CS:(%RAX,%RAX,1) |
0x42804d NOPL (%RAX) |
(163) 0x428050 MOV 0x64(%RSP),%EDI |
(163) 0x428054 XOR %ECX,%ECX |
(163) 0x428056 JMP 427d2a |
0x42805b INC %R9D |
0x42805e XOR %EDX,%EDX |
0x428060 JMP 4279e3 |
0x428065 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:117-125 |
Module | exec |
nb instructions | 81 |
nb uops | 81 |
loop length | 299 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 13.50 cycles |
front end | 13.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.50 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.50 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x38(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x3c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RDI),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RBX),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 428033 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x6d3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R15D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 428033 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x6d3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x3c(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42805b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x6fb> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%RBX,1),%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10D,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R10D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 428033 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x6d3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x34(%RSP),%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x3c(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%R13),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x8(%R13),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x28(%R13),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R11D,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R15D,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %ECX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4279e3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x83> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_cell.cpp:117-125 |
Module | exec |
nb instructions | 81 |
nb uops | 81 |
loop length | 299 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 13.50 cycles |
front end | 13.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.50 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.50 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x38(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x3c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RDI),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RBX),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 428033 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x6d3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R15D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 428033 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x6d3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x3c(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42805b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x6fb> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%RBX,1),%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10D,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R10D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 428033 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x6d3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x34(%RSP),%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x3c(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%R13),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x8(%R13),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x28(%R13),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R11D,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R15D,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %ECX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4279e3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3+0x83> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.3– | 3.4 | 1.71 |
▼Loop 163 - advec_cell.cpp:119-125 - exec– | 0 | 0 |
○Loop 164 - advec_cell.cpp:120-125 - exec | 3.39 | 1.71 |