Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage: 3.42% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage: 3.42% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 208 - 216 |
-------------------------------------------------------------------------------- |
208: #pragma omp parallel for simd collapse(2) |
209: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
210: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
211: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
212: double post_mass_s = pre_mass_s + mass_flux_y(i, j) - mass_flux_y(i + 0, j + 1); |
213: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 0, j + 1)) / post_mass_s; |
214: double advec_vol_s = pre_vol(i, j) + vol_flux_y(i, j) - vol_flux_y(i + 0, j + 1); |
215: density1(i, j) = post_mass_s / advec_vol_s; |
216: energy1(i, j) = post_ener_s; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x428d00 PUSH %RBP |
0x428d01 MOV %RSP,%RBP |
0x428d04 PUSH %R15 |
0x428d06 PUSH %R14 |
0x428d08 PUSH %R13 |
0x428d0a PUSH %R12 |
0x428d0c PUSH %RBX |
0x428d0d MOV %RDI,%R13 |
0x428d10 AND $-0x40,%RSP |
0x428d14 ADD $-0x80,%RSP |
0x428d18 MOV 0x38(%RDI),%EAX |
0x428d1b MOV 0x3c(%RDI),%EDX |
0x428d1e MOV 0x30(%RDI),%EDI |
0x428d21 MOV 0x34(%R13),%ECX |
0x428d25 ADD $0x2,%EDX |
0x428d28 INC %EDI |
0x428d2a LEA 0x1(%RAX),%R15D |
0x428d2e MOV %EDX,0x24(%RSP) |
0x428d32 MOV %EDI,0x20(%RSP) |
0x428d36 CMP %EDX,%R15D |
0x428d39 JGE 429433 |
0x428d3f MOV %EDX,%EBX |
0x428d41 LEA 0x2(%RCX),%R14D |
0x428d45 SUB %R15D,%EBX |
0x428d48 CMP %R14D,%EDI |
0x428d4b JGE 429433 |
0x428d51 MOV %R14D,%ESI |
0x428d54 SUB %EDI,%ESI |
0x428d56 MOV %ESI,0x30(%RSP) |
0x428d5a CALL 404650 <omp_get_num_threads@plt> |
0x428d5f MOV %EAX,%R12D |
0x428d62 CALL 404540 <omp_get_thread_num@plt> |
0x428d67 MOV 0x30(%RSP),%R9D |
0x428d6c XOR %EDX,%EDX |
0x428d6e MOV %EAX,%R8D |
0x428d71 IMUL %R9D,%EBX |
0x428d75 MOV %EBX,%EAX |
0x428d77 DIV %R12D |
0x428d7a MOV %EAX,%R10D |
0x428d7d CMP %EDX,%R8D |
0x428d80 JB 42945b |
0x428d86 IMUL %R10D,%R8D |
0x428d8a LEA (%R8,%RDX,1),%R12D |
0x428d8e LEA (%R10,%R12,1),%R11D |
0x428d92 MOV %R11D,0x1c(%RSP) |
0x428d97 CMP %R11D,%R12D |
0x428d9a JAE 429433 |
0x428da0 MOV %R12D,%EAX |
0x428da3 XOR %EDX,%EDX |
0x428da5 MOV 0x20(%RSP),%EDI |
0x428da9 VMOVQ (%R13),%XMM21 |
0x428db0 DIVL 0x30(%RSP) |
0x428db4 VMOVQ 0x20(%R13),%XMM20 |
0x428dbb VMOVQ 0x10(%R13),%XMM19 |
0x428dc2 MOV %R10D,%ECX |
0x428dc5 VMOVQ 0x8(%R13),%XMM18 |
0x428dcc VMOVQ 0x28(%R13),%XMM17 |
0x428dd3 VMOVQ 0x18(%R13),%XMM16 |
0x428dda ADD %EDX,%EDI |
0x428ddc MOV %R14D,%EDX |
0x428ddf LEA (%RAX,%R15,1),%R15D |
0x428de3 SUB %EDI,%EDX |
0x428de5 MOVSXD %R15D,%R15 |
0x428de8 MOV %EDI,0x50(%RSP) |
0x428dec MOV %EDX,%R13D |
0x428def NOP |
(169) 0x428df0 CMP %R13D,%ECX |
(169) 0x428df3 CMOVA %R13D,%ECX |
(169) 0x428df7 LEA (%R12,%RCX,1),%R13D |
(169) 0x428dfb MOV %R13D,0x34(%RSP) |
(169) 0x428e00 CMP %R13D,%R12D |
(169) 0x428e03 JAE 4293fd |
(169) 0x428e09 VMOVQ %XMM19,%RSI |
(169) 0x428e0f VMOVQ %XMM21,%R14 |
(169) 0x428e15 MOV (%RSI),%RAX |
(169) 0x428e18 MOV (%R14),%R10 |
(169) 0x428e1b VMOVQ 0x10(%R14),%XMM14 |
(169) 0x428e21 VMOVQ %XMM17,%R14 |
(169) 0x428e27 MOV 0x10(%RSI),%R8 |
(169) 0x428e2b MOV (%R14),%RSI |
(169) 0x428e2e VMOVQ %XMM18,%RDX |
(169) 0x428e34 MOV (%RDX),%R13 |
(169) 0x428e37 VMOVQ 0x10(%RDX),%XMM12 |
(169) 0x428e3c VMOVQ %XMM20,%RBX |
(169) 0x428e42 MOV (%RBX),%R11 |
(169) 0x428e45 VMOVQ 0x10(%RBX),%XMM13 |
(169) 0x428e4a MOV %RAX,%R9 |
(169) 0x428e4d IMUL %R15,%R10 |
(169) 0x428e51 IMUL %R15,%R9 |
(169) 0x428e55 IMUL %R15,%R13 |
(169) 0x428e59 IMUL %R15,%R11 |
(169) 0x428e5d MOV %R10,0x28(%RSP) |
(169) 0x428e62 LEA (%RAX,%R9,1),%RDI |
(169) 0x428e66 MOV %RSI,%RAX |
(169) 0x428e69 IMUL %R15,%RAX |
(169) 0x428e6d MOV %RDI,0x68(%RSP) |
(169) 0x428e72 MOV 0x10(%R14),%RDI |
(169) 0x428e76 VMOVQ %XMM16,%R14 |
(169) 0x428e7c MOV %R13,0x70(%RSP) |
(169) 0x428e81 MOV %R11,0x40(%RSP) |
(169) 0x428e86 MOV 0x10(%R14),%R13 |
(169) 0x428e8a LEA (%RSI,%RAX,1),%RDX |
(169) 0x428e8e MOV (%R14),%RSI |
(169) 0x428e91 MOV %RAX,0x48(%RSP) |
(169) 0x428e96 MOV %RDX,0x78(%RSP) |
(169) 0x428e9b LEA -0x1(%RCX),%EDX |
(169) 0x428e9e MOV %RSI,%RBX |
(169) 0x428ea1 IMUL %R15,%RBX |
(169) 0x428ea5 ADD %RBX,%RSI |
(169) 0x428ea8 MOV %RBX,0x58(%RSP) |
(169) 0x428ead MOV %RSI,0x60(%RSP) |
(169) 0x428eb2 CMP $0x6,%EDX |
(169) 0x428eb5 JBE 429450 |
(169) 0x428ebb MOVSXD 0x50(%RSP),%RDX |
(169) 0x428ec0 VMOVQ %XMM13,%RSI |
(169) 0x428ec5 LEA (%R10,%RDX,1),%R14 |
(169) 0x428ec9 LEA (%R11,%RDX,1),%R11 |
(169) 0x428ecd VMOVQ %XMM14,%R10 |
(169) 0x428ed2 LEA (%RAX,%RDX,1),%RAX |
(169) 0x428ed6 LEA (%R10,%R14,8),%RBX |
(169) 0x428eda LEA (%RSI,%R11,8),%R10 |
(169) 0x428ede MOV 0x68(%RSP),%RSI |
(169) 0x428ee3 LEA (%R9,%RDX,1),%R14 |
(169) 0x428ee7 LEA (%R8,%R14,8),%R11 |
(169) 0x428eeb LEA (%RDI,%RAX,8),%RAX |
(169) 0x428eef VMOVQ %R11,%XMM9 |
(169) 0x428ef4 LEA (%RSI,%RDX,1),%R14 |
(169) 0x428ef8 MOV 0x70(%RSP),%RSI |
(169) 0x428efd LEA (%R8,%R14,8),%R11 |
(169) 0x428f01 VMOVQ %R11,%XMM10 |
(169) 0x428f06 VMOVQ %XMM12,%R11 |
(169) 0x428f0b LEA (%RSI,%RDX,1),%R14 |
(169) 0x428f0f MOV 0x78(%RSP),%RSI |
(169) 0x428f14 LEA (%R11,%R14,8),%R11 |
(169) 0x428f18 LEA (%RSI,%RDX,1),%R14 |
(169) 0x428f1c MOV 0x58(%RSP),%RSI |
(169) 0x428f21 LEA (%RDI,%R14,8),%R14 |
(169) 0x428f25 ADD %RDX,%RSI |
(169) 0x428f28 LEA (%R13,%RSI,8),%RSI |
(169) 0x428f2d VMOVQ %RSI,%XMM3 |
(169) 0x428f32 MOV 0x60(%RSP),%RSI |
(169) 0x428f37 ADD %RSI,%RDX |
(169) 0x428f3a MOV %ECX,%ESI |
(169) 0x428f3c SHR $0x3,%ESI |
(169) 0x428f3f LEA (%R13,%RDX,8),%RDX |
(169) 0x428f44 VMOVQ %RDX,%XMM2 |
(169) 0x428f49 MOV %RSI,%RDX |
(169) 0x428f4c SAL $0x6,%RDX |
(169) 0x428f50 MOV %RDX,0x38(%RSP) |
(169) 0x428f55 XOR %EDX,%EDX |
(169) 0x428f57 AND $0x1,%ESI |
(169) 0x428f5a JE 428fd5 |
(169) 0x428f5c VMOVUPD (%RBX),%ZMM6 |
(169) 0x428f62 VMULPD (%R10),%ZMM6,%ZMM0 |
(169) 0x428f68 VMOVQ %XMM10,%RSI |
(169) 0x428f6d VMOVQ %XMM9,%RDX |
(169) 0x428f72 VMOVUPD (%RAX),%ZMM1 |
(169) 0x428f78 VSUBPD (%R14),%ZMM1,%ZMM22 |
(169) 0x428f7e CMPQ $0x40,0x38(%RSP) |
(169) 0x428f84 VSUBPD (%RSI),%ZMM0,%ZMM23 |
(169) 0x428f8a VMOVQ %XMM3,%RSI |
(169) 0x428f8f VFMADD231PD (%R11),%ZMM0,%ZMM22 |
(169) 0x428f95 VMOVUPD (%RSI),%ZMM4 |
(169) 0x428f9b VADDPD (%R10),%ZMM4,%ZMM5 |
(169) 0x428fa1 VADDPD (%RDX),%ZMM23,%ZMM24 |
(169) 0x428fa7 VMOVQ %XMM2,%RDX |
(169) 0x428fac VSUBPD (%RDX),%ZMM5,%ZMM7 |
(169) 0x428fb2 MOV $0x40,%EDX |
(169) 0x428fb7 VDIVPD %ZMM24,%ZMM22,%ZMM11 |
(169) 0x428fbd VDIVPD %ZMM7,%ZMM24,%ZMM8 |
(169) 0x428fc3 VMOVUPD %ZMM8,(%RBX) |
(169) 0x428fc9 VMOVUPD %ZMM11,(%R11) |
(169) 0x428fcf JE 4290e8 |
(169) 0x428fd5 MOV %R12D,0x18(%RSP) |
(169) 0x428fda MOV %ECX,0x14(%RSP) |
(169) 0x428fde MOV %R9,0x8(%RSP) |
(169) 0x428fe3 MOV %R15,(%RSP) |
(169) 0x428fe7 VMOVQ %XMM10,%RCX |
(169) 0x428fec VMOVQ %XMM9,%R9 |
(169) 0x428ff1 VMOVQ %XMM3,%R12 |
(169) 0x428ff6 VMOVQ %XMM2,%R15 |
(170) 0x428ffb VMOVUPD (%RBX,%RDX,1),%ZMM15 |
(170) 0x429002 VMULPD (%R10,%RDX,1),%ZMM15,%ZMM9 |
(170) 0x429009 VMOVUPD (%R12,%RDX,1),%ZMM3 |
(170) 0x429010 VADDPD (%R10,%RDX,1),%ZMM3,%ZMM2 |
(170) 0x429017 VMOVUPD (%RAX,%RDX,1),%ZMM10 |
(170) 0x42901e VSUBPD (%R14,%RDX,1),%ZMM10,%ZMM27 |
(170) 0x429025 VSUBPD (%RCX,%RDX,1),%ZMM9,%ZMM25 |
(170) 0x42902c VSUBPD (%R15,%RDX,1),%ZMM2,%ZMM6 |
(170) 0x429033 VFMADD231PD (%R11,%RDX,1),%ZMM9,%ZMM27 |
(170) 0x42903a VADDPD (%R9,%RDX,1),%ZMM25,%ZMM26 |
(170) 0x429041 VDIVPD %ZMM26,%ZMM27,%ZMM1 |
(170) 0x429047 VDIVPD %ZMM6,%ZMM26,%ZMM0 |
(170) 0x42904d VMOVUPD %ZMM0,(%RBX,%RDX,1) |
(170) 0x429054 VMOVUPD %ZMM1,(%R11,%RDX,1) |
(170) 0x42905b VMOVUPD 0x40(%RBX,%RDX,1),%ZMM4 |
(170) 0x429063 VMULPD 0x40(%R10,%RDX,1),%ZMM4,%ZMM1 |
(170) 0x42906b VMOVUPD 0x40(%R12,%RDX,1),%ZMM7 |
(170) 0x429073 VADDPD 0x40(%R10,%RDX,1),%ZMM7,%ZMM8 |
(170) 0x42907b VMOVUPD 0x40(%RAX,%RDX,1),%ZMM5 |
(170) 0x429083 VSUBPD 0x40(%R14,%RDX,1),%ZMM5,%ZMM30 |
(170) 0x42908b VSUBPD 0x40(%RCX,%RDX,1),%ZMM1,%ZMM28 |
(170) 0x429093 VSUBPD 0x40(%R15,%RDX,1),%ZMM8,%ZMM11 |
(170) 0x42909b VFMADD132PD 0x40(%R11,%RDX,1),%ZMM30,%ZMM1 |
(170) 0x4290a3 VADDPD 0x40(%R9,%RDX,1),%ZMM28,%ZMM29 |
(170) 0x4290ab VDIVPD %ZMM11,%ZMM29,%ZMM15 |
(170) 0x4290b1 VDIVPD %ZMM29,%ZMM1,%ZMM9 |
(170) 0x4290b7 VMOVUPD %ZMM15,0x40(%RBX,%RDX,1) |
(170) 0x4290bf VMOVUPD %ZMM9,0x40(%R11,%RDX,1) |
(170) 0x4290c7 SUB $-0x80,%RDX |
(170) 0x4290cb CMP %RDX,0x38(%RSP) |
(170) 0x4290d0 JNE 428ffb |
(169) 0x4290d6 MOV 0x18(%RSP),%R12D |
(169) 0x4290db MOV 0x14(%RSP),%ECX |
(169) 0x4290df MOV 0x8(%RSP),%R9 |
(169) 0x4290e4 MOV (%RSP),%R15 |
(169) 0x4290e8 MOV 0x50(%RSP),%EBX |
(169) 0x4290ec MOV %ECX,%EDX |
(169) 0x4290ee AND $-0x8,%EDX |
(169) 0x4290f1 ADD %EDX,%R12D |
(169) 0x4290f4 LEA (%RDX,%RBX,1),%ESI |
(169) 0x4290f7 TEST $0x7,%CL |
(169) 0x4290fa JE 4293f8 |
(169) 0x429100 SUB %EDX,%ECX |
(169) 0x429102 LEA -0x1(%RCX),%R10D |
(169) 0x429106 MOV %ECX,0x38(%RSP) |
(169) 0x42910a CMP $0x2,%R10D |
(169) 0x42910e JBE 4291f6 |
(169) 0x429114 MOVSXD 0x50(%RSP),%RAX |
(169) 0x429119 MOV 0x28(%RSP),%R11 |
(169) 0x42911e MOV 0x70(%RSP),%RBX |
(169) 0x429123 VMOVQ %XMM14,%RCX |
(169) 0x429128 LEA (%R11,%RAX,1),%R14 |
(169) 0x42912c LEA (%RBX,%RAX,1),%R10 |
(169) 0x429130 VMOVQ %XMM12,%R11 |
(169) 0x429135 MOV 0x58(%RSP),%RBX |
(169) 0x42913a ADD %RDX,%R14 |
(169) 0x42913d ADD %RDX,%R10 |
(169) 0x429140 LEA (%RCX,%R14,8),%RCX |
(169) 0x429144 LEA (%R11,%R10,8),%R14 |
(169) 0x429148 MOV 0x60(%RSP),%R10 |
(169) 0x42914d LEA (%RBX,%RAX,1),%R11 |
(169) 0x429151 ADD %RDX,%R11 |
(169) 0x429154 LEA (%R10,%RAX,1),%RBX |
(169) 0x429158 LEA (%RBX,%RDX,1),%R10 |
(169) 0x42915c MOV 0x40(%RSP),%RBX |
(169) 0x429161 MOV %R10,0x50(%RSP) |
(169) 0x429166 VMOVQ %XMM13,%R10 |
(169) 0x42916b ADD %RAX,%RBX |
(169) 0x42916e ADD %RDX,%RBX |
(169) 0x429171 VMOVUPD (%R10,%RBX,8),%YMM10 |
(169) 0x429177 MOV 0x68(%RSP),%RBX |
(169) 0x42917c VMULPD (%RCX),%YMM10,%YMM2 |
(169) 0x429180 LEA (%R9,%RAX,1),%R10 |
(169) 0x429184 ADD %RDX,%R10 |
(169) 0x429187 VADDPD (%R13,%R11,8),%YMM10,%YMM5 |
(169) 0x42918e ADD %RAX,%RBX |
(169) 0x429191 ADD %RDX,%RBX |
(169) 0x429194 VSUBPD (%R8,%RBX,8),%YMM2,%YMM3 |
(169) 0x42919a MOV 0x48(%RSP),%RBX |
(169) 0x42919f VADDPD (%R8,%R10,8),%YMM3,%YMM4 |
(169) 0x4291a5 MOV 0x78(%RSP),%R10 |
(169) 0x4291aa ADD %RAX,%RBX |
(169) 0x4291ad ADD %R10,%RAX |
(169) 0x4291b0 ADD %RDX,%RBX |
(169) 0x4291b3 ADD %RDX,%RAX |
(169) 0x4291b6 MOV 0x50(%RSP),%RDX |
(169) 0x4291bb VMOVUPD (%RDI,%RBX,8),%YMM6 |
(169) 0x4291c0 VSUBPD (%RDI,%RAX,8),%YMM6,%YMM0 |
(169) 0x4291c5 MOV 0x38(%RSP),%EAX |
(169) 0x4291c9 VFMADD132PD (%R14),%YMM0,%YMM2 |
(169) 0x4291ce VDIVPD %YMM4,%YMM2,%YMM8 |
(169) 0x4291d2 VSUBPD (%R13,%RDX,8),%YMM5,%YMM1 |
(169) 0x4291d9 VDIVPD %YMM1,%YMM4,%YMM7 |
(169) 0x4291dd VMOVUPD %YMM7,(%RCX) |
(169) 0x4291e1 VMOVUPD %YMM8,(%R14) |
(169) 0x4291e6 TEST $0x3,%AL |
(169) 0x4291e8 JE 4293f8 |
(169) 0x4291ee AND $-0x4,%EAX |
(169) 0x4291f1 ADD %EAX,%R12D |
(169) 0x4291f4 ADD %EAX,%ESI |
(169) 0x4291f6 MOV 0x28(%RSP),%RBX |
(169) 0x4291fb MOVSXD %ESI,%RAX |
(169) 0x4291fe VMOVQ %XMM14,%RCX |
(169) 0x429203 VMOVQ %XMM13,%R11 |
(169) 0x429208 MOV 0x68(%RSP),%RDX |
(169) 0x42920d LEA (%RBX,%RAX,1),%R14 |
(169) 0x429211 LEA (%RCX,%R14,8),%RCX |
(169) 0x429215 MOV 0x40(%RSP),%R14 |
(169) 0x42921a ADD %RAX,%RDX |
(169) 0x42921d LEA (%R14,%RAX,1),%R10 |
(169) 0x429221 VMOVSD (%R11,%R10,8),%XMM11 |
(169) 0x429227 MOV 0x70(%RSP),%R11 |
(169) 0x42922c VMULSD (%RCX),%XMM11,%XMM15 |
(169) 0x429230 VSUBSD (%R8,%RDX,8),%XMM15,%XMM9 |
(169) 0x429236 LEA (%R9,%RAX,1),%R10 |
(169) 0x42923a VADDSD (%R8,%R10,8),%XMM9,%XMM10 |
(169) 0x429240 VMOVQ %XMM12,%R10 |
(169) 0x429245 LEA (%R11,%RAX,1),%RDX |
(169) 0x429249 MOV 0x48(%RSP),%R11 |
(169) 0x42924e LEA (%R10,%RDX,8),%RDX |
(169) 0x429252 MOV 0x78(%RSP),%R10 |
(169) 0x429257 LEA (%R11,%RAX,1),%R11 |
(169) 0x42925b VMOVSD (%RDI,%R11,8),%XMM2 |
(169) 0x429261 MOV 0x58(%RSP),%R11 |
(169) 0x429266 ADD %RAX,%R10 |
(169) 0x429269 VSUBSD (%RDI,%R10,8),%XMM2,%XMM3 |
(169) 0x42926f VFMADD132SD (%RDX),%XMM3,%XMM15 |
(169) 0x429274 VDIVSD %XMM10,%XMM15,%XMM5 |
(169) 0x429279 LEA (%R11,%RAX,1),%R10 |
(169) 0x42927d MOV 0x60(%RSP),%R11 |
(169) 0x429282 VADDSD (%R13,%R10,8),%XMM11,%XMM4 |
(169) 0x429289 ADD %R11,%RAX |
(169) 0x42928c VSUBSD (%R13,%RAX,8),%XMM4,%XMM6 |
(169) 0x429293 VDIVSD %XMM6,%XMM10,%XMM0 |
(169) 0x429297 VMOVSD %XMM0,(%RCX) |
(169) 0x42929b MOV 0x34(%RSP),%ECX |
(169) 0x42929f VMOVSD %XMM5,(%RDX) |
(169) 0x4292a3 LEA 0x1(%R12),%EDX |
(169) 0x4292a8 LEA 0x1(%RSI),%EAX |
(169) 0x4292ab CMP %ECX,%EDX |
(169) 0x4292ad JAE 4293f8 |
(169) 0x4292b3 CLTQ |
(169) 0x4292b5 VMOVQ %XMM14,%R11 |
(169) 0x4292ba ADD $0x2,%R12D |
(169) 0x4292be ADD $0x2,%ESI |
(169) 0x4292c1 LEA (%RBX,%RAX,1),%R10 |
(169) 0x4292c5 LEA (%R14,%RAX,1),%RDX |
(169) 0x4292c9 VMOVQ %XMM13,%R14 |
(169) 0x4292ce LEA (%R11,%R10,8),%RCX |
(169) 0x4292d2 MOV 0x68(%RSP),%R10 |
(169) 0x4292d7 VMOVSD (%R14,%RDX,8),%XMM7 |
(169) 0x4292dd MOV 0x70(%RSP),%RDX |
(169) 0x4292e2 VMULSD (%RCX),%XMM7,%XMM8 |
(169) 0x4292e6 LEA (%R9,%RAX,1),%R11 |
(169) 0x4292ea VMOVQ %XMM12,%R14 |
(169) 0x4292ef ADD %RAX,%R10 |
(169) 0x4292f2 VSUBSD (%R8,%R10,8),%XMM8,%XMM1 |
(169) 0x4292f8 LEA (%RDX,%RAX,1),%R10 |
(169) 0x4292fc VADDSD (%R8,%R11,8),%XMM1,%XMM11 |
(169) 0x429302 MOV 0x48(%RSP),%R11 |
(169) 0x429307 LEA (%R14,%R10,8),%RDX |
(169) 0x42930b MOV 0x78(%RSP),%R10 |
(169) 0x429310 LEA (%R11,%RAX,1),%R14 |
(169) 0x429314 MOV 0x58(%RSP),%R11 |
(169) 0x429319 ADD %RAX,%R10 |
(169) 0x42931c VMOVSD (%RDI,%R14,8),%XMM15 |
(169) 0x429322 VSUBSD (%RDI,%R10,8),%XMM15,%XMM9 |
(169) 0x429328 MOV 0x60(%RSP),%R10 |
(169) 0x42932d VFMADD132SD (%RDX),%XMM9,%XMM8 |
(169) 0x429332 VDIVSD %XMM11,%XMM8,%XMM4 |
(169) 0x429337 LEA (%R11,%RAX,1),%R14 |
(169) 0x42933b VADDSD (%R13,%R14,8),%XMM7,%XMM10 |
(169) 0x429342 ADD %R10,%RAX |
(169) 0x429345 VSUBSD (%R13,%RAX,8),%XMM10,%XMM2 |
(169) 0x42934c MOV 0x34(%RSP),%EAX |
(169) 0x429350 VDIVSD %XMM2,%XMM11,%XMM3 |
(169) 0x429354 VMOVSD %XMM3,(%RCX) |
(169) 0x429358 VMOVSD %XMM4,(%RDX) |
(169) 0x42935c CMP %EAX,%R12D |
(169) 0x42935f JAE 4293f8 |
(169) 0x429365 MOV 0x40(%RSP),%RDX |
(169) 0x42936a MOV 0x68(%RSP),%RAX |
(169) 0x42936f MOVSXD %ESI,%RSI |
(169) 0x429372 VMOVQ %XMM14,%R12 |
(169) 0x429377 ADD %RSI,%RBX |
(169) 0x42937a VMOVQ %XMM13,%R14 |
(169) 0x42937f ADD %RSI,%R9 |
(169) 0x429382 LEA (%R12,%RBX,8),%RCX |
(169) 0x429386 MOV 0x48(%RSP),%R12 |
(169) 0x42938b ADD %RSI,%R11 |
(169) 0x42938e ADD %RSI,%R10 |
(169) 0x429391 ADD %RSI,%RDX |
(169) 0x429394 ADD %RSI,%RAX |
(169) 0x429397 VMOVSD (%R14,%RDX,8),%XMM14 |
(169) 0x42939d VMULSD (%RCX),%XMM14,%XMM13 |
(169) 0x4293a1 VSUBSD (%R8,%RAX,8),%XMM13,%XMM6 |
(169) 0x4293a7 VADDSD (%R8,%R9,8),%XMM6,%XMM5 |
(169) 0x4293ad MOV 0x70(%RSP),%R8 |
(169) 0x4293b2 MOV 0x78(%RSP),%RDX |
(169) 0x4293b7 VMOVQ %XMM12,%R9 |
(169) 0x4293bc ADD %RSI,%R12 |
(169) 0x4293bf VMOVSD (%RDI,%R12,8),%XMM12 |
(169) 0x4293c5 VADDSD (%R13,%R11,8),%XMM14,%XMM7 |
(169) 0x4293cc VSUBSD (%R13,%R10,8),%XMM7,%XMM8 |
(169) 0x4293d3 VDIVSD %XMM8,%XMM5,%XMM1 |
(169) 0x4293d8 ADD %RSI,%R8 |
(169) 0x4293db ADD %RSI,%RDX |
(169) 0x4293de LEA (%R9,%R8,8),%RBX |
(169) 0x4293e2 VSUBSD (%RDI,%RDX,8),%XMM12,%XMM0 |
(169) 0x4293e7 VFMADD132SD (%RBX),%XMM0,%XMM13 |
(169) 0x4293ec VMOVSD %XMM1,(%RCX) |
(169) 0x4293f0 VDIVSD %XMM5,%XMM13,%XMM11 |
(169) 0x4293f4 VMOVSD %XMM11,(%RBX) |
(169) 0x4293f8 MOV 0x34(%RSP),%R12D |
(169) 0x4293fd INC %R15 |
(169) 0x429400 LEA (%R15),%EDI |
(169) 0x429403 CMP %EDI,0x24(%RSP) |
(169) 0x429407 JLE 429430 |
(169) 0x429409 MOV 0x1c(%RSP),%ECX |
(169) 0x42940d MOV 0x20(%RSP),%ESI |
(169) 0x429411 MOV 0x30(%RSP),%R13D |
(169) 0x429416 SUB %R12D,%ECX |
(169) 0x429419 MOV %ESI,0x50(%RSP) |
(169) 0x42941d JMP 428df0 |
0x429422 NOPW %CS:(%RAX,%RAX,1) |
0x42942d NOPL (%RAX) |
0x429430 VZEROUPPER |
0x429433 LEA -0x28(%RBP),%RSP |
0x429437 POP %RBX |
0x429438 POP %R12 |
0x42943a POP %R13 |
0x42943c POP %R14 |
0x42943e POP %R15 |
0x429440 POP %RBP |
0x429441 RET |
0x429442 NOPW %CS:(%RAX,%RAX,1) |
0x42944d NOPL (%RAX) |
(169) 0x429450 MOV 0x50(%RSP),%ESI |
(169) 0x429454 XOR %EDX,%EDX |
(169) 0x429456 JMP 429100 |
0x42945b INC %R10D |
0x42945e XOR %EDX,%EDX |
0x429460 JMP 428d86 |
0x429465 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:208-216 |
Module | exec |
nb instructions | 83 |
nb uops | 82 |
loop length | 307 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 13.67 cycles |
front end | 13.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.67 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.67 |
all | 4% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 8% |
load | 10% |
store | 6% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x38(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x3c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 429433 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x733> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R15D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 429433 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x733> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R9D,%EBX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42945b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x75b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R10D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R10,%R12,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R11D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 429433 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x733> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R12D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x20(%RSP),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x30(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%R13),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R10D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x8(%R13),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x28(%R13),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDI,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R15D,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 428d86 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x86> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_cell.cpp:208-216 |
Module | exec |
nb instructions | 83 |
nb uops | 82 |
loop length | 307 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 13.67 cycles |
front end | 13.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.67 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.67 |
all | 4% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 8% |
load | 10% |
store | 6% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x38(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x3c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 429433 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x733> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R15D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 429433 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x733> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R9D,%EBX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42945b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x75b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R10D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R10,%R12,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R11D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 429433 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x733> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R12D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x20(%RSP),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x30(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%R13),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R10D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x8(%R13),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x28(%R13),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDI,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R15D,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 428d86 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7+0x86> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.7– | 3.42 | 1.72 |
▼Loop 169 - advec_cell.cpp:210-216 - exec– | 0 | 0 |
○Loop 170 - advec_cell.cpp:211-216 - exec | 3.41 | 1.72 |