Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:136-140 [...] | Coverage: 1.45% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:136-140 [...] | Coverage: 1.45% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 136 - 140 |
-------------------------------------------------------------------------------- |
136: #pragma omp parallel for simd collapse(2) |
137: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
138: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
139: pre_vol(i, j) = volume(i, j) + (vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j)); |
140: post_vol(i, j) = pre_vol(i, j) - (vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j)); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x428070 PUSH %RBP |
0x428071 MOV %RSP,%RBP |
0x428074 PUSH %R15 |
0x428076 PUSH %R14 |
0x428078 PUSH %R13 |
0x42807a PUSH %R12 |
0x42807c PUSH %RBX |
0x42807d MOV %RDI,%R14 |
0x428080 AND $-0x40,%RSP |
0x428084 ADD $-0x80,%RSP |
0x428088 MOV 0x30(%RDI),%EAX |
0x42808b MOV 0x34(%R14),%ECX |
0x42808f MOV 0x28(%R14),%EDX |
0x428093 MOV 0x2c(%R14),%EBX |
0x428097 ADD $0x4,%ECX |
0x42809a LEA -0x1(%RDX),%ESI |
0x42809d LEA -0x1(%RAX),%EDI |
0x4280a0 MOV %ECX,0x3c(%RSP) |
0x4280a4 MOV %ESI,0x38(%RSP) |
0x4280a8 CMP %ECX,%EDI |
0x4280aa JGE 428783 |
0x4280b0 LEA 0x4(%RBX),%R13D |
0x4280b4 MOV %ECX,%EBX |
0x4280b6 MOV %EDI,0x78(%RSP) |
0x4280ba SUB %EDI,%EBX |
0x4280bc CMP %R13D,%ESI |
0x4280bf JGE 428783 |
0x4280c5 MOV %R13D,%R15D |
0x4280c8 SUB %ESI,%R15D |
0x4280cb MOV %R15D,0x34(%RSP) |
0x4280d0 CALL 404650 <omp_get_num_threads@plt> |
0x4280d5 MOV %EAX,%R12D |
0x4280d8 CALL 404540 <omp_get_thread_num@plt> |
0x4280dd XOR %EDX,%EDX |
0x4280df MOV 0x78(%RSP),%R10D |
0x4280e4 MOV %EAX,%R8D |
0x4280e7 MOV %R15D,%EAX |
0x4280ea IMUL %EBX,%EAX |
0x4280ed DIV %R12D |
0x4280f0 CMP %EDX,%R8D |
0x4280f3 MOV %EAX,%R9D |
0x4280f6 JB 4287bb |
0x4280fc IMUL %R9D,%R8D |
0x428100 LEA (%R8,%RDX,1),%R15D |
0x428104 LEA (%R9,%R15,1),%R11D |
0x428108 MOV %R11D,0x30(%RSP) |
0x42810d CMP %R11D,%R15D |
0x428110 JAE 428783 |
0x428116 MOV %R15D,%EAX |
0x428119 XOR %EDX,%EDX |
0x42811b MOV 0x38(%RSP),%EDI |
0x42811f VMOVQ (%R14),%XMM11 |
0x428124 DIVL 0x34(%RSP) |
0x428128 VMOVQ 0x10(%R14),%XMM10 |
0x42812e VMOVQ 0x8(%R14),%XMM9 |
0x428134 VMOVQ 0x18(%R14),%XMM8 |
0x42813a VMOVQ 0x20(%R14),%XMM7 |
0x428140 ADD %EDI,%EDX |
0x428142 LEA (%RAX,%R10,1),%ECX |
0x428146 SUB %EDX,%R13D |
0x428149 MOV %EDX,0x74(%RSP) |
0x42814d MOVSXD %ECX,%RSI |
0x428150 MOV %R9D,%EDX |
0x428153 NOPW %CS:(%RAX,%RAX,1) |
0x42815e XCHG %AX,%AX |
(165) 0x428160 CMP %R13D,%EDX |
(165) 0x428163 CMOVA %R13D,%EDX |
(165) 0x428167 LEA (%R15,%RDX,1),%R13D |
(165) 0x42816b MOV %R13D,0x70(%RSP) |
(165) 0x428170 CMP %R13D,%R15D |
(165) 0x428173 JAE 4287a0 |
(165) 0x428179 VMOVQ %XMM11,%RBX |
(165) 0x42817e VMOVQ %XMM10,%R8 |
(165) 0x428183 LEA 0x1(%RSI),%RDI |
(165) 0x428187 VMOVQ %XMM9,%RCX |
(165) 0x42818c VMOVQ %XMM8,%R14 |
(165) 0x428191 MOV (%R8),%R9 |
(165) 0x428194 MOV (%RBX),%R10 |
(165) 0x428197 MOV 0x10(%R8),%R12 |
(165) 0x42819b MOV (%RCX),%R13 |
(165) 0x42819e MOV (%R14),%R8 |
(165) 0x4281a1 MOV %RDI,0x40(%RSP) |
(165) 0x4281a6 VMOVQ 0x10(%RBX),%XMM18 |
(165) 0x4281ad VMOVQ %XMM7,%RBX |
(165) 0x4281b2 VMOVQ 0x10(%R14),%XMM15 |
(165) 0x4281b8 MOV 0x10(%RCX),%R11 |
(165) 0x4281bc VMOVQ 0x10(%RBX),%XMM3 |
(165) 0x4281c1 IMUL %RSI,%R10 |
(165) 0x4281c5 IMUL %RSI,%R13 |
(165) 0x4281c9 IMUL %RSI,%R8 |
(165) 0x4281cd IMUL (%RBX),%RSI |
(165) 0x4281d1 MOV %R10,0x60(%RSP) |
(165) 0x4281d6 IMUL %R9,%RDI |
(165) 0x4281da MOV %R13,0x58(%RSP) |
(165) 0x4281df MOV %R8,0x68(%RSP) |
(165) 0x4281e4 MOV %RDI,%RAX |
(165) 0x4281e7 MOV %RDI,0x48(%RSP) |
(165) 0x4281ec SUB %R9,%RAX |
(165) 0x4281ef MOV %RSI,0x78(%RSP) |
(165) 0x4281f4 LEA -0x1(%RDX),%ESI |
(165) 0x4281f7 MOV %RAX,0x50(%RSP) |
(165) 0x4281fc CMP $0x6,%ESI |
(165) 0x4281ff JBE 4287b0 |
(165) 0x428205 MOVSXD 0x74(%RSP),%RCX |
(165) 0x42820a VMOVQ %XMM18,%R9 |
(165) 0x428210 VMOVQ %XMM15,%R14 |
(165) 0x428215 ADD %RCX,%R10 |
(165) 0x428218 LEA (%RCX,%R8,1),%R8 |
(165) 0x42821c LEA (%RCX,%RAX,1),%RAX |
(165) 0x428220 LEA 0x1(%RCX,%R13,1),%R13 |
(165) 0x428225 LEA (%R9,%R10,8),%R9 |
(165) 0x428229 LEA (%R14,%R8,8),%R14 |
(165) 0x42822d LEA (%RCX,%RDI,1),%R10 |
(165) 0x428231 MOV %EDX,%R8D |
(165) 0x428234 MOV 0x78(%RSP),%RDI |
(165) 0x428239 LEA (%R12,%RAX,8),%RBX |
(165) 0x42823d VMOVQ %XMM3,%RAX |
(165) 0x428242 LEA (%R12,%R10,8),%R10 |
(165) 0x428246 SHR $0x3,%R8D |
(165) 0x42824a SAL $0x6,%R8 |
(165) 0x42824e ADD %RDI,%RCX |
(165) 0x428251 LEA -0x40(%R8),%RDI |
(165) 0x428255 SHR $0x6,%RDI |
(165) 0x428259 SAL $0x3,%R13 |
(165) 0x42825d INC %RDI |
(165) 0x428260 LEA (%RAX,%RCX,8),%RAX |
(165) 0x428264 XOR %ECX,%ECX |
(165) 0x428266 LEA (%R11,%R13,1),%RSI |
(165) 0x42826a AND $0x3,%EDI |
(165) 0x42826d LEA -0x8(%R11,%R13,1),%R13 |
(165) 0x428272 JE 428379 |
(165) 0x428278 CMP $0x1,%RDI |
(165) 0x42827c JE 428320 |
(165) 0x428282 CMP $0x2,%RDI |
(165) 0x428286 JE 4282d0 |
(165) 0x428288 VMOVUPD (%R9),%ZMM4 |
(165) 0x42828e VMOVUPD (%RBX),%ZMM6 |
(165) 0x428294 VADDPD (%R10),%ZMM4,%ZMM16 |
(165) 0x42829a MOV $0x40,%ECX |
(165) 0x42829f VADDPD (%R13),%ZMM6,%ZMM17 |
(165) 0x4282a6 VSUBPD %ZMM17,%ZMM16,%ZMM19 |
(165) 0x4282ac VADDPD (%RSI),%ZMM19,%ZMM20 |
(165) 0x4282b2 VMOVUPD %ZMM20,(%R14) |
(165) 0x4282b8 VMOVUPD (%RBX),%ZMM5 |
(165) 0x4282be VSUBPD (%R10),%ZMM5,%ZMM21 |
(165) 0x4282c4 VADDPD %ZMM20,%ZMM21,%ZMM22 |
(165) 0x4282ca VMOVUPD %ZMM22,(%RAX) |
(165) 0x4282d0 VMOVUPD (%R9,%RCX,1),%ZMM0 |
(165) 0x4282d7 VMOVUPD (%RBX,%RCX,1),%ZMM1 |
(165) 0x4282de VADDPD (%R10,%RCX,1),%ZMM0,%ZMM23 |
(165) 0x4282e5 VADDPD (%R13,%RCX,1),%ZMM1,%ZMM24 |
(165) 0x4282ed VSUBPD %ZMM24,%ZMM23,%ZMM25 |
(165) 0x4282f3 VADDPD (%RSI,%RCX,1),%ZMM25,%ZMM26 |
(165) 0x4282fa VMOVUPD %ZMM26,(%R14,%RCX,1) |
(165) 0x428301 VMOVUPD (%RBX,%RCX,1),%ZMM2 |
(165) 0x428308 VSUBPD (%R10,%RCX,1),%ZMM2,%ZMM27 |
(165) 0x42830f VADDPD %ZMM26,%ZMM27,%ZMM28 |
(165) 0x428315 VMOVUPD %ZMM28,(%RAX,%RCX,1) |
(165) 0x42831c ADD $0x40,%RCX |
(165) 0x428320 VMOVUPD (%R9,%RCX,1),%ZMM12 |
(165) 0x428327 VMOVUPD (%RBX,%RCX,1),%ZMM13 |
(165) 0x42832e VADDPD (%R10,%RCX,1),%ZMM12,%ZMM29 |
(165) 0x428335 VADDPD (%R13,%RCX,1),%ZMM13,%ZMM30 |
(165) 0x42833d VSUBPD %ZMM30,%ZMM29,%ZMM31 |
(165) 0x428343 VADDPD (%RSI,%RCX,1),%ZMM31,%ZMM16 |
(165) 0x42834a VMOVUPD %ZMM16,(%R14,%RCX,1) |
(165) 0x428351 VMOVUPD (%RBX,%RCX,1),%ZMM14 |
(165) 0x428358 VSUBPD (%R10,%RCX,1),%ZMM14,%ZMM17 |
(165) 0x42835f VADDPD %ZMM16,%ZMM17,%ZMM19 |
(165) 0x428365 VMOVUPD %ZMM19,(%RAX,%RCX,1) |
(165) 0x42836c ADD $0x40,%RCX |
(165) 0x428370 CMP %RCX,%R8 |
(165) 0x428373 JE 4284d1 |
(166) 0x428379 VMOVUPD (%R9,%RCX,1),%ZMM4 |
(166) 0x428380 VMOVUPD (%RBX,%RCX,1),%ZMM6 |
(166) 0x428387 VADDPD (%R10,%RCX,1),%ZMM4,%ZMM5 |
(166) 0x42838e VADDPD (%R13,%RCX,1),%ZMM6,%ZMM20 |
(166) 0x428396 VSUBPD %ZMM20,%ZMM5,%ZMM0 |
(166) 0x42839c VADDPD (%RSI,%RCX,1),%ZMM0,%ZMM1 |
(166) 0x4283a3 VMOVUPD %ZMM1,(%R14,%RCX,1) |
(166) 0x4283aa VMOVUPD (%RBX,%RCX,1),%ZMM2 |
(166) 0x4283b1 VSUBPD (%R10,%RCX,1),%ZMM2,%ZMM21 |
(166) 0x4283b8 VADDPD %ZMM1,%ZMM21,%ZMM22 |
(166) 0x4283be VMOVUPD %ZMM22,(%RAX,%RCX,1) |
(166) 0x4283c5 VMOVUPD 0x40(%R9,%RCX,1),%ZMM12 |
(166) 0x4283cd VMOVUPD 0x40(%RBX,%RCX,1),%ZMM14 |
(166) 0x4283d5 VADDPD 0x40(%R10,%RCX,1),%ZMM12,%ZMM13 |
(166) 0x4283dd VADDPD 0x40(%R13,%RCX,1),%ZMM14,%ZMM23 |
(166) 0x4283e5 VSUBPD %ZMM23,%ZMM13,%ZMM4 |
(166) 0x4283eb VADDPD 0x40(%RSI,%RCX,1),%ZMM4,%ZMM5 |
(166) 0x4283f3 VMOVUPD %ZMM5,0x40(%R14,%RCX,1) |
(166) 0x4283fb VMOVUPD 0x40(%RBX,%RCX,1),%ZMM6 |
(166) 0x428403 VSUBPD 0x40(%R10,%RCX,1),%ZMM6,%ZMM24 |
(166) 0x42840b VADDPD %ZMM5,%ZMM24,%ZMM25 |
(166) 0x428411 VMOVUPD %ZMM25,0x40(%RAX,%RCX,1) |
(166) 0x428419 VMOVUPD 0x80(%R9,%RCX,1),%ZMM0 |
(166) 0x428421 VMOVUPD 0x80(%RBX,%RCX,1),%ZMM2 |
(166) 0x428429 VADDPD 0x80(%R10,%RCX,1),%ZMM0,%ZMM1 |
(166) 0x428431 VADDPD 0x80(%R13,%RCX,1),%ZMM2,%ZMM26 |
(166) 0x428439 VSUBPD %ZMM26,%ZMM1,%ZMM12 |
(166) 0x42843f VADDPD 0x80(%RSI,%RCX,1),%ZMM12,%ZMM13 |
(166) 0x428447 VMOVUPD %ZMM13,0x80(%R14,%RCX,1) |
(166) 0x42844f VMOVUPD 0x80(%RBX,%RCX,1),%ZMM14 |
(166) 0x428457 VSUBPD 0x80(%R10,%RCX,1),%ZMM14,%ZMM27 |
(166) 0x42845f VADDPD %ZMM13,%ZMM27,%ZMM28 |
(166) 0x428465 VMOVUPD %ZMM28,0x80(%RAX,%RCX,1) |
(166) 0x42846d VMOVUPD 0xc0(%R9,%RCX,1),%ZMM4 |
(166) 0x428475 VMOVUPD 0xc0(%RBX,%RCX,1),%ZMM6 |
(166) 0x42847d VADDPD 0xc0(%R10,%RCX,1),%ZMM4,%ZMM5 |
(166) 0x428485 VADDPD 0xc0(%R13,%RCX,1),%ZMM6,%ZMM29 |
(166) 0x42848d VSUBPD %ZMM29,%ZMM5,%ZMM0 |
(166) 0x428493 VADDPD 0xc0(%RSI,%RCX,1),%ZMM0,%ZMM1 |
(166) 0x42849b VMOVUPD %ZMM1,0xc0(%R14,%RCX,1) |
(166) 0x4284a3 VMOVUPD 0xc0(%RBX,%RCX,1),%ZMM2 |
(166) 0x4284ab VSUBPD 0xc0(%R10,%RCX,1),%ZMM2,%ZMM30 |
(166) 0x4284b3 VADDPD %ZMM1,%ZMM30,%ZMM31 |
(166) 0x4284b9 VMOVUPD %ZMM31,0xc0(%RAX,%RCX,1) |
(166) 0x4284c1 ADD $0x100,%RCX |
(166) 0x4284c8 CMP %RCX,%R8 |
(166) 0x4284cb JNE 428379 |
(165) 0x4284d1 MOV 0x74(%RSP),%R9D |
(165) 0x4284d6 MOV %EDX,%ECX |
(165) 0x4284d8 AND $-0x8,%ECX |
(165) 0x4284db ADD %ECX,%R15D |
(165) 0x4284de LEA (%RCX,%R9,1),%EDI |
(165) 0x4284e2 TEST $0x7,%DL |
(165) 0x4284e5 JE 428752 |
(165) 0x4284eb SUB %ECX,%EDX |
(165) 0x4284ed LEA -0x1(%RDX),%R10D |
(165) 0x4284f1 CMP $0x2,%R10D |
(165) 0x4284f5 JBE 4285b0 |
(165) 0x4284fb MOVSXD 0x74(%RSP),%R13 |
(165) 0x428500 MOV 0x50(%RSP),%RAX |
(165) 0x428505 MOV 0x48(%RSP),%RBX |
(165) 0x42850a MOV 0x58(%RSP),%R10 |
(165) 0x42850f LEA (%RAX,%R13,1),%R8 |
(165) 0x428513 MOV 0x60(%RSP),%RAX |
(165) 0x428518 LEA (%RBX,%R13,1),%RSI |
(165) 0x42851c ADD %RCX,%RSI |
(165) 0x42851f LEA (%R10,%R13,1),%RBX |
(165) 0x428523 ADD %RCX,%R8 |
(165) 0x428526 VMOVQ %XMM18,%R10 |
(165) 0x42852c LEA (%R12,%RSI,8),%R14 |
(165) 0x428530 LEA 0x1(%RCX,%RBX,1),%RSI |
(165) 0x428535 LEA (%R12,%R8,8),%R9 |
(165) 0x428539 MOV 0x68(%RSP),%RBX |
(165) 0x42853e SAL $0x3,%RSI |
(165) 0x428542 LEA (%RAX,%R13,1),%R8 |
(165) 0x428546 VMOVUPD (%R11,%RSI,1),%YMM14 |
(165) 0x42854c VSUBPD (%R9),%YMM14,%YMM4 |
(165) 0x428551 VMOVQ %XMM15,%RAX |
(165) 0x428556 ADD %RCX,%R8 |
(165) 0x428559 VMOVUPD (%R10,%R8,8),%YMM12 |
(165) 0x42855f VADDPD (%R14),%YMM12,%YMM13 |
(165) 0x428564 VADDPD %YMM4,%YMM13,%YMM5 |
(165) 0x428568 VSUBPD -0x8(%R11,%RSI,1),%YMM5,%YMM6 |
(165) 0x42856f LEA (%RBX,%R13,1),%RSI |
(165) 0x428573 ADD %RCX,%RSI |
(165) 0x428576 VMOVUPD %YMM6,(%RAX,%RSI,8) |
(165) 0x42857b VMOVUPD (%R9),%YMM0 |
(165) 0x428580 VSUBPD (%R14),%YMM0,%YMM1 |
(165) 0x428585 MOV 0x78(%RSP),%R14 |
(165) 0x42858a ADD %R14,%R13 |
(165) 0x42858d ADD %RCX,%R13 |
(165) 0x428590 VMOVQ %XMM3,%RCX |
(165) 0x428595 VADDPD %YMM6,%YMM1,%YMM2 |
(165) 0x428599 VMOVUPD %YMM2,(%RCX,%R13,8) |
(165) 0x42859f TEST $0x3,%DL |
(165) 0x4285a2 JE 428752 |
(165) 0x4285a8 AND $-0x4,%EDX |
(165) 0x4285ab ADD %EDX,%R15D |
(165) 0x4285ae ADD %EDX,%EDI |
(165) 0x4285b0 MOV 0x48(%RSP),%RBX |
(165) 0x4285b5 MOVSXD %EDI,%RDX |
(165) 0x4285b8 MOV 0x58(%RSP),%R14 |
(165) 0x4285bd LEA 0x1(%RDI),%EAX |
(165) 0x4285c0 CLTQ |
(165) 0x4285c2 LEA (%RBX,%RDX,1),%R13 |
(165) 0x4285c6 LEA (%R14,%RAX,1),%R10 |
(165) 0x4285ca LEA (%R12,%R13,8),%RCX |
(165) 0x4285ce MOV 0x50(%RSP),%R13 |
(165) 0x4285d3 LEA (%R11,%R10,8),%RSI |
(165) 0x4285d7 VMOVQ %XMM18,%R10 |
(165) 0x4285dd VMOVQ %RSI,%XMM12 |
(165) 0x4285e2 LEA (%R13,%RDX,1),%R9 |
(165) 0x4285e7 LEA (%R12,%R9,8),%R8 |
(165) 0x4285eb MOV 0x60(%RSP),%R9 |
(165) 0x4285f0 LEA (%R9,%RDX,1),%RSI |
(165) 0x4285f4 VMOVSD (%R10,%RSI,8),%XMM13 |
(165) 0x4285fa VMOVQ %XMM12,%RSI |
(165) 0x4285ff LEA (%R14,%RDX,1),%R10 |
(165) 0x428603 VADDSD (%RCX),%XMM13,%XMM14 |
(165) 0x428607 VMOVSD (%RSI),%XMM4 |
(165) 0x42860b MOV 0x68(%RSP),%RSI |
(165) 0x428610 VSUBSD (%R8),%XMM4,%XMM5 |
(165) 0x428615 VADDSD %XMM5,%XMM14,%XMM6 |
(165) 0x428619 VSUBSD (%R11,%R10,8),%XMM6,%XMM0 |
(165) 0x42861f VMOVQ %XMM15,%R10 |
(165) 0x428624 ADD %RDX,%RSI |
(165) 0x428627 VMOVSD %XMM0,(%R10,%RSI,8) |
(165) 0x42862d MOV 0x78(%RSP),%RSI |
(165) 0x428632 VMOVSD (%R8),%XMM1 |
(165) 0x428637 MOV 0x70(%RSP),%R8D |
(165) 0x42863c VSUBSD (%RCX),%XMM1,%XMM2 |
(165) 0x428640 VMOVQ %XMM3,%RCX |
(165) 0x428645 VADDSD %XMM0,%XMM2,%XMM13 |
(165) 0x428649 ADD %RSI,%RDX |
(165) 0x42864c VMOVSD %XMM13,(%RCX,%RDX,8) |
(165) 0x428651 LEA 0x1(%R15),%EDX |
(165) 0x428655 CMP %R8D,%EDX |
(165) 0x428658 JAE 428752 |
(165) 0x42865e LEA 0x2(%RDI),%EDX |
(165) 0x428661 LEA (%RAX,%RBX,1),%R10 |
(165) 0x428665 LEA (%RAX,%R13,1),%RSI |
(165) 0x428669 ADD %RAX,%R9 |
(165) 0x42866c MOVSXD %EDX,%RDX |
(165) 0x42866f LEA (%R12,%R10,8),%RCX |
(165) 0x428673 LEA (%R12,%RSI,8),%R8 |
(165) 0x428677 ADD $0x2,%R15D |
(165) 0x42867b LEA (%R14,%RDX,1),%R10 |
(165) 0x42867f LEA (%R11,%R10,8),%RSI |
(165) 0x428683 VMOVQ %XMM18,%R10 |
(165) 0x428689 VMOVSD (%R10,%R9,8),%XMM14 |
(165) 0x42868f MOV 0x68(%RSP),%R10 |
(165) 0x428694 VMOVQ %XMM12,%R9 |
(165) 0x428699 VADDSD (%RCX),%XMM14,%XMM4 |
(165) 0x42869d VMOVSD (%R9),%XMM12 |
(165) 0x4286a2 VMOVQ %XMM15,%R9 |
(165) 0x4286a7 VADDSD (%R8),%XMM12,%XMM5 |
(165) 0x4286ac VSUBSD %XMM5,%XMM4,%XMM6 |
(165) 0x4286b0 VADDSD (%RSI),%XMM6,%XMM0 |
(165) 0x4286b4 ADD %RAX,%R10 |
(165) 0x4286b7 VMOVSD %XMM0,(%R9,%R10,8) |
(165) 0x4286bd MOV 0x78(%RSP),%R10 |
(165) 0x4286c2 VMOVSD (%R8),%XMM1 |
(165) 0x4286c7 VSUBSD (%RCX),%XMM1,%XMM2 |
(165) 0x4286cb VMOVQ %XMM3,%RCX |
(165) 0x4286d0 VADDSD %XMM0,%XMM2,%XMM13 |
(165) 0x4286d4 ADD %R10,%RAX |
(165) 0x4286d7 VMOVSD %XMM13,(%RCX,%RAX,8) |
(165) 0x4286dc MOV 0x70(%RSP),%EAX |
(165) 0x4286e0 CMP %EAX,%R15D |
(165) 0x4286e3 JAE 428752 |
(165) 0x4286e5 ADD %RDX,%RBX |
(165) 0x4286e8 ADD %RDX,%R13 |
(165) 0x4286eb ADD $0x3,%EDI |
(165) 0x4286ee VMOVQ %XMM18,%R9 |
(165) 0x4286f4 LEA (%R12,%RBX,8),%R15 |
(165) 0x4286f8 LEA (%R12,%R13,8),%R8 |
(165) 0x4286fc MOV 0x60(%RSP),%R12 |
(165) 0x428701 MOVSXD %EDI,%RDI |
(165) 0x428704 ADD %R14,%RDI |
(165) 0x428707 VMOVSD (%R8),%XMM12 |
(165) 0x42870c VADDSD (%RSI),%XMM12,%XMM5 |
(165) 0x428710 VMOVQ %XMM15,%R14 |
(165) 0x428715 ADD %RDX,%R10 |
(165) 0x428718 ADD %RDX,%R12 |
(165) 0x42871b VMOVSD (%R9,%R12,8),%XMM14 |
(165) 0x428721 VADDSD (%R15),%XMM14,%XMM4 |
(165) 0x428726 VSUBSD %XMM5,%XMM4,%XMM6 |
(165) 0x42872a VADDSD (%R11,%RDI,8),%XMM6,%XMM0 |
(165) 0x428730 MOV 0x68(%RSP),%R11 |
(165) 0x428735 ADD %RDX,%R11 |
(165) 0x428738 VMOVSD %XMM0,(%R14,%R11,8) |
(165) 0x42873e VMOVSD (%R8),%XMM15 |
(165) 0x428743 VSUBSD (%R15),%XMM15,%XMM1 |
(165) 0x428748 VADDSD %XMM0,%XMM1,%XMM2 |
(165) 0x42874c VMOVSD %XMM2,(%RCX,%R10,8) |
(165) 0x428752 MOV 0x70(%RSP),%R15D |
(165) 0x428757 MOV 0x40(%RSP),%RSI |
(165) 0x42875c LEA (%RSI),%ECX |
(165) 0x42875e CMP %ECX,0x3c(%RSP) |
(165) 0x428762 JLE 428780 |
(165) 0x428764 MOV 0x30(%RSP),%EDX |
(165) 0x428768 MOV 0x38(%RSP),%EAX |
(165) 0x42876c MOV 0x34(%RSP),%R13D |
(165) 0x428771 SUB %R15D,%EDX |
(165) 0x428774 MOV %EAX,0x74(%RSP) |
(165) 0x428778 JMP 428160 |
0x42877d NOPL (%RAX) |
0x428780 VZEROUPPER |
0x428783 LEA -0x28(%RBP),%RSP |
0x428787 POP %RBX |
0x428788 POP %R12 |
0x42878a POP %R13 |
0x42878c POP %R14 |
0x42878e POP %R15 |
0x428790 POP %RBP |
0x428791 RET |
0x428792 NOPW %CS:(%RAX,%RAX,1) |
0x42879d NOPL (%RAX) |
(165) 0x4287a0 LEA 0x1(%RSI),%R14 |
(165) 0x4287a4 MOV %R14,0x40(%RSP) |
(165) 0x4287a9 JMP 428757 |
0x4287ab NOPL (%RAX,%RAX,1) |
(165) 0x4287b0 MOV 0x74(%RSP),%EDI |
(165) 0x4287b4 XOR %ECX,%ECX |
(165) 0x4287b6 JMP 4284eb |
0x4287bb INC %R9D |
0x4287be XOR %EDX,%EDX |
0x4287c0 JMP 4280fc |
0x4287c5 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:136-140 |
Module | exec |
nb instructions | 82 |
nb uops | 80 |
loop length | 301 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 13.33 cycles |
front end | 13.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.33 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%R14),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RDX),%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %ECX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 428783 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x713> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA 0x4(%RBX),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EDI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R13D,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 428783 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x713> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %ESI,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x78(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JB 4287bb <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x74b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%R15,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R11D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 428783 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x713> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x38(%RSP),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R14),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x34(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x10(%R14),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x8(%R14),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R14),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x20(%R14),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDI,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R10,1),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4280fc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x8c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_cell.cpp:136-140 |
Module | exec |
nb instructions | 82 |
nb uops | 80 |
loop length | 301 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 13.33 cycles |
front end | 13.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.33 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%R14),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%R14),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RDX),%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %ECX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 428783 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x713> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA 0x4(%RBX),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EDI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R13D,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 428783 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x713> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %ESI,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x78(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JB 4287bb <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x74b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%R15,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R11D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 428783 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x713> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x38(%RSP),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R14),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x34(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x10(%R14),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x8(%R14),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R14),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x20(%R14),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDI,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R10,1),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4280fc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4+0x8c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4– | 1.45 | 0.73 |
▼Loop 165 - advec_cell.cpp:136-140 - exec– | 0 | 0 |
○Loop 166 - advec_cell.cpp:139-140 - exec | 1.45 | 0.73 |