Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:44-48 [...] | Coverage: 1.52% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:44-48 [...] | Coverage: 1.52% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 44 - 48 |
-------------------------------------------------------------------------------- |
44: #pragma omp parallel for simd collapse(2) |
45: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
46: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
47: pre_vol(i, j) = volume(i, j) + (vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j)); |
48: post_vol(i, j) = pre_vol(i, j) - (vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j)); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x426cf0 PUSH %RBP |
0x426cf1 MOV %RSP,%RBP |
0x426cf4 PUSH %R15 |
0x426cf6 PUSH %R14 |
0x426cf8 PUSH %R13 |
0x426cfa PUSH %R12 |
0x426cfc PUSH %RBX |
0x426cfd MOV %RDI,%R14 |
0x426d00 AND $-0x40,%RSP |
0x426d04 ADD $-0x80,%RSP |
0x426d08 MOV 0x30(%RDI),%EAX |
0x426d0b MOV 0x34(%R14),%ECX |
0x426d0f MOV 0x28(%R14),%EBX |
0x426d13 MOV 0x2c(%R14),%ESI |
0x426d17 ADD $0x4,%ECX |
0x426d1a LEA -0x1(%RBX),%EDX |
0x426d1d LEA -0x1(%RAX),%EDI |
0x426d20 MOV %ECX,0x3c(%RSP) |
0x426d24 MOV %EDX,0x38(%RSP) |
0x426d28 CMP %ECX,%EDI |
0x426d2a JGE 4273f3 |
0x426d30 MOV %ECX,%EBX |
0x426d32 LEA 0x4(%RSI),%R13D |
0x426d36 MOV %EDI,0x78(%RSP) |
0x426d3a SUB %EDI,%EBX |
0x426d3c CMP %R13D,%EDX |
0x426d3f JGE 4273f3 |
0x426d45 MOV %R13D,%R15D |
0x426d48 SUB %EDX,%R15D |
0x426d4b MOV %R15D,0x34(%RSP) |
0x426d50 CALL 404650 <omp_get_num_threads@plt> |
0x426d55 MOV %EAX,%R12D |
0x426d58 CALL 404540 <omp_get_thread_num@plt> |
0x426d5d XOR %EDX,%EDX |
0x426d5f MOV 0x78(%RSP),%R10D |
0x426d64 MOV %EAX,%R8D |
0x426d67 MOV %R15D,%EAX |
0x426d6a IMUL %EBX,%EAX |
0x426d6d DIV %R12D |
0x426d70 CMP %EDX,%R8D |
0x426d73 MOV %EAX,%R9D |
0x426d76 JB 42742c |
0x426d7c IMUL %R9D,%R8D |
0x426d80 LEA (%R8,%RDX,1),%R15D |
0x426d84 LEA (%R9,%R15,1),%R11D |
0x426d88 MOV %R11D,0x30(%RSP) |
0x426d8d CMP %R11D,%R15D |
0x426d90 JAE 4273f3 |
0x426d96 MOV %R15D,%EAX |
0x426d99 XOR %EDX,%EDX |
0x426d9b MOV 0x38(%RSP),%EDI |
0x426d9f VMOVQ (%R14),%XMM10 |
0x426da4 DIVL 0x34(%RSP) |
0x426da8 VMOVQ 0x8(%R14),%XMM9 |
0x426dae VMOVQ 0x10(%R14),%XMM8 |
0x426db4 VMOVQ 0x18(%R14),%XMM7 |
0x426dba VMOVQ 0x20(%R14),%XMM6 |
0x426dc0 ADD %EDX,%EDI |
0x426dc2 LEA (%RAX,%R10,1),%ECX |
0x426dc6 MOV %R9D,%EDX |
0x426dc9 SUB %EDI,%R13D |
0x426dcc MOVSXD %ECX,%RSI |
0x426dcf MOV %EDI,0x74(%RSP) |
0x426dd3 NOPW %CS:(%RAX,%RAX,1) |
0x426dde XCHG %AX,%AX |
(159) 0x426de0 CMP %R13D,%EDX |
(159) 0x426de3 CMOVA %R13D,%EDX |
(159) 0x426de7 LEA (%R15,%RDX,1),%R13D |
(159) 0x426deb MOV %R13D,0x70(%RSP) |
(159) 0x426df0 CMP %R13D,%R15D |
(159) 0x426df3 JAE 427410 |
(159) 0x426df9 VMOVQ %XMM10,%RBX |
(159) 0x426dfe VMOVQ %XMM9,%R8 |
(159) 0x426e03 LEA 0x1(%RSI),%R13 |
(159) 0x426e07 VMOVQ %XMM7,%RCX |
(159) 0x426e0c MOV (%RBX),%R12 |
(159) 0x426e0f MOV (%R8),%R10 |
(159) 0x426e12 VMOVQ %XMM8,%R9 |
(159) 0x426e17 MOV (%RCX),%R14 |
(159) 0x426e1a MOV (%R9),%RDI |
(159) 0x426e1d VMOVQ 0x10(%RBX),%XMM18 |
(159) 0x426e24 MOV %R13,0x40(%RSP) |
(159) 0x426e29 MOV 0x10(%R8),%RBX |
(159) 0x426e2d VMOVQ %XMM6,%R8 |
(159) 0x426e32 VMOVQ 0x10(%RCX),%XMM15 |
(159) 0x426e37 MOV 0x10(%R9),%R11 |
(159) 0x426e3b VMOVQ 0x10(%R8),%XMM3 |
(159) 0x426e41 IMUL %RSI,%R12 |
(159) 0x426e45 IMUL %RSI,%R10 |
(159) 0x426e49 IMUL %RSI,%R14 |
(159) 0x426e4d IMUL (%R8),%RSI |
(159) 0x426e51 MOV %R12,0x48(%RSP) |
(159) 0x426e56 IMUL %RDI,%R13 |
(159) 0x426e5a MOV %R10,0x50(%RSP) |
(159) 0x426e5f MOV %R14,0x68(%RSP) |
(159) 0x426e64 MOV %R13,%RAX |
(159) 0x426e67 MOV %R13,0x58(%RSP) |
(159) 0x426e6c SUB %RDI,%RAX |
(159) 0x426e6f MOV %RSI,0x78(%RSP) |
(159) 0x426e74 LEA -0x1(%RDX),%ESI |
(159) 0x426e77 MOV %RAX,0x60(%RSP) |
(159) 0x426e7c CMP $0x6,%ESI |
(159) 0x426e7f JBE 427420 |
(159) 0x426e85 MOVSXD 0x74(%RSP),%R9 |
(159) 0x426e8a VMOVQ %XMM18,%RCX |
(159) 0x426e90 ADD %R9,%R12 |
(159) 0x426e93 LEA 0x1(%R9,%R10,1),%RSI |
(159) 0x426e98 LEA (%R9,%R13,1),%R13 |
(159) 0x426e9c LEA (%R9,%RAX,1),%RAX |
(159) 0x426ea0 LEA (%RCX,%R12,8),%R8 |
(159) 0x426ea4 MOV 0x78(%RSP),%RCX |
(159) 0x426ea9 LEA (%R9,%R14,1),%RDI |
(159) 0x426ead VMOVQ %XMM15,%R14 |
(159) 0x426eb2 LEA (%R14,%RDI,8),%R14 |
(159) 0x426eb6 SAL $0x3,%RSI |
(159) 0x426eba LEA (%RBX,%RSI,1),%R10 |
(159) 0x426ebe LEA -0x8(%RBX,%RSI,1),%R12 |
(159) 0x426ec3 LEA (%R11,%R13,8),%RSI |
(159) 0x426ec7 LEA (%R11,%RAX,8),%R13 |
(159) 0x426ecb ADD %RCX,%R9 |
(159) 0x426ece VMOVQ %XMM3,%RAX |
(159) 0x426ed3 XOR %ECX,%ECX |
(159) 0x426ed5 LEA (%RAX,%R9,8),%RAX |
(159) 0x426ed9 MOV %EDX,%R9D |
(159) 0x426edc SHR $0x3,%R9D |
(159) 0x426ee0 SAL $0x6,%R9 |
(159) 0x426ee4 LEA -0x40(%R9),%RDI |
(159) 0x426ee8 SHR $0x6,%RDI |
(159) 0x426eec INC %RDI |
(159) 0x426eef AND $0x3,%EDI |
(159) 0x426ef2 JE 426ffb |
(159) 0x426ef8 CMP $0x1,%RDI |
(159) 0x426efc JE 426fa2 |
(159) 0x426f02 CMP $0x2,%RDI |
(159) 0x426f06 JE 426f52 |
(159) 0x426f08 VMOVUPD (%R8),%ZMM5 |
(159) 0x426f0e VMOVUPD (%R12),%ZMM0 |
(159) 0x426f15 VADDPD (%R10),%ZMM5,%ZMM16 |
(159) 0x426f1b MOV $0x40,%ECX |
(159) 0x426f20 VADDPD (%R13),%ZMM0,%ZMM17 |
(159) 0x426f27 VSUBPD %ZMM17,%ZMM16,%ZMM19 |
(159) 0x426f2d VADDPD (%RSI),%ZMM19,%ZMM20 |
(159) 0x426f33 VMOVUPD %ZMM20,(%R14) |
(159) 0x426f39 VMOVUPD (%R12),%ZMM4 |
(159) 0x426f40 VSUBPD (%R10),%ZMM4,%ZMM21 |
(159) 0x426f46 VADDPD %ZMM20,%ZMM21,%ZMM22 |
(159) 0x426f4c VMOVUPD %ZMM22,(%RAX) |
(159) 0x426f52 VMOVUPD (%R8,%RCX,1),%ZMM1 |
(159) 0x426f59 VMOVUPD (%R12,%RCX,1),%ZMM2 |
(159) 0x426f60 VADDPD (%R10,%RCX,1),%ZMM1,%ZMM23 |
(159) 0x426f67 VADDPD (%R13,%RCX,1),%ZMM2,%ZMM24 |
(159) 0x426f6f VSUBPD %ZMM24,%ZMM23,%ZMM25 |
(159) 0x426f75 VADDPD (%RSI,%RCX,1),%ZMM25,%ZMM26 |
(159) 0x426f7c VMOVUPD %ZMM26,(%R14,%RCX,1) |
(159) 0x426f83 VMOVUPD (%R12,%RCX,1),%ZMM11 |
(159) 0x426f8a VSUBPD (%R10,%RCX,1),%ZMM11,%ZMM27 |
(159) 0x426f91 VADDPD %ZMM26,%ZMM27,%ZMM28 |
(159) 0x426f97 VMOVUPD %ZMM28,(%RAX,%RCX,1) |
(159) 0x426f9e ADD $0x40,%RCX |
(159) 0x426fa2 VMOVUPD (%R8,%RCX,1),%ZMM12 |
(159) 0x426fa9 VMOVUPD (%R12,%RCX,1),%ZMM13 |
(159) 0x426fb0 VADDPD (%R10,%RCX,1),%ZMM12,%ZMM29 |
(159) 0x426fb7 VADDPD (%R13,%RCX,1),%ZMM13,%ZMM30 |
(159) 0x426fbf VSUBPD %ZMM30,%ZMM29,%ZMM31 |
(159) 0x426fc5 VADDPD (%RSI,%RCX,1),%ZMM31,%ZMM16 |
(159) 0x426fcc VMOVUPD %ZMM16,(%R14,%RCX,1) |
(159) 0x426fd3 VMOVUPD (%R12,%RCX,1),%ZMM14 |
(159) 0x426fda VSUBPD (%R10,%RCX,1),%ZMM14,%ZMM17 |
(159) 0x426fe1 VADDPD %ZMM16,%ZMM17,%ZMM19 |
(159) 0x426fe7 VMOVUPD %ZMM19,(%RAX,%RCX,1) |
(159) 0x426fee ADD $0x40,%RCX |
(159) 0x426ff2 CMP %RCX,%R9 |
(159) 0x426ff5 JE 427153 |
(160) 0x426ffb VMOVUPD (%R8,%RCX,1),%ZMM5 |
(160) 0x427002 VMOVUPD (%R12,%RCX,1),%ZMM4 |
(160) 0x427009 VADDPD (%R10,%RCX,1),%ZMM5,%ZMM0 |
(160) 0x427010 VADDPD (%R13,%RCX,1),%ZMM4,%ZMM20 |
(160) 0x427018 VSUBPD %ZMM20,%ZMM0,%ZMM1 |
(160) 0x42701e VADDPD (%RSI,%RCX,1),%ZMM1,%ZMM2 |
(160) 0x427025 VMOVUPD %ZMM2,(%R14,%RCX,1) |
(160) 0x42702c VMOVUPD (%R12,%RCX,1),%ZMM11 |
(160) 0x427033 VSUBPD (%R10,%RCX,1),%ZMM11,%ZMM21 |
(160) 0x42703a VADDPD %ZMM2,%ZMM21,%ZMM22 |
(160) 0x427040 VMOVUPD %ZMM22,(%RAX,%RCX,1) |
(160) 0x427047 VMOVUPD 0x40(%R8,%RCX,1),%ZMM12 |
(160) 0x42704f VMOVUPD 0x40(%R12,%RCX,1),%ZMM14 |
(160) 0x427057 VADDPD 0x40(%R10,%RCX,1),%ZMM12,%ZMM13 |
(160) 0x42705f VADDPD 0x40(%R13,%RCX,1),%ZMM14,%ZMM23 |
(160) 0x427067 VSUBPD %ZMM23,%ZMM13,%ZMM5 |
(160) 0x42706d VADDPD 0x40(%RSI,%RCX,1),%ZMM5,%ZMM0 |
(160) 0x427075 VMOVUPD %ZMM0,0x40(%R14,%RCX,1) |
(160) 0x42707d VMOVUPD 0x40(%R12,%RCX,1),%ZMM4 |
(160) 0x427085 VSUBPD 0x40(%R10,%RCX,1),%ZMM4,%ZMM24 |
(160) 0x42708d VADDPD %ZMM0,%ZMM24,%ZMM25 |
(160) 0x427093 VMOVUPD %ZMM25,0x40(%RAX,%RCX,1) |
(160) 0x42709b VMOVUPD 0x80(%R8,%RCX,1),%ZMM1 |
(160) 0x4270a3 VMOVUPD 0x80(%R12,%RCX,1),%ZMM2 |
(160) 0x4270ab VADDPD 0x80(%R10,%RCX,1),%ZMM1,%ZMM11 |
(160) 0x4270b3 VADDPD 0x80(%R13,%RCX,1),%ZMM2,%ZMM26 |
(160) 0x4270bb VSUBPD %ZMM26,%ZMM11,%ZMM12 |
(160) 0x4270c1 VADDPD 0x80(%RSI,%RCX,1),%ZMM12,%ZMM13 |
(160) 0x4270c9 VMOVUPD %ZMM13,0x80(%R14,%RCX,1) |
(160) 0x4270d1 VMOVUPD 0x80(%R12,%RCX,1),%ZMM14 |
(160) 0x4270d9 VSUBPD 0x80(%R10,%RCX,1),%ZMM14,%ZMM27 |
(160) 0x4270e1 VADDPD %ZMM13,%ZMM27,%ZMM28 |
(160) 0x4270e7 VMOVUPD %ZMM28,0x80(%RAX,%RCX,1) |
(160) 0x4270ef VMOVUPD 0xc0(%R8,%RCX,1),%ZMM5 |
(160) 0x4270f7 VMOVUPD 0xc0(%R12,%RCX,1),%ZMM4 |
(160) 0x4270ff VADDPD 0xc0(%R10,%RCX,1),%ZMM5,%ZMM0 |
(160) 0x427107 VADDPD 0xc0(%R13,%RCX,1),%ZMM4,%ZMM29 |
(160) 0x42710f VSUBPD %ZMM29,%ZMM0,%ZMM1 |
(160) 0x427115 VADDPD 0xc0(%RSI,%RCX,1),%ZMM1,%ZMM11 |
(160) 0x42711d VMOVUPD %ZMM11,0xc0(%R14,%RCX,1) |
(160) 0x427125 VMOVUPD 0xc0(%R12,%RCX,1),%ZMM2 |
(160) 0x42712d VSUBPD 0xc0(%R10,%RCX,1),%ZMM2,%ZMM30 |
(160) 0x427135 VADDPD %ZMM11,%ZMM30,%ZMM31 |
(160) 0x42713b VMOVUPD %ZMM31,0xc0(%RAX,%RCX,1) |
(160) 0x427143 ADD $0x100,%RCX |
(160) 0x42714a CMP %RCX,%R9 |
(160) 0x42714d JNE 426ffb |
(159) 0x427153 MOV 0x74(%RSP),%R8D |
(159) 0x427158 MOV %EDX,%R12D |
(159) 0x42715b AND $-0x8,%R12D |
(159) 0x42715f ADD %R12D,%R15D |
(159) 0x427162 LEA (%R12,%R8,1),%EDI |
(159) 0x427166 TEST $0x7,%DL |
(159) 0x427169 JE 4273bc |
(159) 0x42716f SUB %R12D,%EDX |
(159) 0x427172 LEA -0x1(%RDX),%R10D |
(159) 0x427176 CMP $0x2,%R10D |
(159) 0x42717a JBE 427233 |
(159) 0x427180 MOVSXD 0x74(%RSP),%R13 |
(159) 0x427185 MOV 0x50(%RSP),%RSI |
(159) 0x42718a MOV 0x48(%RSP),%R9 |
(159) 0x42718f MOV 0x58(%RSP),%R10 |
(159) 0x427194 LEA (%RSI,%R13,1),%R14 |
(159) 0x427198 LEA (%R9,%R13,1),%RSI |
(159) 0x42719c MOV 0x60(%RSP),%R9 |
(159) 0x4271a1 LEA 0x1(%R12,%R14,1),%RAX |
(159) 0x4271a6 LEA (%R10,%R13,1),%R14 |
(159) 0x4271aa ADD %R12,%RSI |
(159) 0x4271ad MOV 0x68(%RSP),%R10 |
(159) 0x4271b2 ADD %R12,%R14 |
(159) 0x4271b5 SAL $0x3,%RAX |
(159) 0x4271b9 LEA (%RBX,%RAX,1),%RCX |
(159) 0x4271bd LEA -0x8(%RBX,%RAX,1),%R8 |
(159) 0x4271c2 VMOVQ %XMM18,%RAX |
(159) 0x4271c8 VMOVUPD (%R11,%R14,8),%YMM12 |
(159) 0x4271ce VMOVUPD (%RCX),%YMM14 |
(159) 0x4271d2 VADDPD (%RAX,%RSI,8),%YMM12,%YMM13 |
(159) 0x4271d7 VSUBPD (%R8),%YMM14,%YMM5 |
(159) 0x4271dc LEA (%R9,%R13,1),%RSI |
(159) 0x4271e0 ADD %R12,%RSI |
(159) 0x4271e3 LEA (%R10,%R13,1),%RAX |
(159) 0x4271e7 VMOVQ %XMM15,%R14 |
(159) 0x4271ec ADD %R12,%RAX |
(159) 0x4271ef VADDPD %YMM5,%YMM13,%YMM0 |
(159) 0x4271f3 VSUBPD (%R11,%RSI,8),%YMM0,%YMM4 |
(159) 0x4271f9 VMOVUPD %YMM4,(%R14,%RAX,8) |
(159) 0x4271ff VMOVUPD (%R8),%YMM1 |
(159) 0x427204 VSUBPD (%RCX),%YMM1,%YMM11 |
(159) 0x427208 MOV 0x78(%RSP),%RCX |
(159) 0x42720d ADD %RCX,%R13 |
(159) 0x427210 ADD %R12,%R13 |
(159) 0x427213 VMOVQ %XMM3,%R12 |
(159) 0x427218 VADDPD %YMM4,%YMM11,%YMM2 |
(159) 0x42721c VMOVUPD %YMM2,(%R12,%R13,8) |
(159) 0x427222 TEST $0x3,%DL |
(159) 0x427225 JE 4273bc |
(159) 0x42722b AND $-0x4,%EDX |
(159) 0x42722e ADD %EDX,%R15D |
(159) 0x427231 ADD %EDX,%EDI |
(159) 0x427233 MOV 0x50(%RSP),%R8 |
(159) 0x427238 LEA 0x1(%RDI),%EDX |
(159) 0x42723b MOV 0x48(%RSP),%R14 |
(159) 0x427240 MOVSXD %EDI,%RAX |
(159) 0x427243 MOVSXD %EDX,%RDX |
(159) 0x427246 VMOVQ %XMM18,%R12 |
(159) 0x42724c LEA (%R8,%RDX,1),%R13 |
(159) 0x427250 LEA (%R14,%RAX,1),%RSI |
(159) 0x427254 LEA (%R8,%RAX,1),%R9 |
(159) 0x427258 LEA (%RBX,%R13,8),%RCX |
(159) 0x42725c MOV 0x58(%RSP),%R13 |
(159) 0x427261 VMOVSD (%R12,%RSI,8),%XMM12 |
(159) 0x427267 MOV 0x60(%RSP),%R12 |
(159) 0x42726c LEA (%RBX,%R9,8),%R9 |
(159) 0x427270 VMOVSD (%RCX),%XMM14 |
(159) 0x427274 VSUBSD (%R9),%XMM14,%XMM5 |
(159) 0x427279 LEA (%R13,%RAX,1),%R10 |
(159) 0x42727e LEA (%R12,%RAX,1),%RSI |
(159) 0x427282 VADDSD (%R11,%R10,8),%XMM12,%XMM13 |
(159) 0x427288 MOV 0x68(%RSP),%R10 |
(159) 0x42728d VADDSD %XMM5,%XMM13,%XMM0 |
(159) 0x427291 VSUBSD (%R11,%RSI,8),%XMM0,%XMM4 |
(159) 0x427297 VMOVQ %XMM15,%RSI |
(159) 0x42729c LEA (%R10,%RAX,1),%R10 |
(159) 0x4272a0 VMOVSD %XMM4,(%RSI,%R10,8) |
(159) 0x4272a6 MOV 0x78(%RSP),%R10 |
(159) 0x4272ab MOV 0x70(%RSP),%ESI |
(159) 0x4272af VMOVSD (%R9),%XMM1 |
(159) 0x4272b4 VMOVQ %XMM3,%R9 |
(159) 0x4272b9 VSUBSD (%RCX),%XMM1,%XMM11 |
(159) 0x4272bd VADDSD %XMM4,%XMM11,%XMM2 |
(159) 0x4272c1 ADD %R10,%RAX |
(159) 0x4272c4 VMOVSD %XMM2,(%R9,%RAX,8) |
(159) 0x4272ca LEA 0x1(%R15),%EAX |
(159) 0x4272ce CMP %ESI,%EAX |
(159) 0x4272d0 JAE 4273bc |
(159) 0x4272d6 LEA 0x2(%RDI),%EAX |
(159) 0x4272d9 MOV %R8,%RSI |
(159) 0x4272dc LEA (%RDX,%R14,1),%R10 |
(159) 0x4272e0 ADD $0x2,%R15D |
(159) 0x4272e4 CLTQ |
(159) 0x4272e6 LEA (%R8,%RAX,1),%R8 |
(159) 0x4272ea LEA (%RBX,%R8,8),%R9 |
(159) 0x4272ee MOV %R14,%R8 |
(159) 0x4272f1 VMOVQ %XMM18,%R14 |
(159) 0x4272f7 VMOVSD (%R14,%R10,8),%XMM12 |
(159) 0x4272fd LEA (%R13,%RDX,1),%R10 |
(159) 0x427302 MOV %R12,%R14 |
(159) 0x427305 LEA (%R12,%RDX,1),%R12 |
(159) 0x427309 VMOVSD (%R11,%R12,8),%XMM14 |
(159) 0x42730f VADDSD (%R9),%XMM12,%XMM13 |
(159) 0x427314 VADDSD (%RCX),%XMM14,%XMM5 |
(159) 0x427318 VSUBSD %XMM5,%XMM13,%XMM0 |
(159) 0x42731c VADDSD (%R11,%R10,8),%XMM0,%XMM4 |
(159) 0x427322 MOV 0x68(%RSP),%R10 |
(159) 0x427327 VMOVQ %XMM15,%R12 |
(159) 0x42732c ADD %RDX,%R10 |
(159) 0x42732f VMOVSD %XMM4,(%R12,%R10,8) |
(159) 0x427335 MOV 0x78(%RSP),%R10 |
(159) 0x42733a VMOVSD (%RCX),%XMM1 |
(159) 0x42733e VMOVQ %XMM3,%RCX |
(159) 0x427343 VSUBSD (%R9),%XMM1,%XMM11 |
(159) 0x427348 VADDSD %XMM4,%XMM11,%XMM2 |
(159) 0x42734c ADD %R10,%RDX |
(159) 0x42734f VMOVSD %XMM2,(%RCX,%RDX,8) |
(159) 0x427354 MOV 0x70(%RSP),%EDX |
(159) 0x427358 CMP %EDX,%R15D |
(159) 0x42735b JAE 4273bc |
(159) 0x42735d ADD $0x3,%EDI |
(159) 0x427360 ADD %RAX,%R8 |
(159) 0x427363 ADD %RAX,%R14 |
(159) 0x427366 ADD %RAX,%R13 |
(159) 0x427369 MOVSXD %EDI,%R15 |
(159) 0x42736c VMOVSD (%R11,%R14,8),%XMM14 |
(159) 0x427372 VADDSD (%R9),%XMM14,%XMM5 |
(159) 0x427377 ADD %RAX,%R10 |
(159) 0x42737a ADD %RSI,%R15 |
(159) 0x42737d LEA (%RBX,%R15,8),%RDI |
(159) 0x427381 VMOVQ %XMM18,%RBX |
(159) 0x427387 VMOVSD (%RBX,%R8,8),%XMM12 |
(159) 0x42738d VADDSD (%RDI),%XMM12,%XMM13 |
(159) 0x427391 VSUBSD %XMM5,%XMM13,%XMM0 |
(159) 0x427395 VADDSD (%R11,%R13,8),%XMM0,%XMM4 |
(159) 0x42739b MOV 0x68(%RSP),%R11 |
(159) 0x4273a0 ADD %RAX,%R11 |
(159) 0x4273a3 VMOVSD %XMM4,(%R12,%R11,8) |
(159) 0x4273a9 VMOVSD (%R9),%XMM15 |
(159) 0x4273ae VSUBSD (%RDI),%XMM15,%XMM1 |
(159) 0x4273b2 VADDSD %XMM4,%XMM1,%XMM11 |
(159) 0x4273b6 VMOVSD %XMM11,(%RCX,%R10,8) |
(159) 0x4273bc MOV 0x70(%RSP),%R15D |
(159) 0x4273c1 MOV 0x40(%RSP),%RSI |
(159) 0x4273c6 LEA (%RSI),%R9D |
(159) 0x4273c9 CMP %R9D,0x3c(%RSP) |
(159) 0x4273ce JLE 4273f0 |
(159) 0x4273d0 MOV 0x30(%RSP),%EDX |
(159) 0x4273d4 MOV 0x38(%RSP),%R12D |
(159) 0x4273d9 MOV 0x34(%RSP),%R13D |
(159) 0x4273de SUB %R15D,%EDX |
(159) 0x4273e1 MOV %R12D,0x74(%RSP) |
(159) 0x4273e6 JMP 426de0 |
0x4273eb NOPL (%RAX,%RAX,1) |
0x4273f0 VZEROUPPER |
0x4273f3 LEA -0x28(%RBP),%RSP |
0x4273f7 POP %RBX |
0x4273f8 POP %R12 |
0x4273fa POP %R13 |
0x4273fc POP %R14 |
0x4273fe POP %R15 |
0x427400 POP %RBP |
0x427401 RET |
0x427402 NOPW %CS:(%RAX,%RAX,1) |
0x42740d NOPL (%RAX) |
(159) 0x427410 LEA 0x1(%RSI),%R14 |
(159) 0x427414 MOV %R14,0x40(%RSP) |
(159) 0x427419 JMP 4273c1 |
0x42741b NOPL (%RAX,%RAX,1) |
(159) 0x427420 MOV 0x74(%RSP),%EDI |
(159) 0x427424 XOR %R12D,%R12D |
(159) 0x427427 JMP 42716f |
0x42742c INC %R9D |
0x42742f XOR %EDX,%EDX |
0x427431 JMP 426d7c |
0x427436 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:44-48 |
Module | exec |
nb instructions | 82 |
nb uops | 80 |
loop length | 302 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 13.33 cycles |
front end | 13.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.33 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%R14),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%R14),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RBX),%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %ECX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 4273f3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x703> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4(%RSI),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R13D,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 4273f3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x703> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x78(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JB 42742c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x73c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%R15,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R11D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 4273f3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x703> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x38(%RSP),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R14),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x34(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x8(%R14),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R14),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R14),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x20(%R14),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R10,1),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %ECX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 426d7c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x8c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_cell.cpp:44-48 |
Module | exec |
nb instructions | 82 |
nb uops | 80 |
loop length | 302 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 13.33 cycles |
front end | 13.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 13.33 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 13.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x34(%R14),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%R14),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%R14),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RBX),%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %ECX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 4273f3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x703> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4(%RSI),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R13D,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 4273f3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x703> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x78(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JB 42742c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x73c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%R15,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R11D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 4273f3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x703> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x38(%RSP),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R14),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x34(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x8(%R14),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R14),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R14),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x20(%R14),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R10,1),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %ECX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 426d7c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0+0x8c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.0– | 1.52 | 0.77 |
▼Loop 159 - advec_cell.cpp:44-48 - exec– | 0 | 0 |
○Loop 160 - advec_cell.cpp:47-48 - exec | 1.52 | 0.76 |