Function: ideal_gas_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: ideal_gas.cpp:37-45 [...] | Coverage: 4.27% |
---|
Function: ideal_gas_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: ideal_gas.cpp:37-45 [...] | Coverage: 4.27% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/ideal_gas.cpp: 37 - 45 |
-------------------------------------------------------------------------------- |
37: #pragma omp parallel for simd collapse(2) |
38: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
39: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
40: double v = 1.0 / density(i, j); |
41: pressure(i, j) = (1.4 - 1.0) * density(i, j) * energy(i, j); |
42: double pressurebyenergy = (1.4 - 1.0) * density(i, j); |
43: double pressurebyvolume = -density(i, j) * pressure(i, j); |
44: double sound_speed_squared = v * v * (pressure(i, j) * pressurebyenergy - pressurebyvolume); |
45: soundspeed(i, j) = std::sqrt(sound_speed_squared); |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x450ba0 PUSH %RBP |
0x450ba1 MOV %RSP,%RBP |
0x450ba4 PUSH %R15 |
0x450ba6 PUSH %R14 |
0x450ba8 PUSH %R13 |
0x450baa PUSH %R12 |
0x450bac PUSH %RBX |
0x450bad AND $-0x40,%RSP |
0x450bb1 ADD $-0x80,%RSP |
0x450bb5 MOV 0x28(%RDI),%EAX |
0x450bb8 MOV 0x2c(%RDI),%EDX |
0x450bbb MOV 0x20(%RDI),%EBX |
0x450bbe MOV 0x24(%RDI),%ECX |
0x450bc1 ADD $0x2,%EDX |
0x450bc4 LEA 0x1(%RAX),%R15D |
0x450bc8 LEA 0x1(%RBX),%ESI |
0x450bcb MOV %EDX,0x50(%RSP) |
0x450bcf MOV %ESI,0x4c(%RSP) |
0x450bd3 CMP %EDX,%R15D |
0x450bd6 JGE 4511ab |
0x450bdc MOV %EDX,%EBX |
0x450bde LEA 0x2(%RCX),%R14D |
0x450be2 SUB %R15D,%EBX |
0x450be5 CMP %R14D,%ESI |
0x450be8 JGE 4511ab |
0x450bee MOV %RDI,%R13 |
0x450bf1 MOV %R14D,%EDI |
0x450bf4 SUB %ESI,%EDI |
0x450bf6 MOV %EDI,0x54(%RSP) |
0x450bfa CALL 4046c0 <omp_get_num_threads@plt> |
0x450bff MOV %EAX,%R12D |
0x450c02 CALL 4045b0 <omp_get_thread_num@plt> |
0x450c07 XOR %EDX,%EDX |
0x450c09 MOV %EAX,%R8D |
0x450c0c MOV 0x54(%RSP),%EAX |
0x450c10 IMUL %EBX,%EAX |
0x450c13 DIV %R12D |
0x450c16 MOV %EAX,%R12D |
0x450c19 CMP %EDX,%R8D |
0x450c1c JB 4511cb |
0x450c22 IMUL %R12D,%R8D |
0x450c26 LEA (%R8,%RDX,1),%R9D |
0x450c2a LEA (%R12,%R9,1),%R10D |
0x450c2e MOV %R10D,0x48(%RSP) |
0x450c33 CMP %R10D,%R9D |
0x450c36 JAE 4511ab |
0x450c3c MOV %R9D,%EAX |
0x450c3f XOR %EDX,%EDX |
0x450c41 MOV 0x4c(%RSP),%R11D |
0x450c46 MOV 0x8(%R13),%RSI |
0x450c4a DIVL 0x54(%RSP) |
0x450c4e MOV 0x18(%R13),%RBX |
0x450c52 VMOVSD 0x1297e(%RIP),%XMM7 |
0x450c5a VMOVSD 0x1297e(%RIP),%XMM6 |
0x450c62 VMOVSD 0x1297e(%RIP),%XMM5 |
0x450c6a MOV %RSI,0x38(%RSP) |
0x450c6f MOV %RBX,0x28(%RSP) |
0x450c74 VBROADCASTSD %XMM7,%YMM10 |
0x450c79 VBROADCASTSD %XMM6,%YMM9 |
0x450c7e VBROADCASTSD %XMM5,%YMM8 |
0x450c83 VBROADCASTSD %XMM7,%ZMM4 |
0x450c89 VBROADCASTSD %XMM6,%ZMM3 |
0x450c8f VBROADCASTSD %XMM5,%ZMM2 |
0x450c95 ADD %EDX,%R11D |
0x450c98 ADD %R15D,%EAX |
0x450c9b MOV %R14D,%EDX |
0x450c9e MOV (%R13),%R15 |
0x450ca2 MOV 0x10(%R13),%R14 |
0x450ca6 MOV %R11D,0x7c(%RSP) |
0x450cab SUB %R11D,%EDX |
0x450cae MOVSXD %EAX,%RBX |
0x450cb1 MOV %R15,0x40(%RSP) |
0x450cb6 MOV %R14,0x30(%RSP) |
0x450cbb NOPL (%RAX,%RAX,1) |
(417) 0x450cc0 CMP %EDX,%R12D |
(417) 0x450cc3 CMOVBE %R12D,%EDX |
(417) 0x450cc7 LEA (%R9,%RDX,1),%ECX |
(417) 0x450ccb MOV %ECX,0x78(%RSP) |
(417) 0x450ccf CMP %ECX,%R9D |
(417) 0x450cd2 JAE 45117c |
(417) 0x450cd8 MOV 0x40(%RSP),%R13 |
(417) 0x450cdd MOV 0x38(%RSP),%RDI |
(417) 0x450ce2 LEA -0x1(%RDX),%EAX |
(417) 0x450ce5 MOV 0x30(%RSP),%R12 |
(417) 0x450cea MOV 0x28(%RSP),%R8 |
(417) 0x450cef MOV (%R13),%R11 |
(417) 0x450cf3 MOV (%RDI),%RSI |
(417) 0x450cf6 MOV (%R12),%R10 |
(417) 0x450cfa MOV 0x10(%RDI),%R14 |
(417) 0x450cfe MOV (%R8),%RDI |
(417) 0x450d01 IMUL %RBX,%R11 |
(417) 0x450d05 MOV 0x10(%R13),%R15 |
(417) 0x450d09 IMUL %RBX,%RSI |
(417) 0x450d0d MOV 0x10(%R12),%R13 |
(417) 0x450d12 MOV 0x10(%R8),%R12 |
(417) 0x450d16 IMUL %RBX,%R10 |
(417) 0x450d1a IMUL %RBX,%RDI |
(417) 0x450d1e MOV %R11,0x58(%RSP) |
(417) 0x450d23 MOV %RSI,0x60(%RSP) |
(417) 0x450d28 MOV %R10,0x68(%RSP) |
(417) 0x450d2d MOV %RDI,0x70(%RSP) |
(417) 0x450d32 CMP $0x6,%EAX |
(417) 0x450d35 JBE 4511c0 |
(417) 0x450d3b MOVSXD 0x7c(%RSP),%R8 |
(417) 0x450d40 LEA (%RSI,%R8,1),%RAX |
(417) 0x450d44 LEA (%R11,%R8,1),%R11 |
(417) 0x450d48 LEA (%R10,%R8,1),%R10 |
(417) 0x450d4c ADD %RDI,%R8 |
(417) 0x450d4f LEA (%R15,%R11,8),%RCX |
(417) 0x450d53 MOV %EDX,%EDI |
(417) 0x450d55 SHR $0x3,%EDI |
(417) 0x450d58 LEA (%R14,%RAX,8),%R11 |
(417) 0x450d5c LEA (%R13,%R10,8),%R10 |
(417) 0x450d61 XOR %EAX,%EAX |
(417) 0x450d63 SAL $0x6,%RDI |
(417) 0x450d67 LEA (%R12,%R8,8),%R8 |
(417) 0x450d6b LEA -0x40(%RDI),%RSI |
(417) 0x450d6f SHR $0x6,%RSI |
(417) 0x450d73 INC %RSI |
(417) 0x450d76 AND $0x3,%ESI |
(417) 0x450d79 JE 450e75 |
(417) 0x450d7f CMP $0x1,%RSI |
(417) 0x450d83 JE 450e21 |
(417) 0x450d89 CMP $0x2,%RSI |
(417) 0x450d8d JE 450dd6 |
(417) 0x450d8f VMOVUPD (%RCX),%ZMM1 |
(417) 0x450d95 MOV $0x40,%EAX |
(417) 0x450d9a VMULPD (%R11),%ZMM1,%ZMM0 |
(417) 0x450da0 VDIVPD %ZMM1,%ZMM4,%ZMM11 |
(417) 0x450da6 VMULPD %ZMM11,%ZMM11,%ZMM14 |
(417) 0x450dac VMULPD %ZMM3,%ZMM0,%ZMM12 |
(417) 0x450db2 VMOVUPD %ZMM12,(%R10) |
(417) 0x450db8 VMULPD (%RCX),%ZMM2,%ZMM13 |
(417) 0x450dbe VMULPD %ZMM14,%ZMM13,%ZMM15 |
(417) 0x450dc4 VMULPD %ZMM12,%ZMM15,%ZMM1 |
(417) 0x450dca VSQRTPD %ZMM1,%ZMM11 |
(417) 0x450dd0 VMOVUPD %ZMM11,(%R8) |
(417) 0x450dd6 VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(417) 0x450ddd VMULPD (%R11,%RAX,1),%ZMM0,%ZMM13 |
(417) 0x450de4 VDIVPD %ZMM0,%ZMM4,%ZMM12 |
(417) 0x450dea VMULPD %ZMM12,%ZMM12,%ZMM1 |
(417) 0x450df0 VMULPD %ZMM3,%ZMM13,%ZMM14 |
(417) 0x450df6 VMOVUPD %ZMM14,(%R10,%RAX,1) |
(417) 0x450dfd VMULPD (%RCX,%RAX,1),%ZMM2,%ZMM15 |
(417) 0x450e04 VMULPD %ZMM1,%ZMM15,%ZMM11 |
(417) 0x450e0a VMULPD %ZMM14,%ZMM11,%ZMM0 |
(417) 0x450e10 VSQRTPD %ZMM0,%ZMM12 |
(417) 0x450e16 VMOVUPD %ZMM12,(%R8,%RAX,1) |
(417) 0x450e1d ADD $0x40,%RAX |
(417) 0x450e21 VMOVUPD (%RCX,%RAX,1),%ZMM13 |
(417) 0x450e28 VMULPD (%R11,%RAX,1),%ZMM13,%ZMM15 |
(417) 0x450e2f VDIVPD %ZMM13,%ZMM4,%ZMM14 |
(417) 0x450e35 VMULPD %ZMM14,%ZMM14,%ZMM11 |
(417) 0x450e3b VMULPD %ZMM3,%ZMM15,%ZMM1 |
(417) 0x450e41 VMOVUPD %ZMM1,(%R10,%RAX,1) |
(417) 0x450e48 VMULPD (%RCX,%RAX,1),%ZMM2,%ZMM0 |
(417) 0x450e4f VMULPD %ZMM11,%ZMM0,%ZMM12 |
(417) 0x450e55 VMULPD %ZMM1,%ZMM12,%ZMM13 |
(417) 0x450e5b VSQRTPD %ZMM13,%ZMM14 |
(417) 0x450e61 VMOVUPD %ZMM14,(%R8,%RAX,1) |
(417) 0x450e68 ADD $0x40,%RAX |
(417) 0x450e6c CMP %RDI,%RAX |
(417) 0x450e6f JE 450faf |
(418) 0x450e75 VMOVUPD (%RCX,%RAX,1),%ZMM15 |
(418) 0x450e7c VMULPD (%R11,%RAX,1),%ZMM15,%ZMM1 |
(418) 0x450e83 VDIVPD %ZMM15,%ZMM4,%ZMM11 |
(418) 0x450e89 VMULPD %ZMM11,%ZMM11,%ZMM13 |
(418) 0x450e8f VMULPD %ZMM3,%ZMM1,%ZMM12 |
(418) 0x450e95 VMOVUPD %ZMM12,(%R10,%RAX,1) |
(418) 0x450e9c VMULPD (%RCX,%RAX,1),%ZMM2,%ZMM0 |
(418) 0x450ea3 VMULPD %ZMM13,%ZMM0,%ZMM14 |
(418) 0x450ea9 VMULPD %ZMM12,%ZMM14,%ZMM15 |
(418) 0x450eaf VSQRTPD %ZMM15,%ZMM11 |
(418) 0x450eb5 VMOVUPD %ZMM11,(%R8,%RAX,1) |
(418) 0x450ebc VMOVUPD 0x40(%RCX,%RAX,1),%ZMM1 |
(418) 0x450ec4 VMULPD 0x40(%R11,%RAX,1),%ZMM1,%ZMM0 |
(418) 0x450ecc VDIVPD %ZMM1,%ZMM4,%ZMM12 |
(418) 0x450ed2 VMULPD %ZMM12,%ZMM12,%ZMM15 |
(418) 0x450ed8 VMULPD %ZMM3,%ZMM0,%ZMM13 |
(418) 0x450ede VMOVUPD %ZMM13,0x40(%R10,%RAX,1) |
(418) 0x450ee6 VMULPD 0x40(%RCX,%RAX,1),%ZMM2,%ZMM14 |
(418) 0x450eee VMULPD %ZMM15,%ZMM14,%ZMM11 |
(418) 0x450ef4 VMULPD %ZMM13,%ZMM11,%ZMM1 |
(418) 0x450efa VSQRTPD %ZMM1,%ZMM12 |
(418) 0x450f00 VMOVUPD %ZMM12,0x40(%R8,%RAX,1) |
(418) 0x450f08 VMOVUPD 0x80(%RCX,%RAX,1),%ZMM0 |
(418) 0x450f10 VMULPD 0x80(%R11,%RAX,1),%ZMM0,%ZMM14 |
(418) 0x450f18 VDIVPD %ZMM0,%ZMM4,%ZMM13 |
(418) 0x450f1e VMULPD %ZMM13,%ZMM13,%ZMM11 |
(418) 0x450f24 VMULPD %ZMM3,%ZMM14,%ZMM15 |
(418) 0x450f2a VMOVUPD %ZMM15,0x80(%R10,%RAX,1) |
(418) 0x450f32 VMULPD 0x80(%RCX,%RAX,1),%ZMM2,%ZMM1 |
(418) 0x450f3a VMULPD %ZMM11,%ZMM1,%ZMM12 |
(418) 0x450f40 VMULPD %ZMM15,%ZMM12,%ZMM0 |
(418) 0x450f46 VSQRTPD %ZMM0,%ZMM13 |
(418) 0x450f4c VMOVUPD %ZMM13,0x80(%R8,%RAX,1) |
(418) 0x450f54 VMOVUPD 0xc0(%RCX,%RAX,1),%ZMM14 |
(418) 0x450f5c VMULPD 0xc0(%R11,%RAX,1),%ZMM14,%ZMM1 |
(418) 0x450f64 VDIVPD %ZMM14,%ZMM4,%ZMM15 |
(418) 0x450f6a VMULPD %ZMM15,%ZMM15,%ZMM11 |
(418) 0x450f70 VMULPD %ZMM3,%ZMM1,%ZMM12 |
(418) 0x450f76 VMOVUPD %ZMM12,0xc0(%R10,%RAX,1) |
(418) 0x450f7e VMULPD 0xc0(%RCX,%RAX,1),%ZMM2,%ZMM0 |
(418) 0x450f86 VMULPD %ZMM11,%ZMM0,%ZMM13 |
(418) 0x450f8c VMULPD %ZMM12,%ZMM13,%ZMM14 |
(418) 0x450f92 VSQRTPD %ZMM14,%ZMM15 |
(418) 0x450f98 VMOVUPD %ZMM15,0xc0(%R8,%RAX,1) |
(418) 0x450fa0 ADD $0x100,%RAX |
(418) 0x450fa6 CMP %RDI,%RAX |
(418) 0x450fa9 JNE 450e75 |
(417) 0x450faf MOV 0x7c(%RSP),%R11D |
(417) 0x450fb4 MOV %EDX,%ECX |
(417) 0x450fb6 AND $-0x8,%ECX |
(417) 0x450fb9 ADD %ECX,%R9D |
(417) 0x450fbc LEA (%RCX,%R11,1),%ESI |
(417) 0x450fc0 TEST $0x7,%DL |
(417) 0x450fc3 JE 451177 |
(417) 0x450fc9 SUB %ECX,%EDX |
(417) 0x450fcb LEA -0x1(%RDX),%R10D |
(417) 0x450fcf CMP $0x2,%R10D |
(417) 0x450fd3 JBE 451059 |
(417) 0x450fd9 MOVSXD 0x7c(%RSP),%RAX |
(417) 0x450fde MOV 0x58(%RSP),%R8 |
(417) 0x450fe3 MOV 0x60(%RSP),%R11 |
(417) 0x450fe8 MOV 0x70(%RSP),%RDI |
(417) 0x450fed ADD %RAX,%R8 |
(417) 0x450ff0 ADD %RCX,%R8 |
(417) 0x450ff3 ADD %RAX,%RDI |
(417) 0x450ff6 LEA (%R15,%R8,8),%R10 |
(417) 0x450ffa LEA (%R11,%RAX,1),%R8 |
(417) 0x450ffe MOV 0x68(%RSP),%R11 |
(417) 0x451003 ADD %RCX,%RDI |
(417) 0x451006 VMOVUPD (%R10),%YMM1 |
(417) 0x45100b ADD %RCX,%R8 |
(417) 0x45100e ADD %R11,%RAX |
(417) 0x451011 VDIVPD %YMM1,%YMM10,%YMM12 |
(417) 0x451015 ADD %RCX,%RAX |
(417) 0x451018 VMULPD (%R14,%R8,8),%YMM1,%YMM0 |
(417) 0x45101e VMULPD %YMM9,%YMM0,%YMM13 |
(417) 0x451023 VMOVUPD %YMM13,(%R13,%RAX,8) |
(417) 0x45102a VMULPD (%R10),%YMM8,%YMM14 |
(417) 0x45102f VMULPD %YMM12,%YMM12,%YMM11 |
(417) 0x451034 VMULPD %YMM11,%YMM14,%YMM15 |
(417) 0x451039 VMULPD %YMM13,%YMM15,%YMM1 |
(417) 0x45103e VSQRTPD %YMM1,%YMM12 |
(417) 0x451042 VMOVUPD %YMM12,(%R12,%RDI,8) |
(417) 0x451048 TEST $0x3,%DL |
(417) 0x45104b JE 451177 |
(417) 0x451051 AND $-0x4,%EDX |
(417) 0x451054 ADD %EDX,%R9D |
(417) 0x451057 ADD %EDX,%ESI |
(417) 0x451059 MOV 0x58(%RSP),%RDI |
(417) 0x45105e MOVSXD %ESI,%RAX |
(417) 0x451061 MOV 0x60(%RSP),%R11 |
(417) 0x451066 MOV 0x68(%RSP),%R10 |
(417) 0x45106b LEA (%RDI,%RAX,1),%RDX |
(417) 0x45106f LEA (%R11,%RAX,1),%RCX |
(417) 0x451073 LEA (%R15,%RDX,8),%R8 |
(417) 0x451077 LEA (%R10,%RAX,1),%RDX |
(417) 0x45107b VMOVSD (%R8),%XMM0 |
(417) 0x451080 VDIVSD %XMM0,%XMM7,%XMM13 |
(417) 0x451084 VMULSD (%R14,%RCX,8),%XMM0,%XMM14 |
(417) 0x45108a MOV 0x78(%RSP),%ECX |
(417) 0x45108e VMULSD %XMM6,%XMM14,%XMM15 |
(417) 0x451092 VMOVSD %XMM15,(%R13,%RDX,8) |
(417) 0x451099 LEA 0x1(%R9),%EDX |
(417) 0x45109d VMULSD (%R8),%XMM5,%XMM1 |
(417) 0x4510a2 MOV 0x70(%RSP),%R8 |
(417) 0x4510a7 ADD %R8,%RAX |
(417) 0x4510aa VMULSD %XMM13,%XMM13,%XMM11 |
(417) 0x4510af VMULSD %XMM11,%XMM1,%XMM12 |
(417) 0x4510b4 VMULSD %XMM15,%XMM12,%XMM0 |
(417) 0x4510b9 VSQRTSD %XMM0,%XMM0,%XMM0 |
(417) 0x4510bd VMOVSD %XMM0,(%R12,%RAX,8) |
(417) 0x4510c3 LEA 0x1(%RSI),%EAX |
(417) 0x4510c6 CMP %ECX,%EDX |
(417) 0x4510c8 JAE 451177 |
(417) 0x4510ce CLTQ |
(417) 0x4510d0 ADD $0x2,%R9D |
(417) 0x4510d4 ADD $0x2,%ESI |
(417) 0x4510d7 LEA (%RDI,%RAX,1),%RDX |
(417) 0x4510db LEA (%R11,%RAX,1),%RCX |
(417) 0x4510df LEA (%R15,%RDX,8),%RDX |
(417) 0x4510e3 VMOVSD (%RDX),%XMM13 |
(417) 0x4510e7 VDIVSD %XMM13,%XMM7,%XMM14 |
(417) 0x4510ec VMULSD (%R14,%RCX,8),%XMM13,%XMM15 |
(417) 0x4510f2 LEA (%R10,%RAX,1),%RCX |
(417) 0x4510f6 ADD %R8,%RAX |
(417) 0x4510f9 VMULSD %XMM6,%XMM15,%XMM1 |
(417) 0x4510fd VMOVSD %XMM1,(%R13,%RCX,8) |
(417) 0x451104 VMULSD (%RDX),%XMM5,%XMM12 |
(417) 0x451108 VMULSD %XMM14,%XMM14,%XMM11 |
(417) 0x45110d VMULSD %XMM11,%XMM12,%XMM0 |
(417) 0x451112 VMULSD %XMM1,%XMM0,%XMM13 |
(417) 0x451116 VSQRTSD %XMM13,%XMM13,%XMM13 |
(417) 0x45111b VMOVSD %XMM13,(%R12,%RAX,8) |
(417) 0x451121 MOV 0x78(%RSP),%EAX |
(417) 0x451125 CMP %EAX,%R9D |
(417) 0x451128 JAE 451177 |
(417) 0x45112a MOVSXD %ESI,%R9 |
(417) 0x45112d ADD %R9,%RDI |
(417) 0x451130 ADD %R9,%R11 |
(417) 0x451133 ADD %R9,%R10 |
(417) 0x451136 ADD %R9,%R8 |
(417) 0x451139 LEA (%R15,%RDI,8),%R15 |
(417) 0x45113d VMOVSD (%R15),%XMM14 |
(417) 0x451142 VDIVSD %XMM14,%XMM7,%XMM15 |
(417) 0x451147 VMULSD (%R14,%R11,8),%XMM14,%XMM1 |
(417) 0x45114d VMULSD %XMM6,%XMM1,%XMM12 |
(417) 0x451151 VMOVSD %XMM12,(%R13,%R10,8) |
(417) 0x451158 VMULSD (%R15),%XMM5,%XMM0 |
(417) 0x45115d VMULSD %XMM15,%XMM15,%XMM11 |
(417) 0x451162 VMULSD %XMM11,%XMM0,%XMM13 |
(417) 0x451167 VMULSD %XMM12,%XMM13,%XMM14 |
(417) 0x45116c VSQRTSD %XMM14,%XMM14,%XMM14 |
(417) 0x451171 VMOVSD %XMM14,(%R12,%R8,8) |
(417) 0x451177 MOV 0x78(%RSP),%R9D |
(417) 0x45117c INC %RBX |
(417) 0x45117f LEA (%RBX),%R14D |
(417) 0x451182 CMP %R14D,0x50(%RSP) |
(417) 0x451187 JLE 4511a8 |
(417) 0x451189 MOV 0x48(%RSP),%R12D |
(417) 0x45118e MOV 0x4c(%RSP),%R13D |
(417) 0x451193 MOV 0x54(%RSP),%EDX |
(417) 0x451197 MOV %R13D,0x7c(%RSP) |
(417) 0x45119c SUB %R9D,%R12D |
(417) 0x45119f JMP 450cc0 |
0x4511a4 NOPL (%RAX) |
0x4511a8 VZEROUPPER |
0x4511ab LEA -0x28(%RBP),%RSP |
0x4511af POP %RBX |
0x4511b0 POP %R12 |
0x4511b2 POP %R13 |
0x4511b4 POP %R14 |
0x4511b6 POP %R15 |
0x4511b8 POP %RBP |
0x4511b9 RET |
0x4511ba NOPW (%RAX,%RAX,1) |
(417) 0x4511c0 MOV 0x7c(%RSP),%ESI |
(417) 0x4511c4 XOR %ECX,%ECX |
(417) 0x4511c6 JMP 450fc9 |
0x4511cb INC %R12D |
0x4511ce XOR %EDX,%EDX |
0x4511d0 JMP 450c22 |
0x4511d5 NOP |
0x4511d6 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.26 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.72 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | ideal_gas.cpp:37-45 |
Module | exec |
nb instructions | 90 |
nb uops | 100 |
loop length | 337 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 3 |
used zmm registers | 3 |
nb stack references | 10 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.30 | 8.00 | 7.00 | 7.00 | 8.50 | 7.20 | 7.30 | 8.50 | 8.50 | 8.50 | 7.20 | 7.00 |
cycles | 7.30 | 11.73 | 7.00 | 7.00 | 8.50 | 7.20 | 7.30 | 8.50 | 8.50 | 8.50 | 7.20 | 7.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.92-16.01 |
Stall cycles | 0.00 |
Front-end | 16.67 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 16.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4511ab <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4511ab <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x54(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4511cb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x62b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R12D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R12,%R9,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R10D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4511ab <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x4c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x54(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1297e(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1297e(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1297e(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11D,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R11D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%RBX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 450c22 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x82> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ideal_gas.cpp:37-45 |
Module | exec |
nb instructions | 90 |
nb uops | 100 |
loop length | 337 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 3 |
used zmm registers | 3 |
nb stack references | 10 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.30 | 8.00 | 7.00 | 7.00 | 8.50 | 7.20 | 7.30 | 8.50 | 8.50 | 8.50 | 7.20 | 7.00 |
cycles | 7.30 | 11.73 | 7.00 | 7.00 | 8.50 | 7.20 | 7.30 | 8.50 | 8.50 | 8.50 | 7.20 | 7.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.92-16.01 |
Stall cycles | 0.00 |
Front-end | 16.67 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 16.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4511ab <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4511ab <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x54(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4511cb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x62b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R12D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R12,%R9,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R10D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4511ab <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x4c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x54(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1297e(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1297e(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1297e(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11D,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R11D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%RBX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 450c22 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x82> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼ideal_gas_kernel(int, int, int, int, clover::Buffer2D | 4.27 | 1.41 |
▼Loop 417 - ideal_gas.cpp:40-45 - exec– | 0.01 | 0 |
○Loop 418 - ideal_gas.cpp:40-45 - exec | 4.26 | 1.4 |