Function: _Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0 | Module: exec | Source: ideal_gas.cpp:37-45 [...] | Coverage: 5.41% |
---|
Function: _Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0 | Module: exec | Source: ideal_gas.cpp:37-45 [...] | Coverage: 5.41% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/ideal_gas.cpp: 37 - 45 |
-------------------------------------------------------------------------------- |
37: #pragma omp parallel for simd collapse(2) |
38: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
39: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
40: double v = 1.0 / density(i, j); |
41: pressure(i, j) = (1.4 - 1.0) * density(i, j) * energy(i, j); |
42: double pressurebyenergy = (1.4 - 1.0) * density(i, j); |
43: double pressurebyvolume = -density(i, j) * pressure(i, j); |
44: double sound_speed_squared = v * v * (pressure(i, j) * pressurebyenergy - pressurebyvolume); |
45: soundspeed(i, j) = std::sqrt(sound_speed_squared); |
0x439c80 PUSH %RBP |
0x439c81 MOV %RSP,%RBP |
0x439c84 PUSH %R15 |
0x439c86 PUSH %R14 |
0x439c88 PUSH %R13 |
0x439c8a PUSH %R12 |
0x439c8c PUSH %RBX |
0x439c8d AND $-0x40,%RSP |
0x439c91 SUB $0x40,%RSP |
0x439c95 MOV 0x28(%RDI),%EAX |
0x439c98 MOV 0x2c(%RDI),%ECX |
0x439c9b MOV 0x20(%RDI),%ESI |
0x439c9e MOV 0x24(%RDI),%EDX |
0x439ca1 ADD $0x2,%ECX |
0x439ca4 INC %ESI |
0x439ca6 LEA 0x1(%RAX),%R12D |
0x439caa MOV %ECX,0x10(%RSP) |
0x439cae MOV %ESI,0xc(%RSP) |
0x439cb2 CMP %ECX,%R12D |
0x439cb5 JGE 43a2a3 |
0x439cbb MOV %ECX,%EBX |
0x439cbd LEA 0x2(%RDX),%R14D |
0x439cc1 SUB %R12D,%EBX |
0x439cc4 CMP %R14D,%ESI |
0x439cc7 JGE 43a2a3 |
0x439ccd MOV %RDI,%R15 |
0x439cd0 MOV %R14D,%EDI |
0x439cd3 SUB %ESI,%EDI |
0x439cd5 MOV %EDI,0x14(%RSP) |
0x439cd9 CALL 404650 <omp_get_num_threads@plt> |
0x439cde MOV %EAX,%R13D |
0x439ce1 CALL 404540 <omp_get_thread_num@plt> |
0x439ce6 XOR %EDX,%EDX |
0x439ce8 MOV %EAX,%ESI |
0x439cea MOV 0x14(%RSP),%EAX |
0x439cee IMUL %EBX,%EAX |
0x439cf1 DIV %R13D |
0x439cf4 MOV %EAX,%R9D |
0x439cf7 CMP %EDX,%ESI |
0x439cf9 JB 43a2cc |
0x439cff IMUL %R9D,%ESI |
0x439d03 ADD %EDX,%ESI |
0x439d05 LEA (%R9,%RSI,1),%R8D |
0x439d09 MOV %R8D,0x8(%RSP) |
0x439d0e CMP %R8D,%ESI |
0x439d11 JAE 43a2a3 |
0x439d17 MOV %ESI,%EAX |
0x439d19 XOR %EDX,%EDX |
0x439d1b MOV 0xc(%RSP),%R10D |
0x439d20 VMOVQ (%R15),%XMM6 |
0x439d25 DIVL 0x14(%RSP) |
0x439d29 VMOVQ 0x8(%R15),%XMM11 |
0x439d2f VMOVQ 0x10(%R15),%XMM10 |
0x439d35 VMOVSD 0x2685b(%RIP),%XMM15 |
0x439d3d VMOVQ 0x18(%R15),%XMM5 |
0x439d43 VMOVSD 0x26c35(%RIP),%XMM14 |
0x439d4b VMOVSD 0x26c35(%RIP),%XMM13 |
0x439d53 VBROADCASTSD %XMM15,%YMM18 |
0x439d59 VBROADCASTSD %XMM14,%YMM17 |
0x439d5f VBROADCASTSD %XMM13,%YMM16 |
0x439d65 VBROADCASTSD %XMM15,%ZMM4 |
0x439d6b VBROADCASTSD %XMM14,%ZMM3 |
0x439d71 VBROADCASTSD %XMM13,%ZMM2 |
0x439d77 ADD %R10D,%EDX |
0x439d7a LEA (%RAX,%R12,1),%R11D |
0x439d7e SUB %EDX,%R14D |
0x439d81 MOV %EDX,0x3c(%RSP) |
0x439d85 MOVSXD %R11D,%R8 |
0x439d88 MOV %R9D,%EDX |
0x439d8b NOPL (%RAX,%RAX,1) |
(239) 0x439d90 CMP %R14D,%EDX |
(239) 0x439d93 CMOVA %R14D,%EDX |
(239) 0x439d97 LEA (%RSI,%RDX,1),%R14D |
(239) 0x439d9b MOV %R14D,0x38(%RSP) |
(239) 0x439da0 CMP %R14D,%ESI |
(239) 0x439da3 JAE 43a270 |
(239) 0x439da9 VMOVQ %XMM6,%R12 |
(239) 0x439dae VMOVQ %XMM11,%RBX |
(239) 0x439db3 LEA -0x1(%RDX),%R9D |
(239) 0x439db7 VMOVQ %XMM10,%RDI |
(239) 0x439dbc VMOVQ %XMM5,%R10 |
(239) 0x439dc1 MOV (%R12),%RAX |
(239) 0x439dc5 MOV 0x10(%R12),%R11 |
(239) 0x439dca MOV 0x10(%RBX),%R13 |
(239) 0x439dce MOV (%RBX),%R12 |
(239) 0x439dd1 MOV 0x10(%RDI),%R14 |
(239) 0x439dd5 MOV (%RDI),%RBX |
(239) 0x439dd8 MOV (%R10),%RDI |
(239) 0x439ddb MOV 0x10(%R10),%R15 |
(239) 0x439ddf IMUL %R8,%RAX |
(239) 0x439de3 IMUL %R8,%R12 |
(239) 0x439de7 IMUL %R8,%RBX |
(239) 0x439deb IMUL %R8,%RDI |
(239) 0x439def MOV %RAX,0x18(%RSP) |
(239) 0x439df4 MOV %R12,0x20(%RSP) |
(239) 0x439df9 MOV %RBX,0x28(%RSP) |
(239) 0x439dfe MOV %RDI,0x30(%RSP) |
(239) 0x439e03 CMP $0x6,%R9D |
(239) 0x439e07 JBE 43a2c0 |
(239) 0x439e0d MOVSXD 0x3c(%RSP),%RCX |
(239) 0x439e12 MOV 0x30(%RSP),%RDI |
(239) 0x439e17 LEA (%R12,%RCX,1),%R9 |
(239) 0x439e1b LEA (%RAX,%RCX,1),%RAX |
(239) 0x439e1f LEA (%RBX,%RCX,1),%RBX |
(239) 0x439e23 ADD %RDI,%RCX |
(239) 0x439e26 LEA (%R13,%R9,8),%R12 |
(239) 0x439e2b MOV %EDX,%R9D |
(239) 0x439e2e LEA (%R11,%RAX,8),%R10 |
(239) 0x439e32 LEA (%R15,%RCX,8),%RAX |
(239) 0x439e36 SHR $0x3,%R9D |
(239) 0x439e3a SAL $0x6,%R9 |
(239) 0x439e3e LEA -0x40(%R9),%RDI |
(239) 0x439e42 XOR %ECX,%ECX |
(239) 0x439e44 LEA (%R14,%RBX,8),%RBX |
(239) 0x439e48 SHR $0x6,%RDI |
(239) 0x439e4c INC %RDI |
(239) 0x439e4f AND $0x3,%EDI |
(239) 0x439e52 JE 439f51 |
(239) 0x439e58 CMP $0x1,%RDI |
(239) 0x439e5c JE 439efc |
(239) 0x439e62 CMP $0x2,%RDI |
(239) 0x439e66 JE 439eb0 |
(239) 0x439e68 VDIVPD (%R10),%ZMM4,%ZMM22 |
(239) 0x439e6e VMOVUPD (%R12),%ZMM7 |
(239) 0x439e75 VMULPD (%R10),%ZMM7,%ZMM1 |
(239) 0x439e7b MOV $0x40,%ECX |
(239) 0x439e80 VMULPD %ZMM3,%ZMM1,%ZMM8 |
(239) 0x439e86 VMOVUPD %ZMM8,(%RBX) |
(239) 0x439e8c VMULPD (%R10),%ZMM2,%ZMM0 |
(239) 0x439e92 VMULPD %ZMM22,%ZMM22,%ZMM19 |
(239) 0x439e98 VMULPD %ZMM19,%ZMM0,%ZMM9 |
(239) 0x439e9e VMULPD %ZMM8,%ZMM9,%ZMM12 |
(239) 0x439ea4 VSQRTPD %ZMM12,%ZMM7 |
(239) 0x439eaa VMOVUPD %ZMM7,(%RAX) |
(239) 0x439eb0 VDIVPD (%R10,%RCX,1),%ZMM4,%ZMM20 |
(239) 0x439eb7 VMOVUPD (%R12,%RCX,1),%ZMM1 |
(239) 0x439ebe VMULPD (%R10,%RCX,1),%ZMM1,%ZMM8 |
(239) 0x439ec5 VMULPD %ZMM3,%ZMM8,%ZMM9 |
(239) 0x439ecb VMOVUPD %ZMM9,(%RBX,%RCX,1) |
(239) 0x439ed2 VMULPD (%R10,%RCX,1),%ZMM2,%ZMM0 |
(239) 0x439ed9 VMULPD %ZMM20,%ZMM20,%ZMM21 |
(239) 0x439edf VMULPD %ZMM21,%ZMM0,%ZMM12 |
(239) 0x439ee5 VMULPD %ZMM9,%ZMM12,%ZMM7 |
(239) 0x439eeb VSQRTPD %ZMM7,%ZMM1 |
(239) 0x439ef1 VMOVUPD %ZMM1,(%RAX,%RCX,1) |
(239) 0x439ef8 ADD $0x40,%RCX |
(239) 0x439efc VDIVPD (%R10,%RCX,1),%ZMM4,%ZMM23 |
(239) 0x439f03 VMOVUPD (%R12,%RCX,1),%ZMM8 |
(239) 0x439f0a VMULPD (%R10,%RCX,1),%ZMM8,%ZMM9 |
(239) 0x439f11 VMULPD %ZMM3,%ZMM9,%ZMM12 |
(239) 0x439f17 VMOVUPD %ZMM12,(%RBX,%RCX,1) |
(239) 0x439f1e VMULPD (%R10,%RCX,1),%ZMM2,%ZMM0 |
(239) 0x439f25 VMULPD %ZMM23,%ZMM23,%ZMM24 |
(239) 0x439f2b VMULPD %ZMM24,%ZMM0,%ZMM7 |
(239) 0x439f31 VMULPD %ZMM12,%ZMM7,%ZMM1 |
(239) 0x439f37 VSQRTPD %ZMM1,%ZMM8 |
(239) 0x439f3d VMOVUPD %ZMM8,(%RAX,%RCX,1) |
(239) 0x439f44 ADD $0x40,%RCX |
(239) 0x439f48 CMP %R9,%RCX |
(239) 0x439f4b JE 43a093 |
(240) 0x439f51 VDIVPD (%R10,%RCX,1),%ZMM4,%ZMM25 |
(240) 0x439f58 VMOVUPD (%R12,%RCX,1),%ZMM9 |
(240) 0x439f5f VMULPD (%R10,%RCX,1),%ZMM9,%ZMM12 |
(240) 0x439f66 VMULPD %ZMM3,%ZMM12,%ZMM7 |
(240) 0x439f6c VMOVUPD %ZMM7,(%RBX,%RCX,1) |
(240) 0x439f73 VMULPD (%R10,%RCX,1),%ZMM2,%ZMM0 |
(240) 0x439f7a VMULPD %ZMM25,%ZMM25,%ZMM26 |
(240) 0x439f80 VMULPD %ZMM26,%ZMM0,%ZMM1 |
(240) 0x439f86 VMULPD %ZMM7,%ZMM1,%ZMM8 |
(240) 0x439f8c VSQRTPD %ZMM8,%ZMM9 |
(240) 0x439f92 VMOVUPD %ZMM9,(%RAX,%RCX,1) |
(240) 0x439f99 VDIVPD 0x40(%R10,%RCX,1),%ZMM4,%ZMM27 |
(240) 0x439fa1 VMOVUPD 0x40(%R12,%RCX,1),%ZMM12 |
(240) 0x439fa9 VMULPD 0x40(%R10,%RCX,1),%ZMM12,%ZMM7 |
(240) 0x439fb1 VMULPD %ZMM3,%ZMM7,%ZMM1 |
(240) 0x439fb7 VMOVUPD %ZMM1,0x40(%RBX,%RCX,1) |
(240) 0x439fbf VMULPD 0x40(%R10,%RCX,1),%ZMM2,%ZMM0 |
(240) 0x439fc7 VMULPD %ZMM27,%ZMM27,%ZMM28 |
(240) 0x439fcd VMULPD %ZMM28,%ZMM0,%ZMM8 |
(240) 0x439fd3 VMULPD %ZMM1,%ZMM8,%ZMM9 |
(240) 0x439fd9 VSQRTPD %ZMM9,%ZMM12 |
(240) 0x439fdf VMOVUPD %ZMM12,0x40(%RAX,%RCX,1) |
(240) 0x439fe7 VDIVPD 0x80(%R10,%RCX,1),%ZMM4,%ZMM29 |
(240) 0x439fef VMOVUPD 0x80(%R12,%RCX,1),%ZMM7 |
(240) 0x439ff7 VMULPD 0x80(%R10,%RCX,1),%ZMM7,%ZMM1 |
(240) 0x439fff VMULPD %ZMM3,%ZMM1,%ZMM8 |
(240) 0x43a005 VMOVUPD %ZMM8,0x80(%RBX,%RCX,1) |
(240) 0x43a00d VMULPD 0x80(%R10,%RCX,1),%ZMM2,%ZMM0 |
(240) 0x43a015 VMULPD %ZMM29,%ZMM29,%ZMM30 |
(240) 0x43a01b VMULPD %ZMM30,%ZMM0,%ZMM9 |
(240) 0x43a021 VMULPD %ZMM8,%ZMM9,%ZMM12 |
(240) 0x43a027 VSQRTPD %ZMM12,%ZMM7 |
(240) 0x43a02d VMOVUPD %ZMM7,0x80(%RAX,%RCX,1) |
(240) 0x43a035 VDIVPD 0xc0(%R10,%RCX,1),%ZMM4,%ZMM31 |
(240) 0x43a03d VMOVUPD 0xc0(%R12,%RCX,1),%ZMM1 |
(240) 0x43a045 VMULPD 0xc0(%R10,%RCX,1),%ZMM1,%ZMM8 |
(240) 0x43a04d VMULPD %ZMM3,%ZMM8,%ZMM9 |
(240) 0x43a053 VMOVUPD %ZMM9,0xc0(%RBX,%RCX,1) |
(240) 0x43a05b VMULPD 0xc0(%R10,%RCX,1),%ZMM2,%ZMM0 |
(240) 0x43a063 VMULPD %ZMM31,%ZMM31,%ZMM22 |
(240) 0x43a069 VMULPD %ZMM22,%ZMM0,%ZMM12 |
(240) 0x43a06f VMULPD %ZMM9,%ZMM12,%ZMM7 |
(240) 0x43a075 VSQRTPD %ZMM7,%ZMM1 |
(240) 0x43a07b VMOVUPD %ZMM1,0xc0(%RAX,%RCX,1) |
(240) 0x43a083 ADD $0x100,%RCX |
(240) 0x43a08a CMP %R9,%RCX |
(240) 0x43a08d JNE 439f51 |
(239) 0x43a093 MOV 0x3c(%RSP),%R10D |
(239) 0x43a098 MOV %EDX,%R12D |
(239) 0x43a09b AND $-0x8,%R12D |
(239) 0x43a09f ADD %R12D,%ESI |
(239) 0x43a0a2 LEA (%R12,%R10,1),%EDI |
(239) 0x43a0a6 TEST $0x7,%DL |
(239) 0x43a0a9 JE 43a26c |
(239) 0x43a0af SUB %R12D,%EDX |
(239) 0x43a0b2 LEA -0x1(%RDX),%EBX |
(239) 0x43a0b5 CMP $0x2,%EBX |
(239) 0x43a0b8 JBE 43a143 |
(239) 0x43a0be MOVSXD 0x3c(%RSP),%RAX |
(239) 0x43a0c3 MOV 0x18(%RSP),%R9 |
(239) 0x43a0c8 MOV 0x30(%RSP),%R10 |
(239) 0x43a0cd LEA (%R9,%RAX,1),%RCX |
(239) 0x43a0d1 LEA (%R10,%RAX,1),%R9 |
(239) 0x43a0d5 MOV 0x28(%RSP),%R10 |
(239) 0x43a0da ADD %R12,%RCX |
(239) 0x43a0dd ADD %R12,%R9 |
(239) 0x43a0e0 LEA (%R11,%RCX,8),%RBX |
(239) 0x43a0e4 MOV 0x20(%RSP),%RCX |
(239) 0x43a0e9 VDIVPD (%RBX),%YMM18,%YMM19 |
(239) 0x43a0ef ADD %RAX,%RCX |
(239) 0x43a0f2 ADD %R10,%RAX |
(239) 0x43a0f5 ADD %R12,%RCX |
(239) 0x43a0f8 ADD %R12,%RAX |
(239) 0x43a0fb VMOVUPD (%R13,%RCX,8),%YMM8 |
(239) 0x43a102 VMULPD (%RBX),%YMM8,%YMM9 |
(239) 0x43a106 VMULPD %YMM17,%YMM9,%YMM12 |
(239) 0x43a10c VMOVUPD %YMM12,(%R14,%RAX,8) |
(239) 0x43a112 VMULPD (%RBX),%YMM16,%YMM0 |
(239) 0x43a118 VMULPD %YMM19,%YMM19,%YMM20 |
(239) 0x43a11e VMULPD %YMM20,%YMM0,%YMM7 |
(239) 0x43a124 VMULPD %YMM12,%YMM7,%YMM1 |
(239) 0x43a129 VSQRTPD %YMM1,%YMM8 |
(239) 0x43a12d VMOVUPD %YMM8,(%R15,%R9,8) |
(239) 0x43a133 TEST $0x3,%DL |
(239) 0x43a136 JE 43a26c |
(239) 0x43a13c AND $-0x4,%EDX |
(239) 0x43a13f ADD %EDX,%ESI |
(239) 0x43a141 ADD %EDX,%EDI |
(239) 0x43a143 MOV 0x18(%RSP),%RBX |
(239) 0x43a148 MOV 0x20(%RSP),%R9 |
(239) 0x43a14d MOV 0x28(%RSP),%R10 |
(239) 0x43a152 MOVSXD %EDI,%RAX |
(239) 0x43a155 LEA (%RBX,%RAX,1),%RDX |
(239) 0x43a159 LEA (%R9,%RAX,1),%RCX |
(239) 0x43a15d LEA (%R11,%RDX,8),%R12 |
(239) 0x43a161 LEA (%R10,%RAX,1),%RDX |
(239) 0x43a165 VMOVSD (%R12),%XMM9 |
(239) 0x43a16b VMULSD (%R13,%RCX,8),%XMM9,%XMM12 |
(239) 0x43a172 VMULSD %XMM14,%XMM12,%XMM7 |
(239) 0x43a177 VMOVSD %XMM7,(%R14,%RDX,8) |
(239) 0x43a17d VMULSD (%R12),%XMM13,%XMM0 |
(239) 0x43a183 MOV 0x30(%RSP),%R12 |
(239) 0x43a188 MOV 0x38(%RSP),%ECX |
(239) 0x43a18c LEA 0x1(%RSI),%EDX |
(239) 0x43a18f VDIVSD %XMM9,%XMM15,%XMM21 |
(239) 0x43a195 VMULSD %XMM21,%XMM21,%XMM23 |
(239) 0x43a19b VMULSD %XMM23,%XMM0,%XMM1 |
(239) 0x43a1a1 VMULSD %XMM7,%XMM1,%XMM8 |
(239) 0x43a1a5 VSQRTSD %XMM8,%XMM8,%XMM8 |
(239) 0x43a1aa ADD %R12,%RAX |
(239) 0x43a1ad VMOVSD %XMM8,(%R15,%RAX,8) |
(239) 0x43a1b3 LEA 0x1(%RDI),%EAX |
(239) 0x43a1b6 CMP %ECX,%EDX |
(239) 0x43a1b8 JAE 43a26c |
(239) 0x43a1be CLTQ |
(239) 0x43a1c0 ADD $0x2,%ESI |
(239) 0x43a1c3 ADD $0x2,%EDI |
(239) 0x43a1c6 LEA (%RBX,%RAX,1),%RDX |
(239) 0x43a1ca LEA (%R9,%RAX,1),%RCX |
(239) 0x43a1ce LEA (%R11,%RDX,8),%RDX |
(239) 0x43a1d2 VMOVSD (%RDX),%XMM9 |
(239) 0x43a1d6 VMULSD (%R13,%RCX,8),%XMM9,%XMM12 |
(239) 0x43a1dd LEA (%R10,%RAX,1),%RCX |
(239) 0x43a1e1 VMULSD %XMM14,%XMM12,%XMM7 |
(239) 0x43a1e6 ADD %R12,%RAX |
(239) 0x43a1e9 VMOVSD %XMM7,(%R14,%RCX,8) |
(239) 0x43a1ef VMULSD (%RDX),%XMM13,%XMM0 |
(239) 0x43a1f3 MOV %R12,%RDX |
(239) 0x43a1f6 MOV 0x38(%RSP),%R12D |
(239) 0x43a1fb VDIVSD %XMM9,%XMM15,%XMM24 |
(239) 0x43a201 VMULSD %XMM24,%XMM24,%XMM25 |
(239) 0x43a207 VMULSD %XMM25,%XMM0,%XMM1 |
(239) 0x43a20d VMULSD %XMM7,%XMM1,%XMM8 |
(239) 0x43a211 VSQRTSD %XMM8,%XMM8,%XMM8 |
(239) 0x43a216 VMOVSD %XMM8,(%R15,%RAX,8) |
(239) 0x43a21c CMP %R12D,%ESI |
(239) 0x43a21f JAE 43a26c |
(239) 0x43a221 MOVSXD %EDI,%RSI |
(239) 0x43a224 ADD %RSI,%RBX |
(239) 0x43a227 ADD %RSI,%R9 |
(239) 0x43a22a ADD %RSI,%R10 |
(239) 0x43a22d ADD %RSI,%RDX |
(239) 0x43a230 LEA (%R11,%RBX,8),%RDI |
(239) 0x43a234 VMOVSD (%RDI),%XMM9 |
(239) 0x43a238 VMULSD (%R13,%R9,8),%XMM9,%XMM7 |
(239) 0x43a23f VDIVSD %XMM9,%XMM15,%XMM12 |
(239) 0x43a244 VMULSD %XMM14,%XMM7,%XMM1 |
(239) 0x43a249 VMULSD %XMM12,%XMM12,%XMM8 |
(239) 0x43a24e VMOVSD %XMM1,(%R14,%R10,8) |
(239) 0x43a254 VMULSD (%RDI),%XMM13,%XMM0 |
(239) 0x43a258 VMULSD %XMM8,%XMM0,%XMM9 |
(239) 0x43a25d VMULSD %XMM1,%XMM9,%XMM12 |
(239) 0x43a261 VSQRTSD %XMM12,%XMM12,%XMM12 |
(239) 0x43a266 VMOVSD %XMM12,(%R15,%RDX,8) |
(239) 0x43a26c MOV 0x38(%RSP),%ESI |
(239) 0x43a270 INC %R8 |
(239) 0x43a273 LEA (%R8),%R11D |
(239) 0x43a276 CMP %R11D,0x10(%RSP) |
(239) 0x43a27b JLE 43a2a0 |
(239) 0x43a27d MOV 0x8(%RSP),%EDX |
(239) 0x43a281 MOV 0xc(%RSP),%R13D |
(239) 0x43a286 MOV 0x14(%RSP),%R14D |
(239) 0x43a28b SUB %ESI,%EDX |
(239) 0x43a28d MOV %R13D,0x3c(%RSP) |
(239) 0x43a292 JMP 439d90 |
0x43a297 NOPW (%RAX,%RAX,1) |
0x43a2a0 VZEROUPPER |
0x43a2a3 LEA -0x28(%RBP),%RSP |
0x43a2a7 POP %RBX |
0x43a2a8 POP %R12 |
0x43a2aa POP %R13 |
0x43a2ac POP %R14 |
0x43a2ae POP %R15 |
0x43a2b0 POP %RBP |
0x43a2b1 RET |
0x43a2b2 NOPW %CS:(%RAX,%RAX,1) |
0x43a2bd NOPL (%RAX) |
(239) 0x43a2c0 MOV 0x3c(%RSP),%EDI |
(239) 0x43a2c4 XOR %R12D,%R12D |
(239) 0x43a2c7 JMP 43a0af |
0x43a2cc INC %R9D |
0x43a2cf XOR %EDX,%EDX |
0x43a2d1 JMP 439cff |
0x43a2d6 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | ideal_gas.cpp:37-45 |
Module | exec |
nb instructions | 86 |
nb uops | 86 |
loop length | 333 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 3 |
used zmm registers | 3 |
nb stack references | 6 |
micro-operation queue | 14.33 cycles |
front end | 14.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 7.00 | 7.00 | 7.00 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 7.00 | 7.00 | 7.00 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 14.33 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 14.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 7% |
all | 8% |
load | 9% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x24(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x2,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %ESI,0xc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %ECX,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 43a2a3 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2(%RDX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R12D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 43a2a3 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %ESI,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 43a2cc <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x64c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%RSI,1),%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8D,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R8D,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 43a2a3 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xc(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R15),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x14(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x8(%R15),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R15),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x2685b(%RIP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R15),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x26c35(%RIP),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x26c35(%RIP),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM15,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM13,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM15,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM14,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM13,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
ADD %R10D,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R12,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %R11D,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 439cff <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x7f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | ideal_gas.cpp:37-45 |
Module | exec |
nb instructions | 86 |
nb uops | 86 |
loop length | 333 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 3 |
used zmm registers | 3 |
nb stack references | 6 |
micro-operation queue | 14.33 cycles |
front end | 14.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 7.00 | 7.00 | 7.00 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 7.00 | 7.00 | 7.00 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 14.33 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 14.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 7% |
all | 8% |
load | 9% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x24(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x2,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %ESI,0xc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %ECX,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 43a2a3 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2(%RDX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R12D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 43a2a3 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %ESI,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 43a2cc <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x64c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R9D,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R9,%RSI,1),%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8D,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R8D,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 43a2a3 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x623> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xc(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R15),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x14(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x8(%R15),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R15),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x2685b(%RIP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R15),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x26c35(%RIP),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x26c35(%RIP),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM15,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM13,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM15,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM14,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM13,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
ADD %R10D,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R12,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %R11D,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 439cff <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0+0x7f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0– | 5.41 | 2.72 |
▼Loop 239 - ideal_gas.cpp:39-45 - exec– | 0 | 0 |
○Loop 240 - ideal_gas.cpp:40-45 - exec | 5.41 | 2.72 |