Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.01% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 172 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
0x42d8b0 PUSH %RBP |
0x42d8b1 MOV %RSP,%RBP |
0x42d8b4 PUSH %R15 |
0x42d8b6 PUSH %R14 |
0x42d8b8 PUSH %R13 |
0x42d8ba PUSH %R12 |
0x42d8bc PUSH %RBX |
0x42d8bd AND $-0x40,%RSP |
0x42d8c1 SUB $0x100,%RSP |
0x42d8c8 MOV 0x34(%RDI),%EAX |
0x42d8cb MOV 0x28(%RDI),%ESI |
0x42d8ce MOV 0x30(%RDI),%R12D |
0x42d8d2 MOV 0x2c(%RDI),%EDX |
0x42d8d5 ADD $0x4,%EAX |
0x42d8d8 LEA 0x1(%RSI),%ECX |
0x42d8db MOV %EAX,0x68(%RSP) |
0x42d8df MOV %ECX,0x64(%RSP) |
0x42d8e3 CMP %EAX,%R12D |
0x42d8e6 JGE 42e173 |
0x42d8ec MOV %EAX,%R14D |
0x42d8ef LEA 0x3(%RDX),%R15D |
0x42d8f3 SUB %R12D,%R14D |
0x42d8f6 CMP %R15D,%ECX |
0x42d8f9 JGE 42e173 |
0x42d8ff MOV %RDI,%RBX |
0x42d902 MOV %R15D,%EDI |
0x42d905 SUB %ECX,%EDI |
0x42d907 MOV %EDI,0x6c(%RSP) |
0x42d90b CALL 404650 <omp_get_num_threads@plt> |
0x42d910 MOV %EAX,%R13D |
0x42d913 CALL 404540 <omp_get_thread_num@plt> |
0x42d918 XOR %EDX,%EDX |
0x42d91a MOV %EAX,%R8D |
0x42d91d MOV 0x6c(%RSP),%EAX |
0x42d921 IMUL %R14D,%EAX |
0x42d925 DIV %R13D |
0x42d928 MOV %EAX,%ECX |
0x42d92a CMP %EDX,%R8D |
0x42d92d JB 42e196 |
0x42d933 IMUL %ECX,%R8D |
0x42d937 LEA (%R8,%RDX,1),%R10D |
0x42d93b LEA (%RCX,%R10,1),%R9D |
0x42d93f MOV %R9D,0x60(%RSP) |
0x42d944 CMP %R9D,%R10D |
0x42d947 JAE 42e173 |
0x42d94d MOV %R10D,%EAX |
0x42d950 XOR %EDX,%EDX |
0x42d952 MOV 0x64(%RSP),%R11D |
0x42d957 MOV (%RBX),%RSI |
0x42d95a DIVL 0x6c(%RSP) |
0x42d95e MOV 0x10(%RBX),%R14 |
0x42d962 MOV 0x8(%RBX),%RDI |
0x42d966 MOV %R10D,0xfc(%RSP) |
0x42d96e VMOVSD 0x30e8a(%RIP),%XMM3 |
0x42d976 MOV %RSI,0x40(%RSP) |
0x42d97b MOV %R14,0x30(%RSP) |
0x42d980 MOV %RDI,0x28(%RSP) |
0x42d985 MOV %R15D,%R8D |
0x42d988 MOV 0x20(%RBX),%R15 |
0x42d98c MOV 0x18(%RBX),%RBX |
0x42d990 VBROADCASTSD %XMM3,%YMM4 |
0x42d995 VBROADCASTSD %XMM3,%ZMM2 |
0x42d99b MOV %R15,0x38(%RSP) |
0x42d9a0 MOV %RBX,0x20(%RSP) |
0x42d9a5 ADD %R12D,%EAX |
0x42d9a8 ADD %EDX,%R11D |
0x42d9ab MOV %EAX,0xac(%RSP) |
0x42d9b2 CLTQ |
0x42d9b4 SUB %R11D,%R8D |
0x42d9b7 MOV %R11D,0xf8(%RSP) |
0x42d9bf MOV %RAX,0x88(%RSP) |
0x42d9c7 NOPW (%RAX,%RAX,1) |
(192) 0x42d9d0 CMP %R8D,%ECX |
(192) 0x42d9d3 MOV 0xfc(%RSP),%R12D |
(192) 0x42d9db CMOVBE %ECX,%R8D |
(192) 0x42d9df LEA (%R12,%R8,1),%ECX |
(192) 0x42d9e3 MOV %ECX,0xa8(%RSP) |
(192) 0x42d9ea CMP %ECX,%R12D |
(192) 0x42d9ed JAE 42e12d |
(192) 0x42d9f3 MOV 0x40(%RSP),%R10 |
(192) 0x42d9f8 MOV 0x30(%RSP),%R14 |
(192) 0x42d9fd MOV 0xac(%RSP),%EAX |
(192) 0x42da04 MOV 0x38(%RSP),%R9 |
(192) 0x42da09 MOV (%R10),%RCX |
(192) 0x42da0c MOV 0x10(%R14),%RDX |
(192) 0x42da10 MOV 0x10(%R10),%R13 |
(192) 0x42da14 MOV 0x28(%RSP),%R10 |
(192) 0x42da19 DEC %EAX |
(192) 0x42da1b CLTQ |
(192) 0x42da1d MOV (%R9),%R11 |
(192) 0x42da20 MOV 0x10(%R9),%R15 |
(192) 0x42da24 MOV %RDX,0xd8(%RSP) |
(192) 0x42da2c MOV (%R10),%RDX |
(192) 0x42da2f MOV %RAX,%RSI |
(192) 0x42da32 MOV %RAX,%R9 |
(192) 0x42da35 MOV 0x88(%RSP),%R12 |
(192) 0x42da3d MOV (%R14),%RBX |
(192) 0x42da40 IMUL %RCX,%RSI |
(192) 0x42da44 MOV 0x10(%R10),%R14 |
(192) 0x42da48 IMUL %RDX,%RAX |
(192) 0x42da4c IMUL %R12,%RBX |
(192) 0x42da50 MOV %R14,0xb8(%RSP) |
(192) 0x42da58 IMUL %R11,%R9 |
(192) 0x42da5c MOV %RSI,0x90(%RSP) |
(192) 0x42da64 MOV %RAX,0xe0(%RSP) |
(192) 0x42da6c MOV 0x20(%RSP),%RAX |
(192) 0x42da71 IMUL %R12,%RCX |
(192) 0x42da75 IMUL %R12,%R11 |
(192) 0x42da79 MOV %RBX,0xb0(%RSP) |
(192) 0x42da81 LEA -0x1(%R8),%EBX |
(192) 0x42da85 IMUL %R12,%RDX |
(192) 0x42da89 MOV 0x10(%RAX),%RDI |
(192) 0x42da8d MOV %R9,0xc0(%RSP) |
(192) 0x42da95 IMUL (%RAX),%R12 |
(192) 0x42da99 MOV %RCX,0xc8(%RSP) |
(192) 0x42daa1 MOV %R11,0xd0(%RSP) |
(192) 0x42daa9 MOV %RDX,0xf0(%RSP) |
(192) 0x42dab1 MOV %RDI,0xe8(%RSP) |
(192) 0x42dab9 MOV %R12,0xa0(%RSP) |
(192) 0x42dac1 CMP $0x6,%EBX |
(192) 0x42dac4 JBE 42e188 |
(192) 0x42daca MOVSXD 0xf8(%RSP),%RAX |
(192) 0x42dad2 LEA (%RSI,%RAX,1),%R10 |
(192) 0x42dad6 LEA (%R9,%RAX,1),%RSI |
(192) 0x42dada ADD %RAX,%RCX |
(192) 0x42dadd SAL $0x3,%RSI |
(192) 0x42dae1 SAL $0x3,%R10 |
(192) 0x42dae5 LEA (%R11,%RAX,1),%RDI |
(192) 0x42dae9 SAL $0x3,%RCX |
(192) 0x42daed LEA (%R15,%RSI,1),%R9 |
(192) 0x42daf1 LEA (%R13,%R10,1),%RBX |
(192) 0x42daf6 SAL $0x3,%RDI |
(192) 0x42dafa LEA (%R13,%RCX,1),%R14 |
(192) 0x42daff MOV %R9,0x78(%RSP) |
(192) 0x42db04 LEA -0x8(%R13,%R10,1),%R9 |
(192) 0x42db09 LEA -0x8(%R13,%RCX,1),%R10 |
(192) 0x42db0e MOV 0xb0(%RSP),%RCX |
(192) 0x42db16 LEA -0x8(%R15,%RSI,1),%R11 |
(192) 0x42db1b MOV 0xd8(%RSP),%RSI |
(192) 0x42db23 MOV %R14,0x80(%RSP) |
(192) 0x42db2b LEA (%R15,%RDI,1),%R14 |
(192) 0x42db2f LEA (%RCX,%RAX,1),%RDX |
(192) 0x42db33 MOV 0xe0(%RSP),%RCX |
(192) 0x42db3b MOV %R11,0x70(%RSP) |
(192) 0x42db40 LEA -0x8(%R15,%RDI,1),%R11 |
(192) 0x42db45 LEA (%RSI,%RDX,8),%RDI |
(192) 0x42db49 MOV 0xb8(%RSP),%RSI |
(192) 0x42db51 LEA (%RCX,%RAX,1),%RDX |
(192) 0x42db55 LEA (%RSI,%RDX,8),%RCX |
(192) 0x42db59 MOV 0xf0(%RSP),%RDX |
(192) 0x42db61 MOV 0xb8(%RSP),%RSI |
(192) 0x42db69 LEA (%RDX,%RAX,1),%RDX |
(192) 0x42db6d ADD %R12,%RAX |
(192) 0x42db70 LEA (%RSI,%RDX,8),%RSI |
(192) 0x42db74 MOV 0xe8(%RSP),%RDX |
(192) 0x42db7c LEA (%RDX,%RAX,8),%RDX |
(192) 0x42db80 MOV %R8D,%EAX |
(192) 0x42db83 SHR $0x3,%EAX |
(192) 0x42db86 MOV %RAX,%R12 |
(192) 0x42db89 SAL $0x6,%RAX |
(192) 0x42db8d MOV %RAX,0x98(%RSP) |
(192) 0x42db95 XOR %EAX,%EAX |
(192) 0x42db97 AND $0x1,%R12D |
(192) 0x42db9b JE 42dc24 |
(192) 0x42dba1 MOV 0x80(%RSP),%RAX |
(192) 0x42dba9 VMOVUPD (%R10),%ZMM5 |
(192) 0x42dbaf MOV 0x78(%RSP),%R12 |
(192) 0x42dbb4 VMOVUPD (%R9),%ZMM8 |
(192) 0x42dbba VMOVUPD (%RAX),%ZMM6 |
(192) 0x42dbc0 VMULPD (%R11),%ZMM5,%ZMM1 |
(192) 0x42dbc6 VMOVUPD (%R12),%ZMM7 |
(192) 0x42dbcd MOV 0x70(%RSP),%RAX |
(192) 0x42dbd2 VMULPD (%R14),%ZMM6,%ZMM0 |
(192) 0x42dbd8 MOV 0x98(%RSP),%R12 |
(192) 0x42dbe0 VFMADD231PD (%RAX),%ZMM8,%ZMM1 |
(192) 0x42dbe6 MOV $0x40,%EAX |
(192) 0x42dbeb VFMADD231PD (%RBX),%ZMM7,%ZMM0 |
(192) 0x42dbf1 VADDPD %ZMM1,%ZMM0,%ZMM9 |
(192) 0x42dbf7 VMULPD %ZMM2,%ZMM9,%ZMM10 |
(192) 0x42dbfd VMOVUPD %ZMM10,(%RDI) |
(192) 0x42dc03 VMOVUPD (%RSI),%ZMM11 |
(192) 0x42dc09 VSUBPD (%RCX),%ZMM11,%ZMM12 |
(192) 0x42dc0f VADDPD %ZMM10,%ZMM12,%ZMM13 |
(192) 0x42dc15 VMOVUPD %ZMM13,(%RDX) |
(192) 0x42dc1b CMP %R12,%RAX |
(192) 0x42dc1e JE 42dd42 |
(192) 0x42dc24 MOV %R15,0x50(%RSP) |
(192) 0x42dc29 MOV 0x78(%RSP),%R12 |
(192) 0x42dc2e MOV %R8D,0x5c(%RSP) |
(192) 0x42dc33 MOV 0x80(%RSP),%R8 |
(192) 0x42dc3b MOV %R13,0x48(%RSP) |
(192) 0x42dc40 MOV 0x70(%RSP),%R13 |
(193) 0x42dc45 VMOVUPD (%R8,%RAX,1),%ZMM14 |
(193) 0x42dc4c VMOVUPD (%R10,%RAX,1),%ZMM0 |
(193) 0x42dc53 VMOVUPD (%R12,%RAX,1),%ZMM6 |
(193) 0x42dc5a VMOVUPD (%R9,%RAX,1),%ZMM5 |
(193) 0x42dc61 VMULPD (%R14,%RAX,1),%ZMM14,%ZMM15 |
(193) 0x42dc68 MOV 0x98(%RSP),%R15 |
(193) 0x42dc70 VMULPD (%R11,%RAX,1),%ZMM0,%ZMM7 |
(193) 0x42dc77 VFMADD231PD (%RBX,%RAX,1),%ZMM6,%ZMM15 |
(193) 0x42dc7e VFMADD231PD (%R13,%RAX,1),%ZMM5,%ZMM7 |
(193) 0x42dc86 VADDPD %ZMM7,%ZMM15,%ZMM1 |
(193) 0x42dc8c VMULPD %ZMM2,%ZMM1,%ZMM8 |
(193) 0x42dc92 VMOVUPD %ZMM8,(%RDI,%RAX,1) |
(193) 0x42dc99 VMOVUPD (%RSI,%RAX,1),%ZMM9 |
(193) 0x42dca0 VSUBPD (%RCX,%RAX,1),%ZMM9,%ZMM10 |
(193) 0x42dca7 VADDPD %ZMM8,%ZMM10,%ZMM11 |
(193) 0x42dcad VMOVUPD %ZMM11,(%RDX,%RAX,1) |
(193) 0x42dcb4 VMOVUPD 0x40(%R8,%RAX,1),%ZMM12 |
(193) 0x42dcbc VMOVUPD 0x40(%R10,%RAX,1),%ZMM15 |
(193) 0x42dcc4 VMOVUPD 0x40(%R12,%RAX,1),%ZMM14 |
(193) 0x42dccc VMOVUPD 0x40(%R9,%RAX,1),%ZMM0 |
(193) 0x42dcd4 VMULPD 0x40(%R14,%RAX,1),%ZMM12,%ZMM13 |
(193) 0x42dcdc VMULPD 0x40(%R11,%RAX,1),%ZMM15,%ZMM6 |
(193) 0x42dce4 VFMADD231PD 0x40(%RBX,%RAX,1),%ZMM14,%ZMM13 |
(193) 0x42dcec VFMADD231PD 0x40(%R13,%RAX,1),%ZMM0,%ZMM6 |
(193) 0x42dcf4 VADDPD %ZMM6,%ZMM13,%ZMM7 |
(193) 0x42dcfa VMULPD %ZMM2,%ZMM7,%ZMM8 |
(193) 0x42dd00 VMOVUPD %ZMM8,0x40(%RDI,%RAX,1) |
(193) 0x42dd08 VMOVUPD 0x40(%RSI,%RAX,1),%ZMM5 |
(193) 0x42dd10 VSUBPD 0x40(%RCX,%RAX,1),%ZMM5,%ZMM1 |
(193) 0x42dd18 VADDPD %ZMM8,%ZMM1,%ZMM9 |
(193) 0x42dd1e VMOVUPD %ZMM9,0x40(%RDX,%RAX,1) |
(193) 0x42dd26 SUB $-0x80,%RAX |
(193) 0x42dd2a CMP %R15,%RAX |
(193) 0x42dd2d JNE 42dc45 |
(192) 0x42dd33 MOV 0x5c(%RSP),%R8D |
(192) 0x42dd38 MOV 0x50(%RSP),%R15 |
(192) 0x42dd3d MOV 0x48(%RSP),%R13 |
(192) 0x42dd42 MOV 0xf8(%RSP),%EBX |
(192) 0x42dd49 MOV %R8D,%EDX |
(192) 0x42dd4c AND $-0x8,%EDX |
(192) 0x42dd4f ADD %EDX,0xfc(%RSP) |
(192) 0x42dd56 LEA (%RDX,%RBX,1),%ECX |
(192) 0x42dd59 TEST $0x7,%R8B |
(192) 0x42dd5d JE 42e11d |
(192) 0x42dd63 MOV %R8D,%ESI |
(192) 0x42dd66 SUB %EDX,%ESI |
(192) 0x42dd68 LEA -0x1(%RSI),%R14D |
(192) 0x42dd6c CMP $0x2,%R14D |
(192) 0x42dd70 JBE 42de7b |
(192) 0x42dd76 MOVSXD 0xf8(%RSP),%RAX |
(192) 0x42dd7e MOV 0x90(%RSP),%R9 |
(192) 0x42dd86 MOV 0xc8(%RSP),%R10 |
(192) 0x42dd8e MOV 0xd0(%RSP),%R11 |
(192) 0x42dd96 LEA (%R9,%RAX,1),%R8 |
(192) 0x42dd9a MOV 0xc0(%RSP),%RDI |
(192) 0x42dda2 MOV 0xe0(%RSP),%RBX |
(192) 0x42ddaa LEA (%R10,%RAX,1),%R9 |
(192) 0x42ddae LEA (%R11,%RAX,1),%R10 |
(192) 0x42ddb2 ADD %RDX,%R8 |
(192) 0x42ddb5 MOV 0xb0(%RSP),%R12 |
(192) 0x42ddbd ADD %RDX,%R9 |
(192) 0x42ddc0 ADD %RDX,%R10 |
(192) 0x42ddc3 VMOVUPD (%R13,%R8,8),%YMM12 |
(192) 0x42ddca VMOVUPD -0x8(%R13,%R8,8),%YMM15 |
(192) 0x42ddd1 VMOVUPD (%R13,%R9,8),%YMM10 |
(192) 0x42ddd8 VMOVUPD -0x8(%R15,%R10,8),%YMM13 |
(192) 0x42dddf ADD %RAX,%RDI |
(192) 0x42dde2 LEA (%RBX,%RAX,1),%R11 |
(192) 0x42dde6 ADD %RDX,%RDI |
(192) 0x42dde9 MOV 0xf0(%RSP),%RBX |
(192) 0x42ddf1 MOV 0xa0(%RSP),%R14 |
(192) 0x42ddf9 LEA (%R12,%RAX,1),%R12 |
(192) 0x42ddfd VMULPD (%R15,%R10,8),%YMM10,%YMM11 |
(192) 0x42de03 ADD %RDX,%R12 |
(192) 0x42de06 ADD %RDX,%R11 |
(192) 0x42de09 MOV 0xb8(%RSP),%R8 |
(192) 0x42de11 VMULPD -0x8(%R13,%R9,8),%YMM13,%YMM14 |
(192) 0x42de18 ADD %RAX,%RBX |
(192) 0x42de1b ADD %R14,%RAX |
(192) 0x42de1e ADD %RDX,%RBX |
(192) 0x42de21 ADD %RDX,%RAX |
(192) 0x42de24 MOV 0xd8(%RSP),%RDX |
(192) 0x42de2c VFMADD231PD (%R15,%RDI,8),%YMM12,%YMM11 |
(192) 0x42de32 VFMADD231PD -0x8(%R15,%RDI,8),%YMM15,%YMM14 |
(192) 0x42de39 MOV 0xe8(%RSP),%RDI |
(192) 0x42de41 VADDPD %YMM14,%YMM11,%YMM6 |
(192) 0x42de46 VMULPD %YMM4,%YMM6,%YMM0 |
(192) 0x42de4a VMOVUPD %YMM0,(%RDX,%R12,8) |
(192) 0x42de50 VMOVUPD (%R8,%RBX,8),%YMM7 |
(192) 0x42de56 VSUBPD (%R8,%R11,8),%YMM7,%YMM8 |
(192) 0x42de5c VADDPD %YMM0,%YMM8,%YMM5 |
(192) 0x42de60 VMOVUPD %YMM5,(%RDI,%RAX,8) |
(192) 0x42de65 TEST $0x3,%SIL |
(192) 0x42de69 JE 42e11d |
(192) 0x42de6f AND $-0x4,%ESI |
(192) 0x42de72 ADD %ESI,0xfc(%RSP) |
(192) 0x42de79 ADD %ESI,%ECX |
(192) 0x42de7b MOV 0xc0(%RSP),%R10 |
(192) 0x42de83 MOV 0xd0(%RSP),%RBX |
(192) 0x42de8b MOVSXD %ECX,%RAX |
(192) 0x42de8e LEA -0x1(%RCX),%EDX |
(192) 0x42de91 MOVSXD %EDX,%RDX |
(192) 0x42de94 MOV 0xc8(%RSP),%R14 |
(192) 0x42de9c MOV 0x90(%RSP),%R12 |
(192) 0x42dea4 LEA (%R10,%RAX,1),%R9 |
(192) 0x42dea8 ADD %RAX,%RBX |
(192) 0x42deab LEA (%R15,%R9,8),%RDI |
(192) 0x42deaf LEA (%R15,%RBX,8),%R9 |
(192) 0x42deb3 MOV 0xd0(%RSP),%RBX |
(192) 0x42debb LEA (%R14,%RAX,1),%R11 |
(192) 0x42debf VMOVSD (%R9),%XMM1 |
(192) 0x42dec4 VMOVSD (%RDI),%XMM10 |
(192) 0x42dec8 LEA (%R12,%RAX,1),%RSI |
(192) 0x42decc ADD %RDX,%RBX |
(192) 0x42decf LEA (%R13,%R11,8),%R8 |
(192) 0x42ded4 LEA (%RDX,%R10,1),%R11 |
(192) 0x42ded8 VMOVSD (%R15,%RBX,8),%XMM11 |
(192) 0x42dede LEA (%RDX,%R12,1),%R10 |
(192) 0x42dee2 ADD %R14,%RDX |
(192) 0x42dee5 VMOVSD (%R15,%R11,8),%XMM13 |
(192) 0x42deeb VMULSD (%R8),%XMM1,%XMM9 |
(192) 0x42def0 LEA (%R13,%RSI,8),%RSI |
(192) 0x42def5 MOV 0xb0(%RSP),%R14 |
(192) 0x42defd VMULSD (%R13,%RDX,8),%XMM11,%XMM12 |
(192) 0x42df04 MOV 0xd8(%RSP),%R11 |
(192) 0x42df0c LEA (%R14,%RAX,1),%RDX |
(192) 0x42df10 MOV 0xe0(%RSP),%R14 |
(192) 0x42df18 VFMADD231SD (%RSI),%XMM10,%XMM9 |
(192) 0x42df1d VFMADD231SD (%R13,%R10,8),%XMM13,%XMM12 |
(192) 0x42df24 MOV 0xa0(%RSP),%R10 |
(192) 0x42df2c LEA (%R10,%RAX,1),%RBX |
(192) 0x42df30 MOV 0xa8(%RSP),%R10D |
(192) 0x42df38 VADDSD %XMM12,%XMM9,%XMM14 |
(192) 0x42df3d VMULSD %XMM3,%XMM14,%XMM15 |
(192) 0x42df41 VMOVSD %XMM15,(%R11,%RDX,8) |
(192) 0x42df47 MOV 0xf0(%RSP),%RDX |
(192) 0x42df4f MOV 0xfc(%RSP),%R11D |
(192) 0x42df57 ADD %RAX,%RDX |
(192) 0x42df5a ADD %R14,%RAX |
(192) 0x42df5d MOV 0xb8(%RSP),%R14 |
(192) 0x42df65 INC %R11D |
(192) 0x42df68 VMOVSD (%R14,%RDX,8),%XMM6 |
(192) 0x42df6e VSUBSD (%R14,%RAX,8),%XMM6,%XMM0 |
(192) 0x42df74 MOV 0xe8(%RSP),%RAX |
(192) 0x42df7c VADDSD %XMM15,%XMM0,%XMM7 |
(192) 0x42df81 VMOVSD %XMM7,(%RAX,%RBX,8) |
(192) 0x42df86 LEA 0x1(%RCX),%EAX |
(192) 0x42df89 CMP %R10D,%R11D |
(192) 0x42df8c JAE 42e11d |
(192) 0x42df92 MOV 0xc0(%RSP),%RDX |
(192) 0x42df9a CLTQ |
(192) 0x42df9c VMOVSD (%R9),%XMM9 |
(192) 0x42dfa1 ADD $0x2,%ECX |
(192) 0x42dfa4 LEA (%R12,%RAX,1),%RBX |
(192) 0x42dfa8 VMOVSD (%RDI),%XMM11 |
(192) 0x42dfac MOV 0xb0(%RSP),%R9 |
(192) 0x42dfb4 LEA (%RDX,%RAX,1),%R11 |
(192) 0x42dfb8 LEA (%R13,%RBX,8),%R10 |
(192) 0x42dfbd MOV 0xc8(%RSP),%RBX |
(192) 0x42dfc5 VMULSD (%R8),%XMM9,%XMM10 |
(192) 0x42dfca LEA (%R15,%R11,8),%RDX |
(192) 0x42dfce MOV 0xd0(%RSP),%R11 |
(192) 0x42dfd6 MOV 0xd8(%RSP),%RDI |
(192) 0x42dfde ADD %RAX,%RBX |
(192) 0x42dfe1 VMOVSD (%RDX),%XMM1 |
(192) 0x42dfe5 MOV 0xa0(%RSP),%R8 |
(192) 0x42dfed ADD %RAX,%R11 |
(192) 0x42dff0 LEA (%R13,%RBX,8),%RBX |
(192) 0x42dff5 LEA (%R15,%R11,8),%R11 |
(192) 0x42dff9 VMOVSD (%R11),%XMM8 |
(192) 0x42dffe VFMADD231SD (%RSI),%XMM11,%XMM10 |
(192) 0x42e003 LEA (%R9,%RAX,1),%RSI |
(192) 0x42e007 VMULSD (%RBX),%XMM8,%XMM5 |
(192) 0x42e00b VFMADD132SD (%R10),%XMM5,%XMM1 |
(192) 0x42e010 VADDSD %XMM1,%XMM10,%XMM12 |
(192) 0x42e014 VMULSD %XMM3,%XMM12,%XMM13 |
(192) 0x42e018 VMOVSD %XMM13,(%RDI,%RSI,8) |
(192) 0x42e01d MOV 0xf0(%RSP),%RDI |
(192) 0x42e025 LEA (%R8,%RAX,1),%RSI |
(192) 0x42e029 MOV 0xe0(%RSP),%R8 |
(192) 0x42e031 LEA (%RDI,%RAX,1),%RDI |
(192) 0x42e035 ADD %R8,%RAX |
(192) 0x42e038 VMOVSD (%R14,%RDI,8),%XMM14 |
(192) 0x42e03e MOV 0xfc(%RSP),%EDI |
(192) 0x42e045 VSUBSD (%R14,%RAX,8),%XMM14,%XMM15 |
(192) 0x42e04b MOV 0xe8(%RSP),%RAX |
(192) 0x42e053 ADD $0x2,%EDI |
(192) 0x42e056 VADDSD %XMM13,%XMM15,%XMM6 |
(192) 0x42e05b VMOVSD %XMM6,(%RAX,%RSI,8) |
(192) 0x42e060 MOV 0xa8(%RSP),%ESI |
(192) 0x42e067 CMP %ESI,%EDI |
(192) 0x42e069 JAE 42e11d |
(192) 0x42e06f MOV 0xc8(%RSP),%RDI |
(192) 0x42e077 MOVSXD %ECX,%RCX |
(192) 0x42e07a MOV 0xd0(%RSP),%R8 |
(192) 0x42e082 VMOVSD (%RBX),%XMM5 |
(192) 0x42e086 MOV 0xc0(%RSP),%RAX |
(192) 0x42e08e ADD %RCX,%R12 |
(192) 0x42e091 ADD %RCX,%R9 |
(192) 0x42e094 ADD %RCX,%RDI |
(192) 0x42e097 ADD %RCX,%R8 |
(192) 0x42e09a VMOVSD (%R13,%R12,8),%XMM7 |
(192) 0x42e0a1 VMOVSD (%R10),%XMM9 |
(192) 0x42e0a6 VMOVSD (%R13,%RDI,8),%XMM0 |
(192) 0x42e0ad VMULSD (%R11),%XMM5,%XMM1 |
(192) 0x42e0b2 ADD %RCX,%RAX |
(192) 0x42e0b5 MOV 0xd8(%RSP),%R13 |
(192) 0x42e0bd MOV 0xf0(%RSP),%R10 |
(192) 0x42e0c5 VMULSD (%R15,%R8,8),%XMM0,%XMM8 |
(192) 0x42e0cb ADD %RCX,%R10 |
(192) 0x42e0ce VFMADD231SD (%RDX),%XMM9,%XMM1 |
(192) 0x42e0d3 MOV 0xe0(%RSP),%RDX |
(192) 0x42e0db VFMADD231SD (%R15,%RAX,8),%XMM7,%XMM8 |
(192) 0x42e0e1 ADD %RCX,%RDX |
(192) 0x42e0e4 MOV 0xa0(%RSP),%R15 |
(192) 0x42e0ec ADD %RCX,%R15 |
(192) 0x42e0ef VADDSD %XMM1,%XMM8,%XMM10 |
(192) 0x42e0f3 VMULSD %XMM3,%XMM10,%XMM11 |
(192) 0x42e0f7 VMOVSD %XMM11,(%R13,%R9,8) |
(192) 0x42e0fe VMOVSD (%R14,%R10,8),%XMM12 |
(192) 0x42e104 VSUBSD (%R14,%RDX,8),%XMM12,%XMM13 |
(192) 0x42e10a MOV 0xe8(%RSP),%R14 |
(192) 0x42e112 VADDSD %XMM11,%XMM13,%XMM14 |
(192) 0x42e117 VMOVSD %XMM14,(%R14,%R15,8) |
(192) 0x42e11d MOV 0xa8(%RSP),%R11D |
(192) 0x42e125 MOV %R11D,0xfc(%RSP) |
(192) 0x42e12d INCL 0xac(%RSP) |
(192) 0x42e134 INCQ 0x88(%RSP) |
(192) 0x42e13c MOV 0xac(%RSP),%ESI |
(192) 0x42e143 CMP %ESI,0x68(%RSP) |
(192) 0x42e147 JLE 42e170 |
(192) 0x42e149 MOV 0x60(%RSP),%ECX |
(192) 0x42e14d MOV 0xfc(%RSP),%R12D |
(192) 0x42e155 MOV 0x64(%RSP),%EAX |
(192) 0x42e159 MOV 0x6c(%RSP),%R8D |
(192) 0x42e15e SUB %R12D,%ECX |
(192) 0x42e161 MOV %EAX,0xf8(%RSP) |
(192) 0x42e168 JMP 42d9d0 |
0x42e16d NOPL (%RAX) |
0x42e170 VZEROUPPER |
0x42e173 LEA -0x28(%RBP),%RSP |
0x42e177 POP %RBX |
0x42e178 POP %R12 |
0x42e17a POP %R13 |
0x42e17c POP %R14 |
0x42e17e POP %R15 |
0x42e180 POP %RBP |
0x42e181 RET |
0x42e182 NOPW (%RAX,%RAX,1) |
(192) 0x42e188 MOV 0xf8(%RSP),%ECX |
(192) 0x42e18f XOR %EDX,%EDX |
(192) 0x42e191 JMP 42dd63 |
0x42e196 INC %ECX |
0x42e198 XOR %EDX,%EDX |
0x42e19a JMP 42d933 |
0x42e19f NOP |
Path / |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 87 |
nb uops | 97 |
loop length | 325 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 14 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 8.00 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
cycles | 6.30 | 11.90 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.43-15.52 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 11.90 |
DIV/SQRT | 12.00 |
Overall L1 | 16.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R12D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x6c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42e196 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x64(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x6c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0xfc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x30e8a(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0xac(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R11D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42d933 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x83> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 87 |
nb uops | 97 |
loop length | 325 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 14 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 8.00 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
cycles | 6.30 | 11.90 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.43-15.52 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 11.90 |
DIV/SQRT | 12.00 |
Overall L1 | 16.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R12D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x6c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42e196 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x64(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x6c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0xfc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x30e8a(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0xac(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R11D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42d933 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x83> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | 3.01 | 2.27 |
▼Loop 192 - advec_mom.cpp:169-172 - exec | 0 | 0 |
○Loop 193 - advec_mom.cpp:170-172 - exec | 3.01 | 2.26 |