Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.55% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.55% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 71 - 75 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for simd collapse(2) |
72: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
73: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
74: post_vol(i, j) = volume(i, j); |
75: pre_vol(i, j) = post_vol(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42db70 PUSH %RBP |
0x42db71 MOV %RSP,%RBP |
0x42db74 PUSH %R15 |
0x42db76 PUSH %R14 |
0x42db78 PUSH %R13 |
0x42db7a PUSH %R12 |
0x42db7c PUSH %RBX |
0x42db7d MOV %RDI,%R13 |
0x42db80 AND $-0x40,%RSP |
0x42db84 SUB $0x40,%RSP |
0x42db88 MOV 0x28(%RDI),%EAX |
0x42db8b MOV 0x2c(%RDI),%EDX |
0x42db8e MOV 0x20(%RDI),%EDI |
0x42db91 MOV 0x24(%R13),%ECX |
0x42db95 ADD $0x4,%EDX |
0x42db98 DEC %EDI |
0x42db9a LEA -0x1(%RAX),%R14D |
0x42db9e MOV %EDX,0x18(%RSP) |
0x42dba2 MOV %EDI,0x14(%RSP) |
0x42dba6 CMP %EDX,%R14D |
0x42dba9 JGE 42e173 |
0x42dbaf MOV %EDX,%R12D |
0x42dbb2 LEA 0x4(%RCX),%R15D |
0x42dbb6 SUB %R14D,%R12D |
0x42dbb9 CMP %R15D,%EDI |
0x42dbbc JGE 42e173 |
0x42dbc2 MOV %R15D,%EBX |
0x42dbc5 SUB %EDI,%EBX |
0x42dbc7 MOV %EBX,0x1c(%RSP) |
0x42dbcb CALL 404650 <omp_get_num_threads@plt> |
0x42dbd0 MOV %EAX,%EBX |
0x42dbd2 CALL 404540 <omp_get_thread_num@plt> |
0x42dbd7 XOR %EDX,%EDX |
0x42dbd9 MOV %EAX,%R8D |
0x42dbdc MOV 0x1c(%RSP),%EAX |
0x42dbe0 IMUL %R12D,%EAX |
0x42dbe4 DIV %EBX |
0x42dbe6 MOV %EAX,%ESI |
0x42dbe8 CMP %EDX,%R8D |
0x42dbeb JB 42e19c |
0x42dbf1 IMUL %ESI,%R8D |
0x42dbf5 LEA (%R8,%RDX,1),%EDI |
0x42dbf9 LEA (%RSI,%RDI,1),%R9D |
0x42dbfd MOV %R9D,0x10(%RSP) |
0x42dc02 CMP %R9D,%EDI |
0x42dc05 JAE 42e173 |
0x42dc0b MOV %EDI,%EAX |
0x42dc0d XOR %EDX,%EDX |
0x42dc0f MOV 0x14(%RSP),%R10D |
0x42dc14 VMOVQ 0x8(%R13),%XMM4 |
0x42dc1a DIVL 0x1c(%RSP) |
0x42dc1e VMOVQ 0x18(%R13),%XMM5 |
0x42dc24 VMOVQ (%R13),%XMM3 |
0x42dc2a VMOVQ 0x10(%R13),%XMM2 |
0x42dc30 ADD %EDX,%R10D |
0x42dc33 LEA (%RAX,%R14,1),%R11D |
0x42dc37 SUB %R10D,%R15D |
0x42dc3a MOVSXD %R11D,%R8 |
0x42dc3d MOV %R10D,0x3c(%RSP) |
0x42dc42 NOPW %CS:(%RAX,%RAX,1) |
0x42dc4d NOPL (%RAX) |
(183) 0x42dc50 CMP %R15D,%ESI |
(183) 0x42dc53 MOV %R15D,%ECX |
(183) 0x42dc56 CMOVBE %ESI,%ECX |
(183) 0x42dc59 LEA (%RDI,%RCX,1),%ESI |
(183) 0x42dc5c MOV %ESI,0x38(%RSP) |
(183) 0x42dc60 CMP %ESI,%EDI |
(183) 0x42dc62 JAE 42e13b |
(183) 0x42dc68 VMOVQ %XMM5,%R13 |
(183) 0x42dc6d VMOVQ %XMM4,%R15 |
(183) 0x42dc72 LEA -0x1(%RCX),%R10D |
(183) 0x42dc76 VMOVQ %XMM2,%RAX |
(183) 0x42dc7b MOV (%R13),%R9 |
(183) 0x42dc7f MOV (%R15),%R12 |
(183) 0x42dc82 VMOVQ %XMM3,%RBX |
(183) 0x42dc87 MOV (%RAX),%RDX |
(183) 0x42dc8a MOV (%RBX),%R11 |
(183) 0x42dc8d VMOVQ 0x10(%R13),%XMM7 |
(183) 0x42dc93 VMOVQ 0x10(%R15),%XMM8 |
(183) 0x42dc99 MOV 0x10(%RBX),%R14 |
(183) 0x42dc9d MOV 0x10(%RAX),%R13 |
(183) 0x42dca1 IMUL %R8,%R12 |
(183) 0x42dca5 IMUL %R8,%R9 |
(183) 0x42dca9 IMUL %R8,%RDX |
(183) 0x42dcad IMUL %R8,%R11 |
(183) 0x42dcb1 MOV %R12,0x20(%RSP) |
(183) 0x42dcb6 MOV %R9,0x28(%RSP) |
(183) 0x42dcbb MOV %RDX,0x30(%RSP) |
(183) 0x42dcc0 CMP $0x6,%R10D |
(183) 0x42dcc4 JBE 42e190 |
(183) 0x42dcca MOVSXD 0x3c(%RSP),%RAX |
(183) 0x42dccf VMOVQ %XMM8,%RBX |
(183) 0x42dcd4 VMOVQ %XMM7,%R10 |
(183) 0x42dcd9 LEA 0x1(%R11,%RAX,1),%R15 |
(183) 0x42dcde LEA (%R9,%RAX,1),%R9 |
(183) 0x42dce2 LEA (%R12,%RAX,1),%R12 |
(183) 0x42dce6 ADD %RDX,%RAX |
(183) 0x42dce9 SAL $0x3,%R15 |
(183) 0x42dced LEA (%RBX,%R12,8),%R12 |
(183) 0x42dcf1 LEA (%R10,%R9,8),%RBX |
(183) 0x42dcf5 LEA (%R14,%R15,1),%R10 |
(183) 0x42dcf9 LEA -0x8(%R14,%R15,1),%R9 |
(183) 0x42dcfe MOV %ECX,%R15D |
(183) 0x42dd01 LEA (%R13,%RAX,8),%RDX |
(183) 0x42dd06 XOR %EAX,%EAX |
(183) 0x42dd08 SHR $0x3,%R15D |
(183) 0x42dd0c SAL $0x6,%R15 |
(183) 0x42dd10 LEA -0x40(%R15),%RSI |
(183) 0x42dd14 SHR $0x6,%RSI |
(183) 0x42dd18 INC %RSI |
(183) 0x42dd1b AND $0x7,%ESI |
(183) 0x42dd1e JE 42de6b |
(183) 0x42dd24 CMP $0x1,%RSI |
(183) 0x42dd28 JE 42de3b |
(183) 0x42dd2e CMP $0x2,%RSI |
(183) 0x42dd32 JE 42de14 |
(183) 0x42dd38 CMP $0x3,%RSI |
(183) 0x42dd3c JE 42dded |
(183) 0x42dd42 CMP $0x4,%RSI |
(183) 0x42dd46 JE 42ddc6 |
(183) 0x42dd48 CMP $0x5,%RSI |
(183) 0x42dd4c JE 42dd9f |
(183) 0x42dd4e CMP $0x6,%RSI |
(183) 0x42dd52 JE 42dd78 |
(183) 0x42dd54 VMOVUPD (%R12),%ZMM6 |
(183) 0x42dd5b MOV $0x40,%EAX |
(183) 0x42dd60 VMOVUPD %ZMM6,(%RBX) |
(183) 0x42dd66 VADDPD (%R10),%ZMM6,%ZMM0 |
(183) 0x42dd6c VSUBPD (%R9),%ZMM0,%ZMM1 |
(183) 0x42dd72 VMOVUPD %ZMM1,(%RDX) |
(183) 0x42dd78 VMOVUPD (%R12,%RAX,1),%ZMM9 |
(183) 0x42dd7f VMOVUPD %ZMM9,(%RBX,%RAX,1) |
(183) 0x42dd86 VADDPD (%R10,%RAX,1),%ZMM9,%ZMM10 |
(183) 0x42dd8d VSUBPD (%R9,%RAX,1),%ZMM10,%ZMM11 |
(183) 0x42dd94 VMOVUPD %ZMM11,(%RDX,%RAX,1) |
(183) 0x42dd9b ADD $0x40,%RAX |
(183) 0x42dd9f VMOVUPD (%R12,%RAX,1),%ZMM12 |
(183) 0x42dda6 VMOVUPD %ZMM12,(%RBX,%RAX,1) |
(183) 0x42ddad VADDPD (%R10,%RAX,1),%ZMM12,%ZMM13 |
(183) 0x42ddb4 VSUBPD (%R9,%RAX,1),%ZMM13,%ZMM14 |
(183) 0x42ddbb VMOVUPD %ZMM14,(%RDX,%RAX,1) |
(183) 0x42ddc2 ADD $0x40,%RAX |
(183) 0x42ddc6 VMOVUPD (%R12,%RAX,1),%ZMM15 |
(183) 0x42ddcd VMOVUPD %ZMM15,(%RBX,%RAX,1) |
(183) 0x42ddd4 VADDPD (%R10,%RAX,1),%ZMM15,%ZMM6 |
(183) 0x42dddb VSUBPD (%R9,%RAX,1),%ZMM6,%ZMM0 |
(183) 0x42dde2 VMOVUPD %ZMM0,(%RDX,%RAX,1) |
(183) 0x42dde9 ADD $0x40,%RAX |
(183) 0x42dded VMOVUPD (%R12,%RAX,1),%ZMM1 |
(183) 0x42ddf4 VMOVUPD %ZMM1,(%RBX,%RAX,1) |
(183) 0x42ddfb VADDPD (%R10,%RAX,1),%ZMM1,%ZMM9 |
(183) 0x42de02 VSUBPD (%R9,%RAX,1),%ZMM9,%ZMM10 |
(183) 0x42de09 VMOVUPD %ZMM10,(%RDX,%RAX,1) |
(183) 0x42de10 ADD $0x40,%RAX |
(183) 0x42de14 VMOVUPD (%R12,%RAX,1),%ZMM11 |
(183) 0x42de1b VMOVUPD %ZMM11,(%RBX,%RAX,1) |
(183) 0x42de22 VADDPD (%R10,%RAX,1),%ZMM11,%ZMM12 |
(183) 0x42de29 VSUBPD (%R9,%RAX,1),%ZMM12,%ZMM13 |
(183) 0x42de30 VMOVUPD %ZMM13,(%RDX,%RAX,1) |
(183) 0x42de37 ADD $0x40,%RAX |
(183) 0x42de3b VMOVUPD (%R12,%RAX,1),%ZMM14 |
(183) 0x42de42 VMOVUPD %ZMM14,(%RBX,%RAX,1) |
(183) 0x42de49 VADDPD (%R10,%RAX,1),%ZMM14,%ZMM15 |
(183) 0x42de50 VSUBPD (%R9,%RAX,1),%ZMM15,%ZMM6 |
(183) 0x42de57 VMOVUPD %ZMM6,(%RDX,%RAX,1) |
(183) 0x42de5e ADD $0x40,%RAX |
(183) 0x42de62 CMP %RAX,%R15 |
(183) 0x42de65 JE 42dfb5 |
(184) 0x42de6b VMOVUPD (%R12,%RAX,1),%ZMM0 |
(184) 0x42de72 VMOVUPD %ZMM0,(%RBX,%RAX,1) |
(184) 0x42de79 VADDPD (%R10,%RAX,1),%ZMM0,%ZMM1 |
(184) 0x42de80 VSUBPD (%R9,%RAX,1),%ZMM1,%ZMM9 |
(184) 0x42de87 VMOVUPD %ZMM9,(%RDX,%RAX,1) |
(184) 0x42de8e VMOVUPD 0x40(%R12,%RAX,1),%ZMM10 |
(184) 0x42de96 VMOVUPD %ZMM10,0x40(%RBX,%RAX,1) |
(184) 0x42de9e VADDPD 0x40(%R10,%RAX,1),%ZMM10,%ZMM11 |
(184) 0x42dea6 VSUBPD 0x40(%R9,%RAX,1),%ZMM11,%ZMM12 |
(184) 0x42deae VMOVUPD %ZMM12,0x40(%RDX,%RAX,1) |
(184) 0x42deb6 VMOVUPD 0x80(%R12,%RAX,1),%ZMM13 |
(184) 0x42debe VMOVUPD %ZMM13,0x80(%RBX,%RAX,1) |
(184) 0x42dec6 VADDPD 0x80(%R10,%RAX,1),%ZMM13,%ZMM14 |
(184) 0x42dece VSUBPD 0x80(%R9,%RAX,1),%ZMM14,%ZMM15 |
(184) 0x42ded6 VMOVUPD %ZMM15,0x80(%RDX,%RAX,1) |
(184) 0x42dede VMOVUPD 0xc0(%R12,%RAX,1),%ZMM6 |
(184) 0x42dee6 VMOVUPD %ZMM6,0xc0(%RBX,%RAX,1) |
(184) 0x42deee VADDPD 0xc0(%R10,%RAX,1),%ZMM6,%ZMM0 |
(184) 0x42def6 VSUBPD 0xc0(%R9,%RAX,1),%ZMM0,%ZMM1 |
(184) 0x42defe VMOVUPD %ZMM1,0xc0(%RDX,%RAX,1) |
(184) 0x42df06 VMOVUPD 0x100(%R12,%RAX,1),%ZMM9 |
(184) 0x42df0e VMOVUPD %ZMM9,0x100(%RBX,%RAX,1) |
(184) 0x42df16 VADDPD 0x100(%R10,%RAX,1),%ZMM9,%ZMM10 |
(184) 0x42df1e VSUBPD 0x100(%R9,%RAX,1),%ZMM10,%ZMM11 |
(184) 0x42df26 VMOVUPD %ZMM11,0x100(%RDX,%RAX,1) |
(184) 0x42df2e VMOVUPD 0x140(%R12,%RAX,1),%ZMM12 |
(184) 0x42df36 VMOVUPD %ZMM12,0x140(%RBX,%RAX,1) |
(184) 0x42df3e VADDPD 0x140(%R10,%RAX,1),%ZMM12,%ZMM13 |
(184) 0x42df46 VSUBPD 0x140(%R9,%RAX,1),%ZMM13,%ZMM14 |
(184) 0x42df4e VMOVUPD %ZMM14,0x140(%RDX,%RAX,1) |
(184) 0x42df56 VMOVUPD 0x180(%R12,%RAX,1),%ZMM15 |
(184) 0x42df5e VMOVUPD %ZMM15,0x180(%RBX,%RAX,1) |
(184) 0x42df66 VADDPD 0x180(%R10,%RAX,1),%ZMM15,%ZMM6 |
(184) 0x42df6e VSUBPD 0x180(%R9,%RAX,1),%ZMM6,%ZMM0 |
(184) 0x42df76 VMOVUPD %ZMM0,0x180(%RDX,%RAX,1) |
(184) 0x42df7e VMOVUPD 0x1c0(%R12,%RAX,1),%ZMM1 |
(184) 0x42df86 VMOVUPD %ZMM1,0x1c0(%RBX,%RAX,1) |
(184) 0x42df8e VADDPD 0x1c0(%R10,%RAX,1),%ZMM1,%ZMM9 |
(184) 0x42df96 VSUBPD 0x1c0(%R9,%RAX,1),%ZMM9,%ZMM10 |
(184) 0x42df9e VMOVUPD %ZMM10,0x1c0(%RDX,%RAX,1) |
(184) 0x42dfa6 ADD $0x200,%RAX |
(184) 0x42dfac CMP %RAX,%R15 |
(184) 0x42dfaf JNE 42de6b |
(183) 0x42dfb5 MOV 0x3c(%RSP),%R12D |
(183) 0x42dfba MOV %ECX,%ESI |
(183) 0x42dfbc AND $-0x8,%ESI |
(183) 0x42dfbf ADD %ESI,%EDI |
(183) 0x42dfc1 LEA (%RSI,%R12,1),%R15D |
(183) 0x42dfc5 TEST $0x7,%CL |
(183) 0x42dfc8 JE 42e137 |
(183) 0x42dfce SUB %ESI,%ECX |
(183) 0x42dfd0 LEA -0x1(%RCX),%EBX |
(183) 0x42dfd3 CMP $0x2,%EBX |
(183) 0x42dfd6 JBE 42e049 |
(183) 0x42dfd8 MOVSXD 0x3c(%RSP),%R10 |
(183) 0x42dfdd MOV 0x20(%RSP),%RDX |
(183) 0x42dfe2 VMOVQ %XMM8,%RBX |
(183) 0x42dfe7 LEA (%R11,%R10,1),%R9 |
(183) 0x42dfeb LEA (%RDX,%R10,1),%R12 |
(183) 0x42dfef LEA 0x1(%RSI,%R9,1),%RAX |
(183) 0x42dff4 ADD %RSI,%R12 |
(183) 0x42dff7 MOV 0x28(%RSP),%R9 |
(183) 0x42dffc VMOVUPD (%RBX,%R12,8),%YMM11 |
(183) 0x42e002 VMOVQ %XMM7,%R12 |
(183) 0x42e007 LEA (%R9,%R10,1),%RDX |
(183) 0x42e00b ADD %RSI,%RDX |
(183) 0x42e00e VMOVUPD %YMM11,(%R12,%RDX,8) |
(183) 0x42e014 VMOVUPD (%R14,%RAX,8),%YMM12 |
(183) 0x42e01a VSUBPD -0x8(%R14,%RAX,8),%YMM12,%YMM13 |
(183) 0x42e021 MOV 0x30(%RSP),%RAX |
(183) 0x42e026 ADD %RAX,%R10 |
(183) 0x42e029 ADD %RSI,%R10 |
(183) 0x42e02c VADDPD %YMM11,%YMM13,%YMM14 |
(183) 0x42e031 VMOVUPD %YMM14,(%R13,%R10,8) |
(183) 0x42e038 TEST $0x3,%CL |
(183) 0x42e03b JE 42e137 |
(183) 0x42e041 AND $-0x4,%ECX |
(183) 0x42e044 ADD %ECX,%EDI |
(183) 0x42e046 ADD %ECX,%R15D |
(183) 0x42e049 MOV 0x20(%RSP),%RBX |
(183) 0x42e04e MOV 0x28(%RSP),%R9 |
(183) 0x42e053 MOVSXD %R15D,%RDX |
(183) 0x42e056 VMOVQ %XMM8,%RCX |
(183) 0x42e05b VMOVQ %XMM7,%R12 |
(183) 0x42e060 LEA 0x1(%R15),%EAX |
(183) 0x42e064 CLTQ |
(183) 0x42e066 LEA (%RBX,%RDX,1),%RSI |
(183) 0x42e06a LEA (%R9,%RDX,1),%R10 |
(183) 0x42e06e VMOVSD (%RCX,%RSI,8),%XMM15 |
(183) 0x42e073 VMOVSD %XMM15,(%R12,%R10,8) |
(183) 0x42e079 MOV 0x30(%RSP),%R10 |
(183) 0x42e07e LEA (%R11,%RAX,1),%RSI |
(183) 0x42e082 LEA (%R14,%RSI,8),%RCX |
(183) 0x42e086 MOV 0x38(%RSP),%ESI |
(183) 0x42e08a VMOVSD (%RCX),%XMM6 |
(183) 0x42e08e LEA (%R10,%RDX,1),%R12 |
(183) 0x42e092 ADD %R11,%RDX |
(183) 0x42e095 VSUBSD (%R14,%RDX,8),%XMM6,%XMM0 |
(183) 0x42e09b LEA 0x1(%RDI),%EDX |
(183) 0x42e09e VADDSD %XMM15,%XMM0,%XMM1 |
(183) 0x42e0a3 VMOVSD %XMM1,(%R13,%R12,8) |
(183) 0x42e0aa CMP %ESI,%EDX |
(183) 0x42e0ac JAE 42e137 |
(183) 0x42e0b2 LEA (%RAX,%RBX,1),%RDX |
(183) 0x42e0b6 VMOVQ %XMM8,%R12 |
(183) 0x42e0bb LEA (%RAX,%R9,1),%RSI |
(183) 0x42e0bf ADD %R10,%RAX |
(183) 0x42e0c2 VMOVSD (%R12,%RDX,8),%XMM9 |
(183) 0x42e0c8 VMOVQ %XMM7,%RDX |
(183) 0x42e0cd LEA 0x2(%R15),%R12D |
(183) 0x42e0d1 ADD $0x2,%EDI |
(183) 0x42e0d4 VMOVSD %XMM9,(%RDX,%RSI,8) |
(183) 0x42e0d9 MOVSXD %R12D,%RDX |
(183) 0x42e0dc MOV %R10,%R12 |
(183) 0x42e0df LEA (%R11,%RDX,1),%RSI |
(183) 0x42e0e3 LEA (%R14,%RSI,8),%RSI |
(183) 0x42e0e7 VADDSD (%RSI),%XMM9,%XMM10 |
(183) 0x42e0eb VSUBSD (%RCX),%XMM10,%XMM11 |
(183) 0x42e0ef VMOVSD %XMM11,(%R13,%RAX,8) |
(183) 0x42e0f6 MOV 0x38(%RSP),%EAX |
(183) 0x42e0fa CMP %EAX,%EDI |
(183) 0x42e0fc JAE 42e137 |
(183) 0x42e0fe ADD $0x3,%R15D |
(183) 0x42e102 VMOVQ %XMM7,%RCX |
(183) 0x42e107 ADD %RDX,%RBX |
(183) 0x42e10a VMOVQ %XMM8,%RDI |
(183) 0x42e10f MOVSXD %R15D,%R15 |
(183) 0x42e112 ADD %RDX,%R9 |
(183) 0x42e115 ADD %RDX,%R12 |
(183) 0x42e118 VMOVSD (%RDI,%RBX,8),%XMM8 |
(183) 0x42e11d ADD %R11,%R15 |
(183) 0x42e120 VMOVSD %XMM8,(%RCX,%R9,8) |
(183) 0x42e126 VADDSD (%R14,%R15,8),%XMM8,%XMM7 |
(183) 0x42e12c VSUBSD (%RSI),%XMM7,%XMM12 |
(183) 0x42e130 VMOVSD %XMM12,(%R13,%R12,8) |
(183) 0x42e137 MOV 0x38(%RSP),%EDI |
(183) 0x42e13b INC %R8 |
(183) 0x42e13e LEA (%R8),%R14D |
(183) 0x42e141 CMP %R14D,0x18(%RSP) |
(183) 0x42e146 JLE 42e170 |
(183) 0x42e148 MOV 0x10(%RSP),%ESI |
(183) 0x42e14c MOV 0x14(%RSP),%R11D |
(183) 0x42e151 MOV 0x1c(%RSP),%R15D |
(183) 0x42e156 SUB %EDI,%ESI |
(183) 0x42e158 MOV %R11D,0x3c(%RSP) |
(183) 0x42e15d JMP 42dc50 |
0x42e162 NOPW %CS:(%RAX,%RAX,1) |
0x42e16d NOPL (%RAX) |
0x42e170 VZEROUPPER |
0x42e173 LEA -0x28(%RBP),%RSP |
0x42e177 POP %RBX |
0x42e178 POP %R12 |
0x42e17a POP %R13 |
0x42e17c POP %R14 |
0x42e17e POP %R15 |
0x42e180 POP %RBP |
0x42e181 RET |
0x42e182 NOPW %CS:(%RAX,%RAX,1) |
0x42e18d NOPL (%RAX) |
(183) 0x42e190 MOV 0x3c(%RSP),%R15D |
(183) 0x42e195 XOR %ESI,%ESI |
(183) 0x42e197 JMP 42dfce |
0x42e19c INC %ESI |
0x42e19e XOR %EDX,%EDX |
0x42e1a0 JMP 42dbf1 |
0x42e1a5 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 78 |
nb uops | 76 |
loop length | 290 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.67 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 12.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x24(%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x603> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4(%RCX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x603> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EBX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %EBX | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42e19c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x62c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %ESI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RSI,%RDI,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x603> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x14(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ 0x8(%R13),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x1c(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x18(%R13),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ (%R13),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R14,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R10D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R11D,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42dbf1 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 78 |
nb uops | 76 |
loop length | 290 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.67 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 12.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x24(%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x603> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4(%RCX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x603> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EBX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %EBX | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42e19c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x62c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %ESI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RSI,%RDI,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42e173 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x603> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x14(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ 0x8(%R13),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x1c(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x18(%R13),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ (%R13),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R14,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R10D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R11D,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42dbf1 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3– | 2.55 | 1.29 |
▼Loop 183 - advec_mom.cpp:73-75 - exec– | 0 | 0.01 |
○Loop 184 - advec_mom.cpp:74-75 - exec | 2.55 | 1.28 |