Function: advancePosition._omp_fn.0 | Module: exec | Source: timestep.c:85-94 | Coverage: 0.62% |
---|
Function: advancePosition._omp_fn.0 | Module: exec | Source: timestep.c:85-94 | Coverage: 0.62% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-7424/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 85 - 94 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for |
86: for (int iBox=0; iBox<nBoxes; iBox++) |
87: { |
88: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
89: { |
90: int iSpecies = s->atoms->iSpecies[iOff]; |
91: real_t invMass = 1.0/s->species[iSpecies].mass; |
92: s->atoms->r[iOff][0] += dt*s->atoms->p[iOff][0]*invMass; |
93: s->atoms->r[iOff][1] += dt*s->atoms->p[iOff][1]*invMass; |
94: s->atoms->r[iOff][2] += dt*s->atoms->p[iOff][2]*invMass; |
0x40f0c0 PUSH %RBP |
0x40f0c1 MOV %RSP,%RBP |
0x40f0c4 PUSH %R14 |
0x40f0c6 PUSH %R13 |
0x40f0c8 PUSH %R12 |
0x40f0ca MOV %RDI,%R12 |
0x40f0cd PUSH %RBX |
0x40f0ce CALL 403060 <omp_get_num_threads@plt> |
0x40f0d3 MOV %EAX,%EBX |
0x40f0d5 CALL 403160 <omp_get_thread_num@plt> |
0x40f0da MOV %EAX,%R9D |
0x40f0dd MOV 0x10(%R12),%EAX |
0x40f0e2 CLTD |
0x40f0e3 IDIV %EBX |
0x40f0e5 CMP %EDX,%R9D |
0x40f0e8 JL 40f3ac |
0x40f0ee IMUL %EAX,%R9D |
0x40f0f2 ADD %EDX,%R9D |
0x40f0f5 LEA (%RAX,%R9,1),%EBX |
0x40f0f9 CMP %EBX,%R9D |
0x40f0fc JGE 40f3a3 |
0x40f102 VMOVSD 0x8(%R12),%XMM0 |
0x40f109 MOV (%R12),%R12 |
0x40f10d MOVSXD %R9D,%R10 |
0x40f110 SAL $0x6,%R9D |
0x40f114 LEA (%R10,%R10,2),%RAX |
0x40f118 VMOVSD 0x2618(%RIP),%XMM2 |
0x40f120 MOV 0x18(%R12),%RCX |
0x40f125 SAL $0x9,%RAX |
0x40f129 MOV 0x78(%RCX),%R13 |
0x40f12d NOPL (%RAX) |
(94) 0x40f130 MOVSXD (%R13,%R10,4),%R8 |
(94) 0x40f135 TEST %R8D,%R8D |
(94) 0x40f138 JLE 40f38d |
(94) 0x40f13e MOV 0x20(%R12),%R11 |
(94) 0x40f143 MOVSXD %R9D,%RSI |
(94) 0x40f146 MOV 0x28(%R12),%RDI |
(94) 0x40f14b MOV 0x10(%R11),%R14 |
(94) 0x40f14f MOV 0x18(%R11),%RDX |
(94) 0x40f153 MOV 0x20(%R11),%RCX |
(94) 0x40f157 MOV %R10,%R11 |
(94) 0x40f15a SAL $0x6,%R11 |
(94) 0x40f15e LEA (%R14,%RSI,4),%RSI |
(94) 0x40f162 ADD %RAX,%RDX |
(94) 0x40f165 ADD %R8,%R11 |
(94) 0x40f168 ADD %RAX,%RCX |
(94) 0x40f16b LEA (%R14,%R11,4),%R11 |
(94) 0x40f16f MOV %R11,%R8 |
(94) 0x40f172 SUB %RSI,%R8 |
(94) 0x40f175 SUB $0x4,%R8 |
(94) 0x40f179 SHR $0x2,%R8 |
(94) 0x40f17d INC %R8 |
(94) 0x40f180 AND $0x3,%R8D |
(94) 0x40f184 JE 40f27e |
(94) 0x40f18a CMP $0x1,%R8 |
(94) 0x40f18e JE 40f22c |
(94) 0x40f194 CMP $0x2,%R8 |
(94) 0x40f198 JE 40f1e3 |
(94) 0x40f19a MOVSXD (%RSI),%R14 |
(94) 0x40f19d VMULSD (%RCX),%XMM0,%XMM3 |
(94) 0x40f1a1 ADD $0x4,%RSI |
(94) 0x40f1a5 ADD $0x18,%RDX |
(94) 0x40f1a9 ADD $0x18,%RCX |
(94) 0x40f1ad SAL $0x4,%R14 |
(94) 0x40f1b1 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM1 |
(94) 0x40f1b8 VFMADD213SD -0x18(%RDX),%XMM1,%XMM3 |
(94) 0x40f1be VMOVSD %XMM3,-0x18(%RDX) |
(94) 0x40f1c3 VMULSD -0x10(%RCX),%XMM0,%XMM4 |
(94) 0x40f1c8 VFMADD213SD -0x10(%RDX),%XMM1,%XMM4 |
(94) 0x40f1ce VMOVSD %XMM4,-0x10(%RDX) |
(94) 0x40f1d3 VMULSD -0x8(%RCX),%XMM0,%XMM5 |
(94) 0x40f1d8 VFMADD213SD -0x8(%RDX),%XMM5,%XMM1 |
(94) 0x40f1de VMOVSD %XMM1,-0x8(%RDX) |
(94) 0x40f1e3 MOVSXD (%RSI),%R8 |
(94) 0x40f1e6 VMULSD (%RCX),%XMM0,%XMM7 |
(94) 0x40f1ea ADD $0x4,%RSI |
(94) 0x40f1ee ADD $0x18,%RDX |
(94) 0x40f1f2 ADD $0x18,%RCX |
(94) 0x40f1f6 SAL $0x4,%R8 |
(94) 0x40f1fa VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM6 |
(94) 0x40f201 VFMADD213SD -0x18(%RDX),%XMM6,%XMM7 |
(94) 0x40f207 VMOVSD %XMM7,-0x18(%RDX) |
(94) 0x40f20c VMULSD -0x10(%RCX),%XMM0,%XMM8 |
(94) 0x40f211 VFMADD213SD -0x10(%RDX),%XMM6,%XMM8 |
(94) 0x40f217 VMOVSD %XMM8,-0x10(%RDX) |
(94) 0x40f21c VMULSD -0x8(%RCX),%XMM0,%XMM9 |
(94) 0x40f221 VFMADD213SD -0x8(%RDX),%XMM9,%XMM6 |
(94) 0x40f227 VMOVSD %XMM6,-0x8(%RDX) |
(94) 0x40f22c MOVSXD (%RSI),%R14 |
(94) 0x40f22f VMULSD (%RCX),%XMM0,%XMM11 |
(94) 0x40f233 ADD $0x4,%RSI |
(94) 0x40f237 ADD $0x18,%RDX |
(94) 0x40f23b ADD $0x18,%RCX |
(94) 0x40f23f SAL $0x4,%R14 |
(94) 0x40f243 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM10 |
(94) 0x40f24a VFMADD213SD -0x18(%RDX),%XMM10,%XMM11 |
(94) 0x40f250 VMOVSD %XMM11,-0x18(%RDX) |
(94) 0x40f255 VMULSD -0x10(%RCX),%XMM0,%XMM12 |
(94) 0x40f25a VFMADD213SD -0x10(%RDX),%XMM10,%XMM12 |
(94) 0x40f260 VMOVSD %XMM12,-0x10(%RDX) |
(94) 0x40f265 VMULSD -0x8(%RCX),%XMM0,%XMM13 |
(94) 0x40f26a VFMADD213SD -0x8(%RDX),%XMM13,%XMM10 |
(94) 0x40f270 VMOVSD %XMM10,-0x8(%RDX) |
(94) 0x40f275 CMP %RSI,%R11 |
(94) 0x40f278 JE 40f38d |
(95) 0x40f27e MOVSXD (%RSI),%R8 |
(95) 0x40f281 VMULSD (%RCX),%XMM0,%XMM15 |
(95) 0x40f285 ADD $0x10,%RSI |
(95) 0x40f289 ADD $0x60,%RDX |
(95) 0x40f28d MOVSXD -0xc(%RSI),%R14 |
(95) 0x40f291 ADD $0x60,%RCX |
(95) 0x40f295 SAL $0x4,%R8 |
(95) 0x40f299 VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM14 |
(95) 0x40f2a0 SAL $0x4,%R14 |
(95) 0x40f2a4 MOVSXD -0x8(%RSI),%R8 |
(95) 0x40f2a8 VFMADD213SD -0x60(%RDX),%XMM14,%XMM15 |
(95) 0x40f2ae SAL $0x4,%R8 |
(95) 0x40f2b2 VMOVSD %XMM15,-0x60(%RDX) |
(95) 0x40f2b7 VMULSD -0x58(%RCX),%XMM0,%XMM1 |
(95) 0x40f2bc VFMADD213SD -0x58(%RDX),%XMM14,%XMM1 |
(95) 0x40f2c2 VMOVSD %XMM1,-0x58(%RDX) |
(95) 0x40f2c7 VMULSD -0x50(%RCX),%XMM0,%XMM3 |
(95) 0x40f2cc VFMADD213SD -0x50(%RDX),%XMM3,%XMM14 |
(95) 0x40f2d2 VMOVSD %XMM14,-0x50(%RDX) |
(95) 0x40f2d7 VMULSD -0x48(%RCX),%XMM0,%XMM5 |
(95) 0x40f2dc VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM4 |
(95) 0x40f2e3 VFMADD213SD -0x48(%RDX),%XMM4,%XMM5 |
(95) 0x40f2e9 VMOVSD %XMM5,-0x48(%RDX) |
(95) 0x40f2ee VMULSD -0x40(%RCX),%XMM0,%XMM6 |
(95) 0x40f2f3 VFMADD213SD -0x40(%RDX),%XMM4,%XMM6 |
(95) 0x40f2f9 VMOVSD %XMM6,-0x40(%RDX) |
(95) 0x40f2fe VMULSD -0x38(%RCX),%XMM0,%XMM7 |
(95) 0x40f303 VFMADD213SD -0x38(%RDX),%XMM7,%XMM4 |
(95) 0x40f309 VMOVSD %XMM4,-0x38(%RDX) |
(95) 0x40f30e VMULSD -0x30(%RCX),%XMM0,%XMM9 |
(95) 0x40f313 VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM8 |
(95) 0x40f31a VFMADD213SD -0x30(%RDX),%XMM8,%XMM9 |
(95) 0x40f320 VMOVSD %XMM9,-0x30(%RDX) |
(95) 0x40f325 VMULSD -0x28(%RCX),%XMM0,%XMM10 |
(95) 0x40f32a VFMADD213SD -0x28(%RDX),%XMM8,%XMM10 |
(95) 0x40f330 VMOVSD %XMM10,-0x28(%RDX) |
(95) 0x40f335 VMULSD -0x20(%RCX),%XMM0,%XMM11 |
(95) 0x40f33a VFMADD213SD -0x20(%RDX),%XMM11,%XMM8 |
(95) 0x40f340 VMOVSD %XMM8,-0x20(%RDX) |
(95) 0x40f345 MOVSXD -0x4(%RSI),%R14 |
(95) 0x40f349 VMULSD -0x18(%RCX),%XMM0,%XMM13 |
(95) 0x40f34e SAL $0x4,%R14 |
(95) 0x40f352 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM12 |
(95) 0x40f359 VFMADD213SD -0x18(%RDX),%XMM12,%XMM13 |
(95) 0x40f35f VMOVSD %XMM13,-0x18(%RDX) |
(95) 0x40f364 VMULSD -0x10(%RCX),%XMM0,%XMM14 |
(95) 0x40f369 VFMADD213SD -0x10(%RDX),%XMM12,%XMM14 |
(95) 0x40f36f VMOVSD %XMM14,-0x10(%RDX) |
(95) 0x40f374 VMULSD -0x8(%RCX),%XMM0,%XMM15 |
(95) 0x40f379 VFMADD213SD -0x8(%RDX),%XMM15,%XMM12 |
(95) 0x40f37f VMOVSD %XMM12,-0x8(%RDX) |
(95) 0x40f384 CMP %RSI,%R11 |
(95) 0x40f387 JNE 40f27e |
(94) 0x40f38d INC %R10 |
(94) 0x40f390 ADD $0x40,%R9D |
(94) 0x40f394 ADD $0x600,%RAX |
(94) 0x40f39a CMP %R10D,%EBX |
(94) 0x40f39d JG 40f130 |
0x40f3a3 POP %RBX |
0x40f3a4 POP %R12 |
0x40f3a6 POP %R13 |
0x40f3a8 POP %R14 |
0x40f3aa POP %RBP |
0x40f3ab RET |
0x40f3ac INC %EAX |
0x40f3ae XOR %EDX,%EDX |
0x40f3b0 JMP 40f0ee |
0x40f3b5 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | timestep.c:85-94 |
Module | exec |
nb instructions | 41 |
nb uops | 41 |
loop length | 141 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 6.83 cycles |
front end | 6.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 3.50 | 3.25 | 3.25 | 4.00 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.00 | 3.50 | 3.25 | 3.25 | 4.00 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 6.00 |
Front-end | 6.83 |
Dispatch | 4.00 |
DIV/SQRT | 6.00 |
Overall L1 | 6.83 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 403060 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x10(%R12),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CLTD | |||||||||||||||||
IDIV %EBX | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 |
CMP %EDX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 40f3ac <advancePosition._omp_fn.0+0x2ec> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %EAX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R9,1),%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EBX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 40f3a3 <advancePosition._omp_fn.0+0x2e3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x8(%R12),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD %R9D,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x6,%R9D | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%R10,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVSD 0x2618(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x9,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RCX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40f0ee <advancePosition._omp_fn.0+0x2e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | timestep.c:85-94 |
Module | exec |
nb instructions | 41 |
nb uops | 41 |
loop length | 141 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 6.83 cycles |
front end | 6.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 3.50 | 3.25 | 3.25 | 4.00 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.00 | 3.50 | 3.25 | 3.25 | 4.00 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 6.00 |
Front-end | 6.83 |
Dispatch | 4.00 |
DIV/SQRT | 6.00 |
Overall L1 | 6.83 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 403060 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x10(%R12),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CLTD | |||||||||||||||||
IDIV %EBX | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 |
CMP %EDX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 40f3ac <advancePosition._omp_fn.0+0x2ec> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %EAX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R9,1),%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EBX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 40f3a3 <advancePosition._omp_fn.0+0x2e3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x8(%R12),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD %R9D,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x6,%R9D | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%R10,2),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVSD 0x2618(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x9,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RCX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40f0ee <advancePosition._omp_fn.0+0x2e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advancePosition._omp_fn.0– | 0.62 | 0.06 |
▼Loop 94 - timestep.c:88-94 - exec– | 0.14 | 0.01 |
○Loop 95 - timestep.c:88-94 - exec | 0.47 | 0.02 |