Function: unloadAtomsBuffer | Module: exec | Source: haloExchange.c:407-426 [...] | Coverage: 0.11% |
---|
Function: unloadAtomsBuffer | Module: exec | Source: haloExchange.c:407-426 [...] | Coverage: 0.11% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-7424/intel/CoMD/build/CoMD/CoMD/src-openmp/haloExchange.c: 407 - 426 |
-------------------------------------------------------------------------------- |
407: { |
408: AtomExchangeParms* parms = (AtomExchangeParms*) vparms; |
409: SimFlat* s = (SimFlat*) data; |
410: AtomMsg* buf = (AtomMsg*) charBuf; |
411: int nBuf = bufSize / sizeof(AtomMsg); |
412: assert(bufSize % sizeof(AtomMsg) == 0); |
413: |
414: for (int ii=0; ii<nBuf; ++ii) |
415: { |
416: int gid = buf[ii].gid; |
417: int type = buf[ii].type; |
418: real_t rx = buf[ii].rx; |
[...] |
424: putAtomInBox(s->boxes, s->atoms, gid, type, rx, ry, rz, px, py, pz); |
425: } |
426: } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-7424/intel/CoMD/build/CoMD/CoMD/src-openmp/linkCells.c: 178 - 378 |
-------------------------------------------------------------------------------- |
178: int iOff = iBox*MAXATOMS; |
179: iOff += boxes->nAtoms[iBox]; |
180: |
181: // assign values to array elements |
182: if (iBox < boxes->nLocalBoxes) |
183: atoms->nLocal++; |
184: boxes->nAtoms[iBox]++; |
185: atoms->gid[iOff] = gid; |
186: atoms->iSpecies[iOff] = iType; |
187: |
188: atoms->r[iOff][0] = x; |
189: atoms->r[iOff][1] = y; |
190: atoms->r[iOff][2] = z; |
191: |
192: atoms->p[iOff][0] = px; |
193: atoms->p[iOff][1] = py; |
194: atoms->p[iOff][2] = pz; |
[...] |
352: int ix = (int)(floor((rr[0] - localMin[0])*boxes->invBoxSize[0])); |
[...] |
359: if (rr[0] < localMax[0]) |
360: { |
361: if (ix == gridSize[0]) ix = gridSize[0] - 1; |
362: } |
363: else |
364: ix = gridSize[0]; // assign to halo cell |
365: if (rr[1] < localMax[1]) |
[...] |
371: if (rr[2] < localMax[2]) |
[...] |
378: return getBoxFromTuple(boxes, ix, iy, iz); |
0x20c050 PUSH %RBP |
0x20c051 MOV %RSP,%RBP |
0x20c054 PUSH %R15 |
0x20c056 PUSH %R14 |
0x20c058 PUSH %R13 |
0x20c05a PUSH %R12 |
0x20c05c PUSH %RBX |
0x20c05d SUB $0x58,%RSP |
0x20c061 MOVSXD %ECX,%RDX |
0x20c064 MOV $0x6db6db6db6db6db7,%RDI |
0x20c06e MOV $0x2492492492492493,%RAX |
0x20c078 IMUL %RDX,%RDI |
0x20c07c SHR $0x3,%RDX |
0x20c080 MULX %RAX,%RAX,%RAX |
0x20c085 MOV $0x492492492492492,%RDX |
0x20c08f RORX $0x3,%RDI,%RCX |
0x20c095 CMP %RDX,%RCX |
0x20c098 JA 20c23b |
0x20c09e TEST %EAX,%EAX |
0x20c0a0 JLE 20c22c |
0x20c0a6 MOV %EAX,%EAX |
0x20c0a8 IMUL $0x38,%RAX,%RAX |
0x20c0ac MOV %R8,%RBX |
0x20c0af MOV %RSI,%RDI |
0x20c0b2 XOR %R13D,%R13D |
0x20c0b5 MOV %RSI,-0x40(%RBP) |
0x20c0b9 MOV %RAX,-0x38(%RBP) |
0x20c0bd JMP 20c131 |
0x20c0bf NOP |
(52) 0x20c0c0 INC %EDI |
(52) 0x20c0c2 MOV %EDX,%EAX |
(52) 0x20c0c4 SAL $0x6,%EAX |
(52) 0x20c0c7 VMOVAPS -0x80(%RBP),%XMM0 |
(52) 0x20c0cc VMOVAPS -0x70(%RBP),%XMM2 |
(52) 0x20c0d1 VMOVSD -0x48(%RBP),%XMM1 |
(52) 0x20c0d6 ADD $0x38,%R13 |
(52) 0x20c0da MOV %EDI,(%RCX,%RDX,4) |
(52) 0x20c0dd ADD %EAX,%ESI |
(52) 0x20c0df MOV -0x2c(%RBP),%EDX |
(52) 0x20c0e2 MOV -0x40(%RBP),%RDI |
(52) 0x20c0e6 MOV 0x8(%R12),%RAX |
(52) 0x20c0eb MOVSXD %ESI,%RCX |
(52) 0x20c0ee MOV %R14D,(%RAX,%RCX,4) |
(52) 0x20c0f2 MOV 0x10(%R12),%RAX |
(52) 0x20c0f7 MOV %EDX,(%RAX,%RCX,4) |
(52) 0x20c0fa SAL $0x3,%RCX |
(52) 0x20c0fe MOV 0x18(%R12),%RAX |
(52) 0x20c103 LEA (%RCX,%RCX,2),%RCX |
(52) 0x20c107 VMOVUPS %XMM0,(%RAX,%RCX,1) |
(52) 0x20c10c VMOVAPS -0x60(%RBP),%XMM0 |
(52) 0x20c111 VMOVSD %XMM0,0x10(%RAX,%RCX,1) |
(52) 0x20c117 MOV 0x20(%R12),%RAX |
(52) 0x20c11c VMOVUPS %XMM2,(%RAX,%RCX,1) |
(52) 0x20c121 VMOVSD %XMM1,0x10(%RAX,%RCX,1) |
(52) 0x20c127 CMP %R13,-0x38(%RBP) |
(52) 0x20c12b JE 20c22c |
(52) 0x20c131 MOV 0x18(%RDI),%R15 |
(52) 0x20c135 VMOVUPD 0x8(%RBX,%R13,1),%XMM2 |
(52) 0x20c13c VMOVSD 0x30(%R15),%XMM0 |
(52) 0x20c142 VUCOMISD %XMM2,%XMM0 |
(52) 0x20c146 JBE 20c170 |
(52) 0x20c148 VSUBSD 0x18(%R15),%XMM2,%XMM0 |
(52) 0x20c14e MOV (%R15),%ECX |
(52) 0x20c151 VMULSD 0x60(%R15),%XMM0,%XMM0 |
(52) 0x20c157 LEA -0x1(%RCX),%ESI |
(52) 0x20c15a VROUNDSD $0x9,%XMM0,%XMM0,%XMM0 |
(52) 0x20c160 VCVTTSD2SI %XMM0,%EAX |
(52) 0x20c164 CMP %EAX,%ECX |
(52) 0x20c166 CMOVNE %EAX,%ESI |
(52) 0x20c169 JMP 20c173 |
0x20c16b NOPL (%RAX,%RAX,1) |
(52) 0x20c170 MOV (%R15),%ESI |
(52) 0x20c173 VMOVUPS 0x20(%RBX,%R13,1),%XMM0 |
(52) 0x20c17a VMOVSD 0x18(%RBX,%R13,1),%XMM3 |
(52) 0x20c181 VMOVQ 0x4(%R15),%XMM1 |
(52) 0x20c187 VMOVAPD %XMM2,-0x80(%RBP) |
(52) 0x20c18c MOV 0x4(%RBX,%R13,1),%EAX |
(52) 0x20c191 MOV 0x20(%RDI),%R12 |
(52) 0x20c195 MOV (%RBX,%R13,1),%R14D |
(52) 0x20c199 MOV %R15,%RDI |
(52) 0x20c19c MOV %EAX,-0x2c(%RBP) |
(52) 0x20c19f VMOVAPS %XMM0,-0x70(%RBP) |
(52) 0x20c1a4 VMOVSD 0x30(%RBX,%R13,1),%XMM0 |
(52) 0x20c1ab VSHUFPD $0x1,%XMM3,%XMM2,%XMM2 |
(52) 0x20c1b0 VCMPPD $0x1,0x38(%R15),%XMM2,%K2 |
(52) 0x20c1bb VMOVAPD %XMM3,-0x60(%RBP) |
(52) 0x20c1c0 VPCMPEQD %XMM2,%XMM2,%XMM2 |
(52) 0x20c1c4 VMOVSD %XMM0,-0x48(%RBP) |
(52) 0x20c1c9 VMOVDDUP 0x10(%RBX,%R13,1),%XMM0 |
(52) 0x20c1d0 VUNPCKLPD %XMM3,%XMM0,%XMM0 |
(52) 0x20c1d4 VSUBPD 0x20(%R15),%XMM0,%XMM0 |
(52) 0x20c1da VMULPD 0x68(%R15),%XMM0,%XMM0 |
(52) 0x20c1e0 VROUNDPD $0x9,%XMM0,%XMM0 |
(52) 0x20c1e6 VCVTTPD2DQ %XMM0,%XMM0 |
(52) 0x20c1ea VPCMPEQD %XMM0,%XMM1,%K1 |
(52) 0x20c1f0 VPADDD %XMM2,%XMM1,%XMM0{%K1} |
(52) 0x20c1f6 VMOVDQA32 %XMM0,%XMM1{%K2} |
(52) 0x20c1fc VMOVD %XMM1,%EDX |
(52) 0x20c200 VPEXTRD $0x1,%XMM1,%ECX |
(52) 0x20c206 CALL 20f6b0 <getBoxFromTuple> |
(52) 0x20c20b MOV 0x78(%R15),%RCX |
(52) 0x20c20f MOV %EAX,%EDX |
(52) 0x20c211 MOV (%RCX,%RDX,4),%ESI |
(52) 0x20c214 MOV %ESI,%EDI |
(52) 0x20c216 CMP 0xc(%R15),%EAX |
(52) 0x20c21a JGE 20c0c0 |
(52) 0x20c220 INCL (%R12) |
(52) 0x20c224 MOV (%RCX,%RDX,4),%EDI |
(52) 0x20c227 JMP 20c0c0 |
0x20c22c ADD $0x58,%RSP |
0x20c230 POP %RBX |
0x20c231 POP %R12 |
0x20c233 POP %R13 |
0x20c235 POP %R14 |
0x20c237 POP %R15 |
0x20c239 POP %RBP |
0x20c23a RET |
0x20c23b MOV $0x2050d5,%EDI |
0x20c240 MOV $0x203950,%ESI |
0x20c245 MOV $0x204cdd,%ECX |
0x20c24a MOV $0x19c,%EDX |
0x20c24f CALL 2134d0 <@plt_start@+0xd0> |
0x20c254 INT $0x3 |
0x20c255 INT $0x3 |
0x20c256 INT $0x3 |
0x20c257 INT $0x3 |
0x20c258 INT $0x3 |
0x20c259 INT $0x3 |
0x20c25a INT $0x3 |
0x20c25b INT $0x3 |
0x20c25c INT $0x3 |
0x20c25d INT $0x3 |
0x20c25e INT $0x3 |
0x20c25f INT $0x3 |
Path / |
Source file and lines | haloExchange.c:407-426 |
Module | exec |
nb instructions | 55 |
nb uops | 43 |
loop length | 169 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 7.17 cycles |
front end | 7.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.25 | 5.25 | 5.00 | 5.00 | 2.50 | 1.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.25 | 5.25 | 5.00 | 5.00 | 2.50 | 1.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 7.17 |
Dispatch | 5.25 |
Overall L1 | 7.17 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %ECX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x6db6db6db6db6db7,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV $0x2492492492492493,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
IMUL %RDX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MULX %RAX,%RAX,%RAX | 2 | 0.25 | 1.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3-4 | 1 |
MOV $0x492492492492492,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RORX $0x3,%RDI,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JA 20c23b <unloadAtomsBuffer+0x1eb> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 20c22c <unloadAtomsBuffer+0x1dc> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL $0x38,%RAX,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSI,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 20c131 <unloadAtomsBuffer+0xe1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x2050d5,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x203950,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x204cdd,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x19c,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 2134d0 <@plt_start@+0xd0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | haloExchange.c:407-426 |
Module | exec |
nb instructions | 55 |
nb uops | 43 |
loop length | 169 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 7.17 cycles |
front end | 7.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.25 | 5.25 | 5.00 | 5.00 | 2.50 | 1.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.25 | 5.25 | 5.00 | 5.00 | 2.50 | 1.00 | 1.00 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 7.17 |
Dispatch | 5.25 |
Overall L1 | 7.17 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %ECX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x6db6db6db6db6db7,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV $0x2492492492492493,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
IMUL %RDX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MULX %RAX,%RAX,%RAX | 2 | 0.25 | 1.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3-4 | 1 |
MOV $0x492492492492492,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RORX $0x3,%RDI,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JA 20c23b <unloadAtomsBuffer+0x1eb> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 20c22c <unloadAtomsBuffer+0x1dc> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL $0x38,%RAX,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSI,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 20c131 <unloadAtomsBuffer+0xe1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x2050d5,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x203950,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x204cdd,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x19c,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 2134d0 <@plt_start@+0xd0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼unloadAtomsBuffer– | 0.11 | 0.01 |
○Loop 52 - linkCells.c:178-378 - exec | 0.11 | 1.12 |