Function: _ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18Ve ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 1.76% |
---|
Function: _ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18Ve ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 1.76% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
222: const T x0 = pos[0]; |
223: const T y0 = pos[1]; |
224: const T z0 = pos[2]; |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
0x48eb40 PUSH %RBP |
0x48eb41 MOV %RSP,%RBP |
0x48eb44 PUSH %R15 |
0x48eb46 PUSH %R14 |
0x48eb48 PUSH %R13 |
0x48eb4a PUSH %R12 |
0x48eb4c PUSH %RBX |
0x48eb4d AND $-0x20,%RSP |
0x48eb51 SUB $0x220,%RSP |
0x48eb58 MOV 0x10(%RBP),%EAX |
0x48eb5b MOV %EAX,%R14D |
0x48eb5e SUB %R9D,%R14D |
0x48eb61 JLE 48f5a3 |
0x48eb67 VMOVUPD (%RSI),%XMM14 |
0x48eb6b VMOVSD 0x10(%RSI),%XMM20 |
0x48eb72 MOV 0x8(%RDX),%RSI |
0x48eb76 MOV 0x18(%RDX),%RDX |
0x48eb7a LEA (%RDX,%RSI,8),%R10 |
0x48eb7e SAL $0x4,%RSI |
0x48eb82 ADD %RDX,%RSI |
0x48eb85 MOV 0x8(%R8),%R11 |
0x48eb89 MOV 0x18(%R8),%R8 |
0x48eb8d LEA (%R8,%R11,8),%RBX |
0x48eb91 SAL $0x4,%R11 |
0x48eb95 ADD %R8,%R11 |
0x48eb98 VMOVSD 0x48(%RDI),%XMM21 |
0x48eb9f VMOVUPD 0x50(%RDI),%XMM22 |
0x48eba6 VMOVSD 0x60(%RDI),%XMM23 |
0x48ebad VMOVSD 0x68(%RDI),%XMM3 |
0x48ebb2 VMOVSD 0x78(%RDI),%XMM2 |
0x48ebb7 VMOVSD 0x80(%RDI),%XMM29 |
0x48ebbe VMOVSD 0x70(%RDI),%XMM28 |
0x48ebc5 VMOVSD 0x88(%RDI),%XMM30 |
0x48ebcc VMOVSD (%RDI),%XMM25 |
0x48ebd2 VMOVUPD 0x8(%RDI),%XMM26 |
0x48ebdc MOV %R14D,%R13D |
0x48ebdf AND $-0x4,%R13D |
0x48ebe3 JE 48f280 |
0x48ebe9 MOV %R14D,0x4(%RSP) |
0x48ebee VBROADCASTSD %XMM14,%YMM31 |
0x48ebf4 VBROADCASTSD %XMM22,%YMM10 |
0x48ebfa VBROADCASTSD %XMM26,%YMM0 |
0x48ec00 VMOVUPD %YMM0,0xa0(%RSP) |
0x48ec09 AND $-0x10,%R14D |
0x48ec0d VMOVUPD %XMM3,0x20(%RSP) |
0x48ec13 VMOVUPD %XMM2,0x10(%RSP) |
0x48ec19 JE 48f2c0 |
0x48ec1f MOV %R13,0x8(%RSP) |
0x48ec24 LEA -0x1(%R14),%R12D |
0x48ec28 VMOVUPD %YMM14,0x100(%RSP) |
0x48ec31 VXORPS %XMM0,%XMM0,%XMM0 |
0x48ec35 VPERMPD $0x55,%YMM14,%YMM0 |
0x48ec3b VMOVUPD %YMM0,0x1a0(%RSP) |
0x48ec44 VMOVUPD %XMM20,0x90(%RSP) |
0x48ec4c VBROADCASTSD %XMM20,%YMM0 |
0x48ec52 VMOVUPD %YMM0,0x180(%RSP) |
0x48ec5b VMOVUPD %XMM21,0x80(%RSP) |
0x48ec63 VBROADCASTSD %XMM21,%YMM0 |
0x48ec69 VMOVUPD %YMM0,0x160(%RSP) |
0x48ec72 VMOVUPD %YMM22,0xe0(%RSP) |
0x48ec7a VXORPS %XMM0,%XMM0,%XMM0 |
0x48ec7e VPERMPD $0x55,%YMM22,%YMM0 |
0x48ec85 VMOVUPD %YMM0,0x140(%RSP) |
0x48ec8e VMOVUPD %XMM23,0x60(%RSP) |
0x48ec96 VBROADCASTSD %XMM23,%YMM0 |
0x48ec9c VMOVUPD %YMM0,0x120(%RSP) |
0x48eca5 VBROADCASTSD %XMM3,%YMM20 |
0x48ecab VMOVUPD %XMM28,0x50(%RSP) |
0x48ecb3 VBROADCASTSD %XMM28,%YMM21 |
0x48ecb9 VBROADCASTSD %XMM2,%YMM22 |
0x48ecbf VMOVUPD %XMM29,0x40(%RSP) |
0x48ecc7 VBROADCASTSD %XMM29,%YMM23 |
0x48eccd VMOVUPD %XMM30,0x30(%RSP) |
0x48ecd5 VBROADCASTSD %XMM30,%YMM24 |
0x48ecdb VMOVUPD %XMM25,0x70(%RSP) |
0x48ece3 VBROADCASTSD %XMM25,%YMM25 |
0x48ece9 VMOVUPD %YMM26,0xc0(%RSP) |
0x48ecf1 VPERMPD $0x55,%YMM26,%YMM26 |
0x48ecf8 XOR %R13D,%R13D |
0x48ecfb VPBROADCASTQ 0x9f33b(%RIP),%YMM27 |
0x48ed05 VPBROADCASTQ 0xa5971(%RIP),%YMM28 |
0x48ed0f VMOVUPD 0xa0(%RSP),%YMM19 |
0x48ed17 VMOVUPD %YMM31,0x1e0(%RSP) |
0x48ed1f VMOVUPD %YMM10,0x1c0(%RSP) |
0x48ed28 NOPL (%RAX,%RAX,1) |
(1355) 0x48ed30 LEA (%R9,%R13,1),%R15D |
(1355) 0x48ed34 MOVSXD %R15D,%R15 |
(1355) 0x48ed37 VMOVUPD (%RDX,%R15,8),%YMM0 |
(1355) 0x48ed3d VMOVUPD 0x20(%RDX,%R15,8),%YMM1 |
(1355) 0x48ed44 VMOVUPD 0x40(%RDX,%R15,8),%YMM2 |
(1355) 0x48ed4b VMOVUPD 0x60(%RDX,%R15,8),%YMM3 |
(1355) 0x48ed52 VSUBPD %YMM31,%YMM0,%YMM0 |
(1355) 0x48ed58 VSUBPD %YMM31,%YMM1,%YMM1 |
(1355) 0x48ed5e VSUBPD %YMM31,%YMM2,%YMM4 |
(1355) 0x48ed64 VSUBPD %YMM31,%YMM3,%YMM2 |
(1355) 0x48ed6a VMOVUPD (%R10,%R15,8),%YMM3 |
(1355) 0x48ed70 VMOVUPD 0x20(%R10,%R15,8),%YMM5 |
(1355) 0x48ed77 VMOVUPD 0x40(%R10,%R15,8),%YMM6 |
(1355) 0x48ed7e VMOVUPD 0x60(%R10,%R15,8),%YMM7 |
(1355) 0x48ed85 VMOVUPD 0x1a0(%RSP),%YMM11 |
(1355) 0x48ed8e VSUBPD %YMM11,%YMM7,%YMM12 |
(1355) 0x48ed93 VSUBPD %YMM11,%YMM6,%YMM13 |
(1355) 0x48ed98 VSUBPD %YMM11,%YMM5,%YMM5 |
(1355) 0x48ed9d VMOVUPD (%RSI,%R15,8),%YMM6 |
(1355) 0x48eda3 VMOVUPD 0x20(%RSI,%R15,8),%YMM7 |
(1355) 0x48edaa VMOVUPD 0x40(%RSI,%R15,8),%YMM8 |
(1355) 0x48edb1 VMOVUPD 0x60(%RSI,%R15,8),%YMM9 |
(1355) 0x48edb8 VSUBPD %YMM11,%YMM3,%YMM3 |
(1355) 0x48edbd VMOVUPD 0x180(%RSP),%YMM11 |
(1355) 0x48edc6 VSUBPD %YMM11,%YMM6,%YMM14 |
(1355) 0x48edcb VSUBPD %YMM11,%YMM7,%YMM15 |
(1355) 0x48edd0 VMOVUPD 0x160(%RSP),%YMM7 |
(1355) 0x48edd9 VMULPD %YMM2,%YMM7,%YMM6 |
(1355) 0x48eddd VMULPD %YMM4,%YMM7,%YMM30 |
(1355) 0x48ede3 VMULPD %YMM1,%YMM7,%YMM31 |
(1355) 0x48ede9 VSUBPD %YMM11,%YMM8,%YMM16 |
(1355) 0x48edef VMULPD %YMM0,%YMM7,%YMM29 |
(1355) 0x48edf5 VFMADD231PD %YMM3,%YMM10,%YMM29 |
(1355) 0x48edfb VFMADD231PD %YMM5,%YMM10,%YMM31 |
(1355) 0x48ee01 VFMADD231PD %YMM13,%YMM10,%YMM30 |
(1355) 0x48ee07 VFMADD231PD %YMM12,%YMM10,%YMM6 |
(1355) 0x48ee0c VSUBPD %YMM11,%YMM9,%YMM17 |
(1355) 0x48ee12 VMOVUPD 0x140(%RSP),%YMM10 |
(1355) 0x48ee1b VFMADD231PD %YMM17,%YMM10,%YMM6 |
(1355) 0x48ee21 VFMADD231PD %YMM16,%YMM10,%YMM30 |
(1355) 0x48ee27 VMOVUPD 0x120(%RSP),%YMM7 |
(1355) 0x48ee30 VMULPD %YMM0,%YMM7,%YMM11 |
(1355) 0x48ee34 VMULPD %YMM1,%YMM7,%YMM8 |
(1355) 0x48ee38 VMULPD %YMM4,%YMM7,%YMM9 |
(1355) 0x48ee3c VFMADD231PD %YMM15,%YMM10,%YMM31 |
(1355) 0x48ee42 VMULPD %YMM2,%YMM7,%YMM7 |
(1355) 0x48ee46 VFMADD231PD %YMM12,%YMM20,%YMM7 |
(1355) 0x48ee4c VFMADD231PD %YMM13,%YMM20,%YMM9 |
(1355) 0x48ee52 VFMADD231PD %YMM5,%YMM20,%YMM8 |
(1355) 0x48ee58 VFMADD231PD %YMM3,%YMM20,%YMM11 |
(1355) 0x48ee5e VFMADD231PD %YMM14,%YMM10,%YMM29 |
(1355) 0x48ee64 VFMADD231PD %YMM14,%YMM21,%YMM11 |
(1355) 0x48ee6a VFMADD231PD %YMM15,%YMM21,%YMM8 |
(1355) 0x48ee70 VMULPD %YMM2,%YMM22,%YMM2 |
(1355) 0x48ee76 VMULPD %YMM4,%YMM22,%YMM4 |
(1355) 0x48ee7c VMULPD %YMM1,%YMM22,%YMM1 |
(1355) 0x48ee82 VFMADD231PD %YMM16,%YMM21,%YMM9 |
(1355) 0x48ee88 VMULPD %YMM0,%YMM22,%YMM10 |
(1355) 0x48ee8e VFMADD231PD %YMM3,%YMM23,%YMM10 |
(1355) 0x48ee94 VFMADD231PD %YMM5,%YMM23,%YMM1 |
(1355) 0x48ee9a VFMADD231PD %YMM13,%YMM23,%YMM4 |
(1355) 0x48eea0 VFMADD231PD %YMM12,%YMM23,%YMM2 |
(1355) 0x48eea6 VFMADD231PD %YMM17,%YMM21,%YMM7 |
(1355) 0x48eeac VFMADD231PD %YMM17,%YMM24,%YMM2 |
(1355) 0x48eeb2 VFMADD231PD %YMM16,%YMM24,%YMM4 |
(1355) 0x48eeb8 VFMADD231PD %YMM15,%YMM24,%YMM1 |
(1355) 0x48eebe VFMADD231PD %YMM14,%YMM24,%YMM10 |
(1355) 0x48eec4 VMOVDQA64 %YMM28,%YMM0 |
(1355) 0x48eeca VPTERNLOGQ $-0x8,%YMM27,%YMM29,%YMM0 |
(1355) 0x48eed1 VADDPD %YMM0,%YMM29,%YMM0 |
(1355) 0x48eed7 VROUNDPD $0xb,%YMM0,%YMM3 |
(1355) 0x48eedd VMOVDQA64 %YMM28,%YMM0 |
(1355) 0x48eee3 VPTERNLOGQ $-0x8,%YMM27,%YMM31,%YMM0 |
(1355) 0x48eeea VADDPD %YMM0,%YMM31,%YMM0 |
(1355) 0x48eef0 VMOVDQA64 %YMM28,%YMM5 |
(1355) 0x48eef6 VROUNDPD $0xb,%YMM0,%YMM12 |
(1355) 0x48eefc VPTERNLOGQ $-0x8,%YMM27,%YMM30,%YMM5 |
(1355) 0x48ef03 VADDPD %YMM5,%YMM30,%YMM0 |
(1355) 0x48ef09 VMOVDQA64 %YMM28,%YMM5 |
(1355) 0x48ef0f VPTERNLOGQ $-0x8,%YMM27,%YMM6,%YMM5 |
(1355) 0x48ef16 VADDPD %YMM5,%YMM6,%YMM5 |
(1355) 0x48ef1a VROUNDPD $0xb,%YMM0,%YMM13 |
(1355) 0x48ef20 VROUNDPD $0xb,%YMM5,%YMM0 |
(1355) 0x48ef26 VMOVDQA64 %YMM28,%YMM5 |
(1355) 0x48ef2c VPTERNLOGQ $-0x8,%YMM27,%YMM7,%YMM5 |
(1355) 0x48ef33 VADDPD %YMM5,%YMM7,%YMM5 |
(1355) 0x48ef37 VMOVDQA64 %YMM28,%YMM14 |
(1355) 0x48ef3d VROUNDPD $0xb,%YMM5,%YMM5 |
(1355) 0x48ef43 VPTERNLOGQ $-0x8,%YMM27,%YMM9,%YMM14 |
(1355) 0x48ef4a VADDPD %YMM14,%YMM9,%YMM14 |
(1355) 0x48ef4f VMOVDQA64 %YMM28,%YMM15 |
(1355) 0x48ef55 VPTERNLOGQ $-0x8,%YMM27,%YMM8,%YMM15 |
(1355) 0x48ef5c VADDPD %YMM15,%YMM8,%YMM15 |
(1355) 0x48ef61 VROUNDPD $0xb,%YMM14,%YMM14 |
(1355) 0x48ef67 VROUNDPD $0xb,%YMM15,%YMM15 |
(1355) 0x48ef6d VMOVDQA64 %YMM28,%YMM16 |
(1355) 0x48ef73 VPTERNLOGQ $-0x8,%YMM27,%YMM11,%YMM16 |
(1355) 0x48ef7a VADDPD %YMM16,%YMM11,%YMM16 |
(1355) 0x48ef80 VRNDSCALEPD $0xb,%YMM16,%YMM16 |
(1355) 0x48ef87 VSUBPD %YMM0,%YMM6,%YMM0 |
(1355) 0x48ef8b VMOVDQA64 %YMM28,%YMM6 |
(1355) 0x48ef91 VPTERNLOGQ $-0x8,%YMM27,%YMM10,%YMM6 |
(1355) 0x48ef98 VADDPD %YMM6,%YMM10,%YMM6 |
(1355) 0x48ef9c VRNDSCALEPD $0xb,%YMM6,%YMM17 |
(1355) 0x48efa3 VSUBPD %YMM13,%YMM30,%YMM30 |
(1355) 0x48efa9 VSUBPD %YMM12,%YMM31,%YMM31 |
(1355) 0x48efaf VMOVDQA64 %YMM28,%YMM6 |
(1355) 0x48efb5 VPTERNLOGQ $-0x8,%YMM27,%YMM1,%YMM6 |
(1355) 0x48efbc VADDPD %YMM6,%YMM1,%YMM6 |
(1355) 0x48efc0 VRNDSCALEPD $0xb,%YMM6,%YMM18 |
(1355) 0x48efc7 VSUBPD %YMM3,%YMM29,%YMM13 |
(1355) 0x48efcd VSUBPD %YMM16,%YMM11,%YMM11 |
(1355) 0x48efd3 VMOVDQA64 %YMM28,%YMM3 |
(1355) 0x48efd9 VPTERNLOGQ $-0x8,%YMM27,%YMM4,%YMM3 |
(1355) 0x48efe0 VADDPD %YMM3,%YMM4,%YMM3 |
(1355) 0x48efe4 VROUNDPD $0xb,%YMM3,%YMM6 |
(1355) 0x48efea VSUBPD %YMM15,%YMM8,%YMM3 |
(1355) 0x48efef VSUBPD %YMM14,%YMM9,%YMM12 |
(1355) 0x48eff4 VMOVDQA64 %YMM28,%YMM8 |
(1355) 0x48effa VPTERNLOGQ $-0x8,%YMM27,%YMM2,%YMM8 |
(1355) 0x48f001 VADDPD %YMM2,%YMM8,%YMM8 |
(1355) 0x48f005 VROUNDPD $0xb,%YMM8,%YMM8 |
(1355) 0x48f00b VSUBPD %YMM5,%YMM7,%YMM5 |
(1355) 0x48f00f VSUBPD %YMM8,%YMM2,%YMM29 |
(1355) 0x48f015 VSUBPD %YMM6,%YMM4,%YMM2 |
(1355) 0x48f019 VMULPD %YMM13,%YMM25,%YMM7 |
(1355) 0x48f01f VMULPD %YMM31,%YMM25,%YMM9 |
(1355) 0x48f025 VMULPD %YMM30,%YMM25,%YMM6 |
(1355) 0x48f02b VSUBPD %YMM18,%YMM1,%YMM1 |
(1355) 0x48f031 VMULPD %YMM0,%YMM25,%YMM8 |
(1355) 0x48f037 VFMADD231PD %YMM5,%YMM19,%YMM8 |
(1355) 0x48f03d VFMADD231PD %YMM12,%YMM19,%YMM6 |
(1355) 0x48f043 VFMADD231PD %YMM3,%YMM19,%YMM9 |
(1355) 0x48f049 VFMADD231PD %YMM11,%YMM19,%YMM7 |
(1355) 0x48f04f VSUBPD %YMM17,%YMM10,%YMM4 |
(1355) 0x48f055 VFMADD231PD %YMM26,%YMM4,%YMM7 |
(1355) 0x48f05b VFMADD231PD %YMM26,%YMM1,%YMM9 |
(1355) 0x48f061 VFMADD231PD %YMM26,%YMM2,%YMM6 |
(1355) 0x48f067 VFMADD231PD %YMM26,%YMM29,%YMM8 |
(1355) 0x48f06d VMOVUPD %YMM8,0x60(%R8,%R15,8) |
(1355) 0x48f074 VMOVUPD %YMM6,0x40(%R8,%R15,8) |
(1355) 0x48f07b VMOVUPD %YMM9,0x20(%R8,%R15,8) |
(1355) 0x48f082 VMOVUPD %YMM7,(%R8,%R15,8) |
(1355) 0x48f088 VBROADCASTSD 0x18(%RDI),%YMM10 |
(1355) 0x48f08e VMULPD %YMM0,%YMM10,%YMM14 |
(1355) 0x48f092 VMULPD %YMM13,%YMM10,%YMM15 |
(1355) 0x48f097 VMULPD %YMM31,%YMM10,%YMM16 |
(1355) 0x48f09d VBROADCASTSD 0x20(%RDI),%YMM17 |
(1355) 0x48f0a4 VMULPD %YMM30,%YMM10,%YMM10 |
(1355) 0x48f0aa VFMADD231PD %YMM12,%YMM17,%YMM10 |
(1355) 0x48f0b0 VFMADD231PD %YMM3,%YMM17,%YMM16 |
(1355) 0x48f0b6 VFMADD231PD %YMM11,%YMM17,%YMM15 |
(1355) 0x48f0bc VFMADD231PD %YMM17,%YMM5,%YMM14 |
(1355) 0x48f0c2 VBROADCASTSD 0x28(%RDI),%YMM17 |
(1355) 0x48f0c9 VFMADD231PD %YMM29,%YMM17,%YMM14 |
(1355) 0x48f0cf VFMADD231PD %YMM1,%YMM17,%YMM16 |
(1355) 0x48f0d5 VFMADD231PD %YMM17,%YMM2,%YMM10 |
(1355) 0x48f0db VMOVUPD %YMM10,0x40(%RBX,%R15,8) |
(1355) 0x48f0e2 VMOVUPD %YMM16,0x20(%RBX,%R15,8) |
(1355) 0x48f0ea VFMADD231PD %YMM4,%YMM17,%YMM15 |
(1355) 0x48f0f0 VMOVUPD %YMM15,(%RBX,%R15,8) |
(1355) 0x48f0f6 VMOVUPD %YMM14,0x60(%RBX,%R15,8) |
(1355) 0x48f0fd VBROADCASTSD 0x30(%RDI),%YMM17 |
(1355) 0x48f104 VMULPD %YMM0,%YMM17,%YMM0 |
(1355) 0x48f10a VMULPD %YMM13,%YMM17,%YMM13 |
(1355) 0x48f110 VMULPD %YMM31,%YMM17,%YMM18 |
(1355) 0x48f116 VMOVUPD 0x1e0(%RSP),%YMM31 |
(1355) 0x48f11e VMULPD %YMM30,%YMM17,%YMM17 |
(1355) 0x48f124 VBROADCASTSD 0x38(%RDI),%YMM30 |
(1355) 0x48f12b VFMADD231PD %YMM12,%YMM30,%YMM17 |
(1355) 0x48f131 VFMADD231PD %YMM3,%YMM30,%YMM18 |
(1355) 0x48f137 VFMADD231PD %YMM11,%YMM30,%YMM13 |
(1355) 0x48f13d VFMADD231PD %YMM5,%YMM30,%YMM0 |
(1355) 0x48f143 VBROADCASTSD 0x40(%RDI),%YMM3 |
(1355) 0x48f149 VFMADD231PD %YMM29,%YMM3,%YMM0 |
(1355) 0x48f14f VFMADD231PD %YMM4,%YMM3,%YMM13 |
(1355) 0x48f154 VFMADD231PD %YMM1,%YMM3,%YMM18 |
(1355) 0x48f15a VFMADD231PD %YMM2,%YMM3,%YMM17 |
(1355) 0x48f160 VMULPD %YMM9,%YMM9,%YMM1 |
(1355) 0x48f165 VFMADD231PD %YMM16,%YMM16,%YMM1 |
(1355) 0x48f16b VMULPD %YMM7,%YMM7,%YMM2 |
(1355) 0x48f16f VFMADD231PD %YMM15,%YMM15,%YMM2 |
(1355) 0x48f174 VMULPD %YMM8,%YMM8,%YMM3 |
(1355) 0x48f179 VFMADD231PD %YMM14,%YMM14,%YMM3 |
(1355) 0x48f17e VMULPD %YMM6,%YMM6,%YMM4 |
(1355) 0x48f182 VFMADD231PD %YMM10,%YMM10,%YMM4 |
(1355) 0x48f187 VMOVUPD 0x1c0(%RSP),%YMM10 |
(1355) 0x48f190 VMOVUPD %YMM17,0x40(%R11,%R15,8) |
(1355) 0x48f198 VFMADD231PD %YMM17,%YMM17,%YMM4 |
(1355) 0x48f19e VMOVUPD %YMM0,0x60(%R11,%R15,8) |
(1355) 0x48f1a5 VFMADD231PD %YMM0,%YMM0,%YMM3 |
(1355) 0x48f1aa VMOVUPD %YMM13,(%R11,%R15,8) |
(1355) 0x48f1b0 VFMADD231PD %YMM13,%YMM13,%YMM2 |
(1355) 0x48f1b5 VMOVUPD %YMM18,0x20(%R11,%R15,8) |
(1355) 0x48f1bd VSQRTPD %YMM4,%YMM0 |
(1355) 0x48f1c1 VMOVUPD %YMM0,0x40(%RCX,%R15,8) |
(1355) 0x48f1c8 VSQRTPD %YMM3,%YMM0 |
(1355) 0x48f1cc VMOVUPD %YMM0,0x60(%RCX,%R15,8) |
(1355) 0x48f1d3 VSQRTPD %YMM2,%YMM0 |
(1355) 0x48f1d7 VMOVUPD %YMM0,(%RCX,%R15,8) |
(1355) 0x48f1dd VFMADD231PD %YMM18,%YMM18,%YMM1 |
(1355) 0x48f1e3 VSQRTPD %YMM1,%YMM0 |
(1355) 0x48f1e7 VMOVUPD %YMM0,0x20(%RCX,%R15,8) |
(1355) 0x48f1ee ADD $0x10,%R13D |
(1355) 0x48f1f2 CMP %R12D,%R13D |
(1355) 0x48f1f5 JBE 48ed30 |
0x48f1fb MOV 0x8(%RSP),%R13 |
0x48f200 CMP %R14D,%R13D |
0x48f203 VMOVUPD 0x100(%RSP),%YMM14 |
0x48f20c VMOVUPD 0x90(%RSP),%XMM20 |
0x48f214 VMOVUPD 0x80(%RSP),%XMM21 |
0x48f21c VMOVUPD 0xe0(%RSP),%YMM22 |
0x48f224 VMOVUPD 0x70(%RSP),%XMM25 |
0x48f22c VMOVUPD 0xc0(%RSP),%YMM26 |
0x48f234 VMOVUPD 0x60(%RSP),%XMM23 |
0x48f23c VMOVUPD 0x20(%RSP),%XMM3 |
0x48f242 VMOVUPD 0x10(%RSP),%XMM2 |
0x48f248 VMOVUPD 0x50(%RSP),%XMM28 |
0x48f250 VMOVUPD 0x40(%RSP),%XMM29 |
0x48f258 VMOVUPD 0x30(%RSP),%XMM30 |
0x48f260 JNE 48f2c3 |
0x48f262 CMP %R13D,0x4(%RSP) |
0x48f267 JNE 48f460 |
0x48f26d JMP 48f5a3 |
0x48f272 NOPW %CS:(%RAX,%RAX,1) |
0x48f280 XOR %R14D,%R14D |
0x48f283 JMP 48f460 |
0x48f288 NOPW %CS:(%RAX,%RAX,1) |
0x48f297 NOPW %CS:(%RAX,%RAX,1) |
0x48f2a6 NOPW %CS:(%RAX,%RAX,1) |
0x48f2b5 NOPW %CS:(%RAX,%RAX,1) |
0x48f2c0 XOR %R14D,%R14D |
0x48f2c3 LEA -0x1(%R13),%R15D |
0x48f2c7 VPERMPD $0x55,%YMM14,%YMM6 |
0x48f2cd VBROADCASTSD %XMM20,%YMM7 |
0x48f2d3 VBROADCASTSD %XMM21,%YMM8 |
0x48f2d9 VPERMPD $0x55,%YMM22,%YMM9 |
0x48f2e0 VBROADCASTSD %XMM23,%YMM27 |
0x48f2e6 VBROADCASTSD %XMM3,%YMM11 |
0x48f2eb VBROADCASTSD %XMM28,%YMM15 |
0x48f2f1 VBROADCASTSD %XMM2,%YMM16 |
0x48f2f7 VBROADCASTSD %XMM29,%YMM17 |
0x48f2fd VBROADCASTSD %XMM30,%YMM1 |
0x48f303 VBROADCASTSD %XMM25,%YMM18 |
0x48f309 VPERMPD $0x55,%YMM26,%YMM19 |
0x48f310 VPBROADCASTQ 0x9ed27(%RIP),%YMM0 |
0x48f319 VPBROADCASTQ 0xa535e(%RIP),%YMM2 |
0x48f322 VMOVUPD 0xa0(%RSP),%YMM24 |
0x48f32a NOPW (%RAX,%RAX,1) |
(1354) 0x48f330 LEA (%R9,%R14,1),%R12D |
(1354) 0x48f334 MOVSXD %R12D,%R12 |
(1354) 0x48f337 VMOVUPD (%RDX,%R12,8),%YMM3 |
(1354) 0x48f33d VSUBPD %YMM31,%YMM3,%YMM3 |
(1354) 0x48f343 VMOVUPD (%R10,%R12,8),%YMM4 |
(1354) 0x48f349 VMOVUPD (%RSI,%R12,8),%YMM5 |
(1354) 0x48f34f VSUBPD %YMM6,%YMM4,%YMM4 |
(1354) 0x48f353 VSUBPD %YMM7,%YMM5,%YMM5 |
(1354) 0x48f357 VMULPD %YMM3,%YMM8,%YMM12 |
(1354) 0x48f35b VFMADD231PD %YMM4,%YMM10,%YMM12 |
(1354) 0x48f360 VMULPD %YMM3,%YMM27,%YMM13 |
(1354) 0x48f366 VFMADD231PD %YMM4,%YMM11,%YMM13 |
(1354) 0x48f36b VFMADD231PD %YMM5,%YMM9,%YMM12 |
(1354) 0x48f370 VFMADD231PD %YMM5,%YMM15,%YMM13 |
(1354) 0x48f375 VMULPD %YMM3,%YMM16,%YMM3 |
(1354) 0x48f37b VFMADD231PD %YMM4,%YMM17,%YMM3 |
(1354) 0x48f381 VMOVDQA %YMM2,%YMM4 |
(1354) 0x48f385 VPTERNLOGQ $-0x8,%YMM0,%YMM12,%YMM4 |
(1354) 0x48f38c VADDPD %YMM4,%YMM12,%YMM4 |
(1354) 0x48f390 VFMADD231PD %YMM5,%YMM1,%YMM3 |
(1354) 0x48f395 VROUNDPD $0xb,%YMM4,%YMM4 |
(1354) 0x48f39b VMOVDQA %YMM2,%YMM5 |
(1354) 0x48f39f VPTERNLOGQ $-0x8,%YMM0,%YMM13,%YMM5 |
(1354) 0x48f3a6 VADDPD %YMM5,%YMM13,%YMM5 |
(1354) 0x48f3aa VROUNDPD $0xb,%YMM5,%YMM5 |
(1354) 0x48f3b0 VSUBPD %YMM4,%YMM12,%YMM4 |
(1354) 0x48f3b4 VMOVDQA %YMM2,%YMM12 |
(1354) 0x48f3b8 VPTERNLOGQ $-0x8,%YMM0,%YMM3,%YMM12 |
(1354) 0x48f3bf VADDPD %YMM3,%YMM12,%YMM12 |
(1354) 0x48f3c3 VROUNDPD $0xb,%YMM12,%YMM12 |
(1354) 0x48f3c9 VSUBPD %YMM5,%YMM13,%YMM5 |
(1354) 0x48f3cd VSUBPD %YMM12,%YMM3,%YMM3 |
(1354) 0x48f3d2 VMULPD %YMM4,%YMM18,%YMM12 |
(1354) 0x48f3d8 VFMADD231PD %YMM5,%YMM24,%YMM12 |
(1354) 0x48f3de VFMADD231PD %YMM19,%YMM3,%YMM12 |
(1354) 0x48f3e4 VMOVUPD %YMM12,(%R8,%R12,8) |
(1354) 0x48f3ea VMULPD 0x18(%RDI){1to4},%YMM4,%YMM13 |
(1354) 0x48f3f1 VFMADD231PD 0x20(%RDI){1to4},%YMM5,%YMM13 |
(1354) 0x48f3f8 VFMADD231PD 0x28(%RDI){1to4},%YMM3,%YMM13 |
(1354) 0x48f3ff VMOVUPD %YMM13,(%RBX,%R12,8) |
(1354) 0x48f405 VMULPD 0x30(%RDI){1to4},%YMM4,%YMM4 |
(1354) 0x48f40c VFMADD231PD 0x38(%RDI){1to4},%YMM5,%YMM4 |
(1354) 0x48f413 VFMADD231PD 0x40(%RDI){1to4},%YMM3,%YMM4 |
(1354) 0x48f41a VMULPD %YMM12,%YMM12,%YMM3 |
(1354) 0x48f41f VFMADD231PD %YMM13,%YMM13,%YMM3 |
(1354) 0x48f424 VMOVUPD %YMM4,(%R11,%R12,8) |
(1354) 0x48f42a VFMADD231PD %YMM4,%YMM4,%YMM3 |
(1354) 0x48f42f VSQRTPD %YMM3,%YMM3 |
(1354) 0x48f433 VMOVUPD %YMM3,(%RCX,%R12,8) |
(1354) 0x48f439 ADD $0x4,%R14D |
(1354) 0x48f43d CMP %R15D,%R14D |
(1354) 0x48f440 JBE 48f330 |
0x48f446 MOV %R13D,%R14D |
0x48f449 VMOVUPD 0x10(%RSP),%XMM2 |
0x48f44f VMOVUPD 0x20(%RSP),%XMM3 |
0x48f455 CMP %R13D,0x4(%RSP) |
0x48f45a JE 48f5a3 |
0x48f460 VPUNPCKLQDQ %XMM29,%XMM23,%XMM1 |
0x48f466 VPUNPCKLQDQ %XMM2,%XMM3,%XMM6 |
0x48f46a VPUNPCKLQDQ %XMM30,%XMM28,%XMM0 |
0x48f470 ADD %R9D,%R14D |
0x48f473 VMOVQ 0xa5205(%RIP),%XMM2 |
0x48f47b VPBROADCASTQ 0x9fe14(%RIP),%XMM4 |
0x48f484 VPBROADCASTQ 0xa51f3(%RIP),%XMM7 |
0x48f48d VPBROADCASTQ 0x9ebaa(%RIP),%XMM8 |
0x48f496 NOPW %CS:(%RAX,%RAX,1) |
(1353) 0x48f4a0 MOVSXD %R14D,%R14 |
(1353) 0x48f4a3 VMOVSD (%RSI,%R14,8),%XMM3 |
(1353) 0x48f4a9 VMOVSD (%RDX,%R14,8),%XMM5 |
(1353) 0x48f4af VMOVHPD (%R10,%R14,8),%XMM5,%XMM5 |
(1353) 0x48f4b5 VSUBSD %XMM20,%XMM3,%XMM3 |
(1353) 0x48f4bb VSUBPD %XMM14,%XMM5,%XMM5 |
(1353) 0x48f4c0 VSHUFPD $0x1,%XMM5,%XMM5,%XMM9 |
(1353) 0x48f4c5 VMULPD %XMM6,%XMM9,%XMM9 |
(1353) 0x48f4c9 VMOVDDUP %XMM3,%XMM3 |
(1353) 0x48f4cd VPUNPCKHQDQ %XMM3,%XMM5,%XMM10 |
(1353) 0x48f4d1 VMULPD %XMM10,%XMM22,%XMM10 |
(1353) 0x48f4d7 VFMADD231PD %XMM5,%XMM1,%XMM9 |
(1353) 0x48f4dc VFMADD213SD %XMM10,%XMM21,%XMM5 |
(1353) 0x48f4e2 VSHUFPD $0x1,%XMM10,%XMM10,%XMM10 |
(1353) 0x48f4e8 VADDSD %XMM5,%XMM10,%XMM5 |
(1353) 0x48f4ec VFMADD231PD %XMM3,%XMM0,%XMM9 |
(1353) 0x48f4f1 VMOVAPD %XMM5,%XMM3 |
(1353) 0x48f4f5 VPTERNLOGQ $-0x28,%XMM4,%XMM2,%XMM3 |
(1353) 0x48f4fc VADDSD %XMM3,%XMM5,%XMM3 |
(1353) 0x48f500 VROUNDSD $0xb,%XMM3,%XMM3,%XMM3 |
(1353) 0x48f506 VSUBSD %XMM3,%XMM5,%XMM3 |
(1353) 0x48f50a VMOVDQA %XMM7,%XMM5 |
(1353) 0x48f50e VPTERNLOGQ $-0x8,%XMM8,%XMM9,%XMM5 |
(1353) 0x48f515 VADDPD %XMM5,%XMM9,%XMM5 |
(1353) 0x48f519 VROUNDPD $0xb,%XMM5,%XMM5 |
(1353) 0x48f51f VSUBPD %XMM5,%XMM9,%XMM5 |
(1353) 0x48f523 VMULPD %XMM5,%XMM26,%XMM9 |
(1353) 0x48f529 VMOVAPD %XMM3,%XMM10 |
(1353) 0x48f52d VFMADD213SD %XMM9,%XMM25,%XMM10 |
(1353) 0x48f533 VSHUFPD $0x1,%XMM9,%XMM9,%XMM9 |
(1353) 0x48f539 VADDSD %XMM9,%XMM10,%XMM9 |
(1353) 0x48f53e VMOVSD %XMM9,(%R8,%R14,8) |
(1353) 0x48f544 VMULSD 0x18(%RDI),%XMM3,%XMM10 |
(1353) 0x48f549 VFMADD231SD 0x20(%RDI),%XMM5,%XMM10 |
(1353) 0x48f54f VSHUFPD $0x1,%XMM5,%XMM5,%XMM11 |
(1353) 0x48f554 VFMADD132SD 0x28(%RDI),%XMM10,%XMM11 |
(1353) 0x48f55a VMOVSD %XMM11,(%RBX,%R14,8) |
(1353) 0x48f560 VMULPD 0x38(%RDI),%XMM5,%XMM5 |
(1353) 0x48f565 VMOVAPD %XMM5,%XMM10 |
(1353) 0x48f569 VFMADD231SD 0x30(%RDI),%XMM3,%XMM10 |
(1353) 0x48f56f VSHUFPD $0x1,%XMM5,%XMM5,%XMM3 |
(1353) 0x48f574 VADDSD %XMM3,%XMM10,%XMM3 |
(1353) 0x48f578 VMOVSD %XMM3,(%R11,%R14,8) |
(1353) 0x48f57e VMULSD %XMM9,%XMM9,%XMM5 |
(1353) 0x48f583 VFMADD231SD %XMM11,%XMM11,%XMM5 |
(1353) 0x48f588 VFMADD231SD %XMM3,%XMM3,%XMM5 |
(1353) 0x48f58d VSQRTSD %XMM5,%XMM5,%XMM3 |
(1353) 0x48f591 VMOVSD %XMM3,(%RCX,%R14,8) |
(1353) 0x48f597 INC %R14D |
(1353) 0x48f59a CMP %R14D,%EAX |
(1353) 0x48f59d JNE 48f4a0 |
0x48f5a3 LEA -0x28(%RBP),%RSP |
0x48f5a7 POP %RBX |
0x48f5a8 POP %R12 |
0x48f5aa POP %R13 |
0x48f5ac POP %R14 |
0x48f5ae POP %R15 |
0x48f5b0 POP %RBP |
0x48f5b1 VZEROUPPER |
0x48f5b4 RET |
0x48f5b5 NOPW %CS:(%RAX,%RAX,1) |
0x48f5bf NOP |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 153 |
nb uops | 154 |
loop length | 924 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 18 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 25.67 cycles |
front end | 25.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 6.20 | 17.00 | 17.00 | 14.00 | 27.00 | 6.30 | 14.00 | 14.00 | 14.00 | 6.20 | 17.00 |
cycles | 6.30 | 6.20 | 17.00 | 17.00 | 14.00 | 27.00 | 6.30 | 14.00 | 14.00 | 14.00 | 6.20 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.16 |
Stall cycles | 1.83 |
ROB full (events) | 2.20 |
Front-end | 25.67 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 61% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 51% |
load | 50% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 11% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 27% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 24% |
load | 21% |
store | 36% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x220,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 48f5a3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa63> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R14D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 48f280 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x740> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JE 48f2c0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x9f33b(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xa5971(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 48f2c3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x783> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 48f460 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x920> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 48f5a3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa63> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 48f460 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x920> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R13),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM26,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x9ed27(%RIP),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xa535e(%RIP),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 48f5a3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa63> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM23,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM2,%XMM3,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM28,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0xa5205(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x9fe14(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xa51f3(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x9ebaa(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 153 |
nb uops | 154 |
loop length | 924 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 18 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 25.67 cycles |
front end | 25.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 6.20 | 17.00 | 17.00 | 14.00 | 27.00 | 6.30 | 14.00 | 14.00 | 14.00 | 6.20 | 17.00 |
cycles | 6.30 | 6.20 | 17.00 | 17.00 | 14.00 | 27.00 | 6.30 | 14.00 | 14.00 | 14.00 | 6.20 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.16 |
Stall cycles | 1.83 |
ROB full (events) | 2.20 |
Front-end | 25.67 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 61% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 51% |
load | 50% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 11% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 27% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 24% |
load | 21% |
store | 36% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x220,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 48f5a3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa63> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R14D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 48f280 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x740> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JE 48f2c0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x9f33b(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xa5971(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 48f2c3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x783> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 48f460 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x920> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 48f5a3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa63> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 48f460 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x920> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R13),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM26,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x9ed27(%RIP),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xa535e(%RIP),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 48f5a3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa63> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM23,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM2,%XMM3,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM28,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0xa5205(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x9fe14(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xa51f3(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x9ebaa(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii– | 1.76 | 1.29 |
○Loop 1355 - ParticleBConds3DSoa.h:234-255 - exec | 1.75 | 1.23 |
○Loop 1353 - ParticleBConds3DSoa.h:235-255 - exec | 0 | 0 |
○Loop 1354 - ParticleBConds3DSoa.h:234-255 - exec | 0 | 0 |