Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 1.79% |
---|
Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 1.79% |
---|
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
222: const T x0 = pos[0]; |
223: const T y0 = pos[1]; |
224: const T z0 = pos[2]; |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
0x490d40 PUSH %RBP |
0x490d41 MOV %RSP,%RBP |
0x490d44 PUSH %R15 |
0x490d46 PUSH %R14 |
0x490d48 PUSH %R13 |
0x490d4a PUSH %R12 |
0x490d4c PUSH %RBX |
0x490d4d AND $-0x20,%RSP |
0x490d51 SUB $0x200,%RSP |
0x490d58 MOV 0x10(%RBP),%EAX |
0x490d5b MOV %EAX,%R14D |
0x490d5e SUB %R9D,%R14D |
0x490d61 JLE 4917b0 |
0x490d67 VMOVUPD (%RSI),%XMM14 |
0x490d6b VMOVSD 0x10(%RSI),%XMM20 |
0x490d72 MOV 0x8(%RDX),%RSI |
0x490d76 MOV 0x18(%RDX),%RDX |
0x490d7a LEA (%RDX,%RSI,8),%R10 |
0x490d7e SAL $0x4,%RSI |
0x490d82 ADD %RDX,%RSI |
0x490d85 MOV 0x8(%R8),%R11 |
0x490d89 MOV 0x18(%R8),%R8 |
0x490d8d LEA (%R8,%R11,8),%RBX |
0x490d91 SAL $0x4,%R11 |
0x490d95 ADD %R8,%R11 |
0x490d98 VMOVSD 0x48(%RDI),%XMM21 |
0x490d9f VMOVUPD 0x50(%RDI),%XMM22 |
0x490da6 VMOVSD 0x60(%RDI),%XMM23 |
0x490dad VMOVSD 0x68(%RDI),%XMM3 |
0x490db2 VMOVSD 0x70(%RDI),%XMM2 |
0x490db7 VMOVSD 0x78(%RDI),%XMM28 |
0x490dbe VMOVSD 0x80(%RDI),%XMM30 |
0x490dc5 VMOVSD 0x88(%RDI),%XMM29 |
0x490dcc VMOVSD (%RDI),%XMM25 |
0x490dd2 VMOVUPD 0x8(%RDI),%XMM26 |
0x490ddc MOV %R14D,%R13D |
0x490ddf AND $-0x4,%R13D |
0x490de3 JE 491480 |
0x490de9 MOV %R14D,0x4(%RSP) |
0x490dee VBROADCASTSD %XMM14,%YMM31 |
0x490df4 VBROADCASTSD %XMM22,%YMM10 |
0x490dfa VBROADCASTSD %XMM26,%YMM0 |
0x490e00 VMOVUPD %YMM0,0xa0(%RSP) |
0x490e09 AND $-0x10,%R14D |
0x490e0d VMOVUPD %XMM3,0x20(%RSP) |
0x490e13 VMOVUPD %XMM2,0x10(%RSP) |
0x490e19 JE 4914c0 |
0x490e1f MOV %R13,0x8(%RSP) |
0x490e24 LEA -0x1(%R14),%R12D |
0x490e28 VMOVUPD %YMM14,0x100(%RSP) |
0x490e31 VXORPS %XMM0,%XMM0,%XMM0 |
0x490e35 VPERMPD $0x55,%YMM14,%YMM0 |
0x490e3b VMOVUPD %YMM0,0x180(%RSP) |
0x490e44 VMOVUPD %XMM20,0x90(%RSP) |
0x490e4c VBROADCASTSD %XMM20,%YMM0 |
0x490e52 VMOVUPD %YMM0,0x160(%RSP) |
0x490e5b VMOVUPD %XMM21,0x80(%RSP) |
0x490e63 VBROADCASTSD %XMM21,%YMM0 |
0x490e69 VMOVUPD %YMM0,0x140(%RSP) |
0x490e72 VMOVUPD %YMM22,0xe0(%RSP) |
0x490e7a VXORPS %XMM0,%XMM0,%XMM0 |
0x490e7e VPERMPD $0x55,%YMM22,%YMM0 |
0x490e85 VMOVUPD %YMM0,0x120(%RSP) |
0x490e8e VMOVUPD %XMM23,0x60(%RSP) |
0x490e96 VBROADCASTSD %XMM23,%YMM19 |
0x490e9c VBROADCASTSD %XMM3,%YMM20 |
0x490ea2 VBROADCASTSD %XMM2,%YMM21 |
0x490ea8 VMOVUPD %XMM28,0x50(%RSP) |
0x490eb0 VBROADCASTSD %XMM28,%YMM22 |
0x490eb6 VMOVUPD %XMM30,0x30(%RSP) |
0x490ebe VBROADCASTSD %XMM30,%YMM23 |
0x490ec4 VMOVUPD %XMM29,0x40(%RSP) |
0x490ecc VBROADCASTSD %XMM29,%YMM24 |
0x490ed2 VMOVUPD %XMM25,0x70(%RSP) |
0x490eda VBROADCASTSD %XMM25,%YMM25 |
0x490ee0 VMOVUPD %YMM26,0xc0(%RSP) |
0x490ee8 VPERMPD $0x55,%YMM26,%YMM26 |
0x490eef XOR %R13D,%R13D |
0x490ef2 VPBROADCASTQ 0xad604(%RIP),%YMM27 |
0x490efc VPBROADCASTQ 0xb3dda(%RIP),%YMM28 |
0x490f06 VMOVUPD 0xa0(%RSP),%YMM18 |
0x490f0e VMOVUPD %YMM31,0x1c0(%RSP) |
0x490f16 VMOVUPD %YMM10,0x1a0(%RSP) |
0x490f1f NOP |
(1363) 0x490f20 LEA (%R9,%R13,1),%R15D |
(1363) 0x490f24 MOVSXD %R15D,%R15 |
(1363) 0x490f27 VMOVUPD (%RDX,%R15,8),%YMM0 |
(1363) 0x490f2d VMOVUPD 0x20(%RDX,%R15,8),%YMM1 |
(1363) 0x490f34 VMOVUPD 0x40(%RDX,%R15,8),%YMM2 |
(1363) 0x490f3b VMOVUPD 0x60(%RDX,%R15,8),%YMM4 |
(1363) 0x490f42 VSUBPD %YMM31,%YMM0,%YMM3 |
(1363) 0x490f48 VSUBPD %YMM31,%YMM1,%YMM1 |
(1363) 0x490f4e VSUBPD %YMM31,%YMM2,%YMM2 |
(1363) 0x490f54 VSUBPD %YMM31,%YMM4,%YMM0 |
(1363) 0x490f5a VMOVUPD (%R10,%R15,8),%YMM5 |
(1363) 0x490f60 VMOVUPD 0x20(%R10,%R15,8),%YMM6 |
(1363) 0x490f67 VMOVUPD 0x40(%R10,%R15,8),%YMM7 |
(1363) 0x490f6e VMOVUPD 0x60(%R10,%R15,8),%YMM4 |
(1363) 0x490f75 VMOVUPD 0x180(%RSP),%YMM11 |
(1363) 0x490f7e VSUBPD %YMM11,%YMM4,%YMM4 |
(1363) 0x490f83 VSUBPD %YMM11,%YMM7,%YMM12 |
(1363) 0x490f88 VSUBPD %YMM11,%YMM6,%YMM14 |
(1363) 0x490f8d VMOVUPD (%RSI,%R15,8),%YMM6 |
(1363) 0x490f93 VMOVUPD 0x20(%RSI,%R15,8),%YMM7 |
(1363) 0x490f9a VMOVUPD 0x40(%RSI,%R15,8),%YMM8 |
(1363) 0x490fa1 VMOVUPD 0x60(%RSI,%R15,8),%YMM9 |
(1363) 0x490fa8 VSUBPD %YMM11,%YMM5,%YMM15 |
(1363) 0x490fad VMOVUPD 0x160(%RSP),%YMM11 |
(1363) 0x490fb6 VSUBPD %YMM11,%YMM6,%YMM5 |
(1363) 0x490fbb VSUBPD %YMM11,%YMM7,%YMM13 |
(1363) 0x490fc0 VMOVUPD 0x140(%RSP),%YMM6 |
(1363) 0x490fc9 VMULPD %YMM0,%YMM6,%YMM7 |
(1363) 0x490fcd VMULPD %YMM2,%YMM6,%YMM30 |
(1363) 0x490fd3 VMULPD %YMM1,%YMM6,%YMM31 |
(1363) 0x490fd9 VSUBPD %YMM11,%YMM8,%YMM16 |
(1363) 0x490fdf VMULPD %YMM3,%YMM6,%YMM29 |
(1363) 0x490fe5 VFMADD231PD %YMM15,%YMM10,%YMM29 |
(1363) 0x490feb VFMADD231PD %YMM14,%YMM10,%YMM31 |
(1363) 0x490ff1 VFMADD231PD %YMM12,%YMM10,%YMM30 |
(1363) 0x490ff7 VFMADD231PD %YMM4,%YMM10,%YMM7 |
(1363) 0x490ffc VSUBPD %YMM11,%YMM9,%YMM17 |
(1363) 0x491002 VMOVUPD 0x120(%RSP),%YMM10 |
(1363) 0x49100b VFMADD231PD %YMM17,%YMM10,%YMM7 |
(1363) 0x491011 VFMADD231PD %YMM16,%YMM10,%YMM30 |
(1363) 0x491017 VMULPD %YMM3,%YMM19,%YMM11 |
(1363) 0x49101d VMULPD %YMM1,%YMM19,%YMM8 |
(1363) 0x491023 VMULPD %YMM2,%YMM19,%YMM9 |
(1363) 0x491029 VFMADD231PD %YMM13,%YMM10,%YMM31 |
(1363) 0x49102f VMULPD %YMM0,%YMM19,%YMM6 |
(1363) 0x491035 VFMADD231PD %YMM4,%YMM20,%YMM6 |
(1363) 0x49103b VFMADD231PD %YMM12,%YMM20,%YMM9 |
(1363) 0x491041 VFMADD231PD %YMM14,%YMM20,%YMM8 |
(1363) 0x491047 VFMADD231PD %YMM15,%YMM20,%YMM11 |
(1363) 0x49104d VFMADD231PD %YMM5,%YMM10,%YMM29 |
(1363) 0x491053 VFMADD231PD %YMM5,%YMM21,%YMM11 |
(1363) 0x491059 VFMADD231PD %YMM13,%YMM21,%YMM8 |
(1363) 0x49105f VMULPD %YMM0,%YMM22,%YMM0 |
(1363) 0x491065 VMULPD %YMM2,%YMM22,%YMM2 |
(1363) 0x49106b VMULPD %YMM1,%YMM22,%YMM1 |
(1363) 0x491071 VFMADD231PD %YMM16,%YMM21,%YMM9 |
(1363) 0x491077 VMULPD %YMM3,%YMM22,%YMM10 |
(1363) 0x49107d VFMADD231PD %YMM15,%YMM23,%YMM10 |
(1363) 0x491083 VFMADD231PD %YMM14,%YMM23,%YMM1 |
(1363) 0x491089 VFMADD231PD %YMM12,%YMM23,%YMM2 |
(1363) 0x49108f VFMADD231PD %YMM4,%YMM23,%YMM0 |
(1363) 0x491095 VFMADD231PD %YMM17,%YMM21,%YMM6 |
(1363) 0x49109b VFMADD231PD %YMM17,%YMM24,%YMM0 |
(1363) 0x4910a1 VFMADD231PD %YMM16,%YMM24,%YMM2 |
(1363) 0x4910a7 VFMADD231PD %YMM13,%YMM24,%YMM1 |
(1363) 0x4910ad VFMADD231PD %YMM5,%YMM24,%YMM10 |
(1363) 0x4910b3 VMOVDQA64 %YMM28,%YMM3 |
(1363) 0x4910b9 VPTERNLOGQ $-0x8,%YMM27,%YMM29,%YMM3 |
(1363) 0x4910c0 VADDPD %YMM3,%YMM29,%YMM3 |
(1363) 0x4910c6 VROUNDPD $0xb,%YMM3,%YMM3 |
(1363) 0x4910cc VMOVDQA64 %YMM28,%YMM4 |
(1363) 0x4910d2 VPTERNLOGQ $-0x8,%YMM27,%YMM31,%YMM4 |
(1363) 0x4910d9 VADDPD %YMM4,%YMM31,%YMM4 |
(1363) 0x4910df VMOVDQA64 %YMM28,%YMM5 |
(1363) 0x4910e5 VROUNDPD $0xb,%YMM4,%YMM4 |
(1363) 0x4910eb VPTERNLOGQ $-0x8,%YMM27,%YMM30,%YMM5 |
(1363) 0x4910f2 VADDPD %YMM5,%YMM30,%YMM5 |
(1363) 0x4910f8 VMOVDQA64 %YMM28,%YMM12 |
(1363) 0x4910fe VPTERNLOGQ $-0x8,%YMM27,%YMM7,%YMM12 |
(1363) 0x491105 VADDPD %YMM7,%YMM12,%YMM12 |
(1363) 0x491109 VROUNDPD $0xb,%YMM5,%YMM5 |
(1363) 0x49110f VROUNDPD $0xb,%YMM12,%YMM12 |
(1363) 0x491115 VMOVDQA64 %YMM28,%YMM13 |
(1363) 0x49111b VPTERNLOGQ $-0x8,%YMM27,%YMM6,%YMM13 |
(1363) 0x491122 VADDPD %YMM6,%YMM13,%YMM13 |
(1363) 0x491126 VMOVDQA64 %YMM28,%YMM14 |
(1363) 0x49112c VROUNDPD $0xb,%YMM13,%YMM13 |
(1363) 0x491132 VPTERNLOGQ $-0x8,%YMM27,%YMM9,%YMM14 |
(1363) 0x491139 VADDPD %YMM14,%YMM9,%YMM14 |
(1363) 0x49113e VMOVDQA64 %YMM28,%YMM15 |
(1363) 0x491144 VPTERNLOGQ $-0x8,%YMM27,%YMM8,%YMM15 |
(1363) 0x49114b VADDPD %YMM15,%YMM8,%YMM15 |
(1363) 0x491150 VROUNDPD $0xb,%YMM14,%YMM14 |
(1363) 0x491156 VROUNDPD $0xb,%YMM15,%YMM15 |
(1363) 0x49115c VMOVDQA64 %YMM28,%YMM16 |
(1363) 0x491162 VPTERNLOGQ $-0x8,%YMM27,%YMM11,%YMM16 |
(1363) 0x491169 VADDPD %YMM16,%YMM11,%YMM16 |
(1363) 0x49116f VRNDSCALEPD $0xb,%YMM16,%YMM16 |
(1363) 0x491176 VSUBPD %YMM12,%YMM7,%YMM7 |
(1363) 0x49117b VMOVDQA64 %YMM28,%YMM12 |
(1363) 0x491181 VPTERNLOGQ $-0x8,%YMM27,%YMM10,%YMM12 |
(1363) 0x491188 VADDPD %YMM12,%YMM10,%YMM12 |
(1363) 0x49118d VRNDSCALEPD $0xb,%YMM12,%YMM17 |
(1363) 0x491194 VSUBPD %YMM5,%YMM30,%YMM30 |
(1363) 0x49119a VSUBPD %YMM4,%YMM31,%YMM31 |
(1363) 0x4911a0 VMOVDQA64 %YMM28,%YMM4 |
(1363) 0x4911a6 VPTERNLOGQ $-0x8,%YMM27,%YMM1,%YMM4 |
(1363) 0x4911ad VADDPD %YMM4,%YMM1,%YMM4 |
(1363) 0x4911b1 VROUNDPD $0xb,%YMM4,%YMM12 |
(1363) 0x4911b7 VSUBPD %YMM3,%YMM29,%YMM4 |
(1363) 0x4911bd VSUBPD %YMM16,%YMM11,%YMM11 |
(1363) 0x4911c3 VMOVDQA64 %YMM28,%YMM3 |
(1363) 0x4911c9 VPTERNLOGQ $-0x8,%YMM27,%YMM2,%YMM3 |
(1363) 0x4911d0 VADDPD %YMM3,%YMM2,%YMM3 |
(1363) 0x4911d4 VRNDSCALEPD $0xb,%YMM3,%YMM16 |
(1363) 0x4911db VSUBPD %YMM15,%YMM8,%YMM29 |
(1363) 0x4911e1 VSUBPD %YMM14,%YMM9,%YMM5 |
(1363) 0x4911e6 VMOVDQA64 %YMM28,%YMM3 |
(1363) 0x4911ec VPTERNLOGQ $-0x8,%YMM27,%YMM0,%YMM3 |
(1363) 0x4911f3 VADDPD %YMM3,%YMM0,%YMM3 |
(1363) 0x4911f7 VROUNDPD $0xb,%YMM3,%YMM8 |
(1363) 0x4911fd VSUBPD %YMM13,%YMM6,%YMM3 |
(1363) 0x491202 VSUBPD %YMM8,%YMM0,%YMM9 |
(1363) 0x491207 VSUBPD %YMM16,%YMM2,%YMM8 |
(1363) 0x49120d VMULPD %YMM4,%YMM25,%YMM0 |
(1363) 0x491213 VMULPD %YMM31,%YMM25,%YMM2 |
(1363) 0x491219 VMULPD %YMM30,%YMM25,%YMM6 |
(1363) 0x49121f VSUBPD %YMM12,%YMM1,%YMM12 |
(1363) 0x491224 VMULPD %YMM7,%YMM25,%YMM1 |
(1363) 0x49122a VFMADD231PD %YMM3,%YMM18,%YMM1 |
(1363) 0x491230 VFMADD231PD %YMM5,%YMM18,%YMM6 |
(1363) 0x491236 VFMADD231PD %YMM29,%YMM18,%YMM2 |
(1363) 0x49123c VFMADD231PD %YMM11,%YMM18,%YMM0 |
(1363) 0x491242 VSUBPD %YMM17,%YMM10,%YMM13 |
(1363) 0x491248 VFMADD231PD %YMM26,%YMM13,%YMM0 |
(1363) 0x49124e VFMADD231PD %YMM26,%YMM12,%YMM2 |
(1363) 0x491254 VFMADD231PD %YMM26,%YMM8,%YMM6 |
(1363) 0x49125a VFMADD231PD %YMM26,%YMM9,%YMM1 |
(1363) 0x491260 VMOVUPD %YMM1,0x60(%R8,%R15,8) |
(1363) 0x491267 VMOVUPD %YMM6,0x40(%R8,%R15,8) |
(1363) 0x49126e VMOVUPD %YMM2,0x20(%R8,%R15,8) |
(1363) 0x491275 VMOVUPD %YMM0,(%R8,%R15,8) |
(1363) 0x49127b VBROADCASTSD 0x18(%RDI),%YMM14 |
(1363) 0x491281 VMULPD %YMM7,%YMM14,%YMM10 |
(1363) 0x491285 VMULPD %YMM4,%YMM14,%YMM15 |
(1363) 0x491289 VMULPD %YMM31,%YMM14,%YMM16 |
(1363) 0x49128f VBROADCASTSD 0x20(%RDI),%YMM17 |
(1363) 0x491296 VMULPD %YMM30,%YMM14,%YMM14 |
(1363) 0x49129c VFMADD231PD %YMM5,%YMM17,%YMM14 |
(1363) 0x4912a2 VFMADD231PD %YMM29,%YMM17,%YMM16 |
(1363) 0x4912a8 VFMADD231PD %YMM11,%YMM17,%YMM15 |
(1363) 0x4912ae VFMADD231PD %YMM17,%YMM3,%YMM10 |
(1363) 0x4912b4 VBROADCASTSD 0x28(%RDI),%YMM17 |
(1363) 0x4912bb VFMADD231PD %YMM9,%YMM17,%YMM10 |
(1363) 0x4912c1 VFMADD231PD %YMM12,%YMM17,%YMM16 |
(1363) 0x4912c7 VFMADD231PD %YMM17,%YMM8,%YMM14 |
(1363) 0x4912cd VMOVUPD %YMM14,0x40(%RBX,%R15,8) |
(1363) 0x4912d4 VMOVUPD %YMM16,0x20(%RBX,%R15,8) |
(1363) 0x4912dc VFMADD231PD %YMM13,%YMM17,%YMM15 |
(1363) 0x4912e2 VMOVUPD %YMM15,(%RBX,%R15,8) |
(1363) 0x4912e8 VMOVUPD %YMM10,0x60(%RBX,%R15,8) |
(1363) 0x4912ef VBROADCASTSD 0x30(%RDI),%YMM17 |
(1363) 0x4912f6 VMULPD %YMM7,%YMM17,%YMM7 |
(1363) 0x4912fc VMULPD %YMM4,%YMM17,%YMM4 |
(1363) 0x491302 VMULPD %YMM31,%YMM17,%YMM31 |
(1363) 0x491308 VMULPD %YMM30,%YMM17,%YMM17 |
(1363) 0x49130e VBROADCASTSD 0x38(%RDI),%YMM30 |
(1363) 0x491315 VFMADD231PD %YMM5,%YMM30,%YMM17 |
(1363) 0x49131b VFMADD231PD %YMM29,%YMM30,%YMM31 |
(1363) 0x491321 VFMADD231PD %YMM11,%YMM30,%YMM4 |
(1363) 0x491327 VFMADD231PD %YMM3,%YMM30,%YMM7 |
(1363) 0x49132d VBROADCASTSD 0x40(%RDI),%YMM3 |
(1363) 0x491333 VFMADD231PD %YMM9,%YMM3,%YMM7 |
(1363) 0x491338 VFMADD231PD %YMM13,%YMM3,%YMM4 |
(1363) 0x49133d VFMADD231PD %YMM12,%YMM3,%YMM31 |
(1363) 0x491343 VFMADD231PD %YMM8,%YMM3,%YMM17 |
(1363) 0x491349 VMULPD %YMM2,%YMM2,%YMM2 |
(1363) 0x49134d VFMADD231PD %YMM16,%YMM16,%YMM2 |
(1363) 0x491353 VMULPD %YMM0,%YMM0,%YMM0 |
(1363) 0x491357 VFMADD231PD %YMM15,%YMM15,%YMM0 |
(1363) 0x49135c VMULPD %YMM1,%YMM1,%YMM1 |
(1363) 0x491360 VFMADD231PD %YMM10,%YMM10,%YMM1 |
(1363) 0x491365 VMOVUPD 0x1a0(%RSP),%YMM10 |
(1363) 0x49136e VMULPD %YMM6,%YMM6,%YMM3 |
(1363) 0x491372 VFMADD231PD %YMM14,%YMM14,%YMM3 |
(1363) 0x491377 VMOVUPD %YMM17,0x40(%R11,%R15,8) |
(1363) 0x49137f VFMADD231PD %YMM17,%YMM17,%YMM3 |
(1363) 0x491385 VMOVUPD %YMM7,0x60(%R11,%R15,8) |
(1363) 0x49138c VFMADD231PD %YMM7,%YMM7,%YMM1 |
(1363) 0x491391 VMOVUPD %YMM4,(%R11,%R15,8) |
(1363) 0x491397 VFMADD231PD %YMM4,%YMM4,%YMM0 |
(1363) 0x49139c VMOVUPD %YMM31,0x20(%R11,%R15,8) |
(1363) 0x4913a4 VSQRTPD %YMM3,%YMM3 |
(1363) 0x4913a8 VMOVUPD %YMM3,0x40(%RCX,%R15,8) |
(1363) 0x4913af VSQRTPD %YMM1,%YMM1 |
(1363) 0x4913b3 VMOVUPD %YMM1,0x60(%RCX,%R15,8) |
(1363) 0x4913ba VSQRTPD %YMM0,%YMM0 |
(1363) 0x4913be VMOVUPD %YMM0,(%RCX,%R15,8) |
(1363) 0x4913c4 VFMADD231PD %YMM31,%YMM31,%YMM2 |
(1363) 0x4913ca VMOVUPD 0x1c0(%RSP),%YMM31 |
(1363) 0x4913d2 VSQRTPD %YMM2,%YMM0 |
(1363) 0x4913d6 VMOVUPD %YMM0,0x20(%RCX,%R15,8) |
(1363) 0x4913dd ADD $0x10,%R13D |
(1363) 0x4913e1 CMP %R12D,%R13D |
(1363) 0x4913e4 JBE 490f20 |
0x4913ea MOV 0x8(%RSP),%R13 |
0x4913ef CMP %R14D,%R13D |
0x4913f2 VMOVUPD 0x100(%RSP),%YMM14 |
0x4913fb VMOVUPD 0x90(%RSP),%XMM20 |
0x491403 VMOVUPD 0x80(%RSP),%XMM21 |
0x49140b VMOVUPD 0xe0(%RSP),%YMM22 |
0x491413 VMOVUPD 0x70(%RSP),%XMM25 |
0x49141b VMOVUPD 0xc0(%RSP),%YMM26 |
0x491423 VMOVUPD 0x60(%RSP),%XMM23 |
0x49142b VMOVUPD 0x20(%RSP),%XMM3 |
0x491431 VMOVUPD 0x10(%RSP),%XMM2 |
0x491437 VMOVUPD 0x50(%RSP),%XMM28 |
0x49143f VMOVUPD 0x40(%RSP),%XMM29 |
0x491447 VMOVUPD 0x30(%RSP),%XMM30 |
0x49144f JNE 4914c3 |
0x491451 CMP %R13D,0x4(%RSP) |
0x491456 JNE 491673 |
0x49145c JMP 4917b0 |
0x491461 NOPW %CS:(%RAX,%RAX,1) |
0x491470 NOPW %CS:(%RAX,%RAX,1) |
0x49147f NOP |
0x491480 XOR %R14D,%R14D |
0x491483 JMP 491673 |
0x491488 NOPW %CS:(%RAX,%RAX,1) |
0x491497 NOPW %CS:(%RAX,%RAX,1) |
0x4914a6 NOPW %CS:(%RAX,%RAX,1) |
0x4914b5 NOPW %CS:(%RAX,%RAX,1) |
0x4914c0 XOR %R14D,%R14D |
0x4914c3 LEA -0x1(%R13),%R15D |
0x4914c7 VPERMPD $0x55,%YMM14,%YMM6 |
0x4914cd VBROADCASTSD %XMM20,%YMM1 |
0x4914d3 VBROADCASTSD %XMM21,%YMM7 |
0x4914d9 VPERMPD $0x55,%YMM22,%YMM8 |
0x4914e0 VBROADCASTSD %XMM23,%YMM9 |
0x4914e6 VBROADCASTSD %XMM3,%YMM0 |
0x4914eb VBROADCASTSD %XMM2,%YMM2 |
0x4914f0 VBROADCASTSD %XMM28,%YMM27 |
0x4914f6 VBROADCASTSD %XMM30,%YMM11 |
0x4914fc VBROADCASTSD %XMM29,%YMM15 |
0x491502 VBROADCASTSD %XMM25,%YMM16 |
0x491508 VPXORD %XMM17,%XMM17,%XMM17 |
0x49150e VPERMPD $0x55,%YMM26,%YMM17 |
0x491515 VPBROADCASTQ 0xacfe1(%RIP),%YMM18 |
0x49151f VPBROADCASTQ 0xb37b7(%RIP),%YMM19 |
0x491529 VMOVUPD 0xa0(%RSP),%YMM24 |
0x491531 NOPW %CS:(%RAX,%RAX,1) |
(1362) 0x491540 LEA (%R9,%R14,1),%R12D |
(1362) 0x491544 MOVSXD %R12D,%R12 |
(1362) 0x491547 VMOVUPD (%RDX,%R12,8),%YMM3 |
(1362) 0x49154d VSUBPD %YMM31,%YMM3,%YMM3 |
(1362) 0x491553 VMOVUPD (%R10,%R12,8),%YMM4 |
(1362) 0x491559 VMOVUPD (%RSI,%R12,8),%YMM5 |
(1362) 0x49155f VSUBPD %YMM6,%YMM4,%YMM4 |
(1362) 0x491563 VSUBPD %YMM1,%YMM5,%YMM5 |
(1362) 0x491567 VMULPD %YMM3,%YMM7,%YMM12 |
(1362) 0x49156b VFMADD231PD %YMM4,%YMM10,%YMM12 |
(1362) 0x491570 VMULPD %YMM3,%YMM9,%YMM13 |
(1362) 0x491574 VFMADD231PD %YMM4,%YMM0,%YMM13 |
(1362) 0x491579 VFMADD231PD %YMM5,%YMM8,%YMM12 |
(1362) 0x49157e VFMADD231PD %YMM5,%YMM2,%YMM13 |
(1362) 0x491583 VMULPD %YMM3,%YMM27,%YMM3 |
(1362) 0x491589 VFMADD231PD %YMM4,%YMM11,%YMM3 |
(1362) 0x49158e VMOVDQA64 %YMM19,%YMM4 |
(1362) 0x491594 VPTERNLOGQ $-0x8,%YMM18,%YMM12,%YMM4 |
(1362) 0x49159b VADDPD %YMM4,%YMM12,%YMM4 |
(1362) 0x49159f VFMADD231PD %YMM5,%YMM15,%YMM3 |
(1362) 0x4915a4 VROUNDPD $0xb,%YMM4,%YMM4 |
(1362) 0x4915aa VMOVDQA64 %YMM19,%YMM5 |
(1362) 0x4915b0 VPTERNLOGQ $-0x8,%YMM18,%YMM13,%YMM5 |
(1362) 0x4915b7 VADDPD %YMM5,%YMM13,%YMM5 |
(1362) 0x4915bb VROUNDPD $0xb,%YMM5,%YMM5 |
(1362) 0x4915c1 VSUBPD %YMM4,%YMM12,%YMM4 |
(1362) 0x4915c5 VMOVDQA64 %YMM19,%YMM12 |
(1362) 0x4915cb VPTERNLOGQ $-0x8,%YMM18,%YMM3,%YMM12 |
(1362) 0x4915d2 VADDPD %YMM3,%YMM12,%YMM12 |
(1362) 0x4915d6 VROUNDPD $0xb,%YMM12,%YMM12 |
(1362) 0x4915dc VSUBPD %YMM5,%YMM13,%YMM5 |
(1362) 0x4915e0 VSUBPD %YMM12,%YMM3,%YMM3 |
(1362) 0x4915e5 VMULPD %YMM4,%YMM16,%YMM12 |
(1362) 0x4915eb VFMADD231PD %YMM5,%YMM24,%YMM12 |
(1362) 0x4915f1 VFMADD231PD %YMM17,%YMM3,%YMM12 |
(1362) 0x4915f7 VMOVUPD %YMM12,(%R8,%R12,8) |
(1362) 0x4915fd VMULPD 0x18(%RDI){1to4},%YMM4,%YMM13 |
(1362) 0x491604 VFMADD231PD 0x20(%RDI){1to4},%YMM5,%YMM13 |
(1362) 0x49160b VFMADD231PD 0x28(%RDI){1to4},%YMM3,%YMM13 |
(1362) 0x491612 VMOVUPD %YMM13,(%RBX,%R12,8) |
(1362) 0x491618 VMULPD 0x30(%RDI){1to4},%YMM4,%YMM4 |
(1362) 0x49161f VFMADD231PD 0x38(%RDI){1to4},%YMM5,%YMM4 |
(1362) 0x491626 VFMADD231PD 0x40(%RDI){1to4},%YMM3,%YMM4 |
(1362) 0x49162d VMULPD %YMM12,%YMM12,%YMM3 |
(1362) 0x491632 VFMADD231PD %YMM13,%YMM13,%YMM3 |
(1362) 0x491637 VMOVUPD %YMM4,(%R11,%R12,8) |
(1362) 0x49163d VFMADD231PD %YMM4,%YMM4,%YMM3 |
(1362) 0x491642 VSQRTPD %YMM3,%YMM3 |
(1362) 0x491646 VMOVUPD %YMM3,(%RCX,%R12,8) |
(1362) 0x49164c ADD $0x4,%R14D |
(1362) 0x491650 CMP %R15D,%R14D |
(1362) 0x491653 JBE 491540 |
0x491659 MOV %R13D,%R14D |
0x49165c VMOVUPD 0x10(%RSP),%XMM2 |
0x491662 VMOVUPD 0x20(%RSP),%XMM3 |
0x491668 CMP %R13D,0x4(%RSP) |
0x49166d JE 4917b0 |
0x491673 VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 |
0x491679 VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 |
0x49167f VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 |
0x491685 ADD %R9D,%R14D |
0x491688 VMOVQ 0xb3650(%RIP),%XMM6 |
0x491690 VPBROADCASTQ 0xadfef(%RIP),%XMM7 |
0x491699 VPBROADCASTQ 0xb363e(%RIP),%XMM8 |
0x4916a2 VPBROADCASTQ 0xace55(%RIP),%XMM9 |
0x4916ab NOPL (%RAX,%RAX,1) |
(1361) 0x4916b0 MOVSXD %R14D,%R14 |
(1361) 0x4916b3 VMOVSD (%RSI,%R14,8),%XMM3 |
(1361) 0x4916b9 VMOVSD (%RDX,%R14,8),%XMM4 |
(1361) 0x4916bf VMOVHPD (%R10,%R14,8),%XMM4,%XMM4 |
(1361) 0x4916c5 VSUBSD %XMM20,%XMM3,%XMM3 |
(1361) 0x4916cb VSUBPD %XMM14,%XMM4,%XMM4 |
(1361) 0x4916d0 VSHUFPD $0x1,%XMM4,%XMM4,%XMM5 |
(1361) 0x4916d5 VMULPD %XMM5,%XMM1,%XMM5 |
(1361) 0x4916d9 VMOVDDUP %XMM3,%XMM3 |
(1361) 0x4916dd VPUNPCKHQDQ %XMM3,%XMM4,%XMM10 |
(1361) 0x4916e1 VMULPD %XMM10,%XMM22,%XMM10 |
(1361) 0x4916e7 VFMADD231PD %XMM4,%XMM0,%XMM5 |
(1361) 0x4916ec VFMADD213SD %XMM10,%XMM21,%XMM4 |
(1361) 0x4916f2 VSHUFPD $0x1,%XMM10,%XMM10,%XMM10 |
(1361) 0x4916f8 VADDSD %XMM4,%XMM10,%XMM4 |
(1361) 0x4916fc VFMADD231PD %XMM3,%XMM2,%XMM5 |
(1361) 0x491701 VMOVAPD %XMM4,%XMM3 |
(1361) 0x491705 VPTERNLOGQ $-0x28,%XMM7,%XMM6,%XMM3 |
(1361) 0x49170c VADDSD %XMM3,%XMM4,%XMM3 |
(1361) 0x491710 VROUNDSD $0xb,%XMM3,%XMM3,%XMM3 |
(1361) 0x491716 VSUBSD %XMM3,%XMM4,%XMM3 |
(1361) 0x49171a VMOVDQA %XMM8,%XMM4 |
(1361) 0x49171e VPTERNLOGQ $-0x8,%XMM9,%XMM5,%XMM4 |
(1361) 0x491725 VADDPD %XMM4,%XMM5,%XMM4 |
(1361) 0x491729 VROUNDPD $0xb,%XMM4,%XMM4 |
(1361) 0x49172f VSUBPD %XMM4,%XMM5,%XMM4 |
(1361) 0x491733 VMULPD %XMM4,%XMM26,%XMM5 |
(1361) 0x491739 VMOVAPD %XMM3,%XMM10 |
(1361) 0x49173d VFMADD213SD %XMM5,%XMM25,%XMM10 |
(1361) 0x491743 VSHUFPD $0x1,%XMM5,%XMM5,%XMM5 |
(1361) 0x491748 VADDSD %XMM5,%XMM10,%XMM5 |
(1361) 0x49174c VMOVSD %XMM5,(%R8,%R14,8) |
(1361) 0x491752 VMULSD 0x18(%RDI),%XMM3,%XMM10 |
(1361) 0x491757 VFMADD231SD 0x20(%RDI),%XMM4,%XMM10 |
(1361) 0x49175d VSHUFPD $0x1,%XMM4,%XMM4,%XMM11 |
(1361) 0x491762 VFMADD132SD 0x28(%RDI),%XMM10,%XMM11 |
(1361) 0x491768 VMOVSD %XMM11,(%RBX,%R14,8) |
(1361) 0x49176e VMULPD 0x38(%RDI),%XMM4,%XMM4 |
(1361) 0x491773 VMOVAPD %XMM4,%XMM10 |
(1361) 0x491777 VFMADD231SD 0x30(%RDI),%XMM3,%XMM10 |
(1361) 0x49177d VSHUFPD $0x1,%XMM4,%XMM4,%XMM3 |
(1361) 0x491782 VADDSD %XMM3,%XMM10,%XMM3 |
(1361) 0x491786 VMOVSD %XMM3,(%R11,%R14,8) |
(1361) 0x49178c VMULSD %XMM5,%XMM5,%XMM4 |
(1361) 0x491790 VFMADD231SD %XMM11,%XMM11,%XMM4 |
(1361) 0x491795 VFMADD231SD %XMM3,%XMM3,%XMM4 |
(1361) 0x49179a VSQRTSD %XMM4,%XMM4,%XMM3 |
(1361) 0x49179e VMOVSD %XMM3,(%RCX,%R14,8) |
(1361) 0x4917a4 INC %R14D |
(1361) 0x4917a7 CMP %R14D,%EAX |
(1361) 0x4917aa JNE 4916b0 |
0x4917b0 LEA -0x28(%RBP),%RSP |
0x4917b4 POP %RBX |
0x4917b5 POP %R12 |
0x4917b7 POP %R13 |
0x4917b9 POP %R14 |
0x4917bb POP %R15 |
0x4917bd POP %RBP |
0x4917be VZEROUPPER |
0x4917c1 RET |
0x4917c2 NOPW %CS:(%RAX,%RAX,1) |
0x4917cc NOPW %CS:(%RAX,%RAX,1) |
0x4917d6 NOPW %CS:(%RAX,%RAX,1) |
0x4917e0 NOPW %CS:(%RAX,%RAX,1) |
0x4917ea NOPW %CS:(%RAX,%RAX,1) |
0x4917f4 NOPW %CS:(%RAX,%RAX,1) |
0x4917fe XCHG %AX,%AX |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►41.74+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableAAOMPTarget.h:283 | exec |
○ | qmcplusplus::ParticleSet::comp[...] | ParticleSet.cpp:343 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►40.05+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableAAOMPTarget.h:275 | exec |
○ | qmcplusplus::ParticleSet::comp[...] | ParticleSet.cpp:343 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►9.48+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableAAOMPTarget.h:185 | exec |
○ | qmcplusplus::ParticleSet::upda[...] | ParticleSet.cpp:242 | exec |
○ | main.extracted.113 | miniqmc.cpp:396 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►4.77+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableABOMPTarget.h:366 | exec |
○ | qmcplusplus::ParticleSet::comp[...] | ParticleSet.cpp:343 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►3.93+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableABOMPTarget.h:361 | exec |
○ | qmcplusplus::ParticleSet::comp[...] | ParticleSet.cpp:343 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 160 |
nb uops | 161 |
loop length | 989 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 23 |
micro-operation queue | 26.83 cycles |
front end | 26.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
cycles | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.17 |
Stall cycles | 0.67 |
ROB full (events) | 1.01 |
Front-end | 26.83 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 11% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 60% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 51% |
load | 50% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
all | 12% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 26% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 24% |
load | 21% |
store | 35% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x200,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4917b0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R14D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 491480 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x740> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JE 4914c0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0xad604(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xb3dda(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 4914c3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x783> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 491673 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x933> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4917b0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa70> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 491673 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x933> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R13),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXORD %XMM17,%XMM17,%XMM17 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPERMPD $0x55,%YMM26,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0xacfe1(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xb37b7(%RIP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4917b0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0xb3650(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0xadfef(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xb363e(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xace55(%RIP),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 160 |
nb uops | 161 |
loop length | 989 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 23 |
micro-operation queue | 26.83 cycles |
front end | 26.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
cycles | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.17 |
Stall cycles | 0.67 |
ROB full (events) | 1.01 |
Front-end | 26.83 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 11% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 60% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 51% |
load | 50% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
all | 12% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 26% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 24% |
load | 21% |
store | 35% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x200,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4917b0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R14D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 491480 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x740> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JE 4914c0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0xad604(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xb3dda(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 4914c3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x783> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 491673 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x933> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4917b0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa70> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 491673 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x933> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R13),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXORD %XMM17,%XMM17,%XMM17 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPERMPD $0x55,%YMM26,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0xacfe1(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xb37b7(%RIP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4917b0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0xb3650(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0xadfef(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xb363e(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0xace55(%RIP),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void qmcplusplus::DTD_BConds | 1.79 | 1.44 |
○Loop 1363 - ParticleBConds3DSoa.h:235-256 - exec | 1.77 | 1.4 |
○Loop 1361 - ParticleBConds3DSoa.h:235-255 - exec | 0 | 0 |
○Loop 1362 - ParticleBConds3DSoa.h:235-256 - exec | 0 | 0 |