Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 2.02% |
---|
Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 2.02% |
---|
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
222: const T x0 = pos[0]; |
223: const T y0 = pos[1]; |
224: const T z0 = pos[2]; |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
0x46f3e0 PUSH %RBP |
0x46f3e1 MOV %RSP,%RBP |
0x46f3e4 PUSH %R15 |
0x46f3e6 PUSH %R14 |
0x46f3e8 PUSH %R13 |
0x46f3ea PUSH %R12 |
0x46f3ec PUSH %RBX |
0x46f3ed AND $-0x20,%RSP |
0x46f3f1 SUB $0x200,%RSP |
0x46f3f8 MOV 0x10(%RBP),%EAX |
0x46f3fb MOV %EAX,%R13D |
0x46f3fe SUB %R9D,%R13D |
0x46f401 JLE 46fe00 |
0x46f407 VMOVUPD (%RSI),%XMM14 |
0x46f40b VMOVSD 0x10(%RSI),%XMM20 |
0x46f412 MOV 0x8(%RDX),%RSI |
0x46f416 MOV 0x18(%RDX),%RDX |
0x46f41a LEA (%RDX,%RSI,8),%R10 |
0x46f41e SAL $0x4,%RSI |
0x46f422 ADD %RDX,%RSI |
0x46f425 MOV 0x8(%R8),%R11 |
0x46f429 MOV 0x18(%R8),%R8 |
0x46f42d LEA (%R8,%R11,8),%RBX |
0x46f431 SAL $0x4,%R11 |
0x46f435 ADD %R8,%R11 |
0x46f438 VMOVSD 0x48(%RDI),%XMM21 |
0x46f43f VMOVUPD 0x50(%RDI),%XMM22 |
0x46f446 VMOVSD 0x60(%RDI),%XMM23 |
0x46f44d VMOVSD 0x68(%RDI),%XMM3 |
0x46f452 VMOVSD 0x70(%RDI),%XMM2 |
0x46f457 VMOVSD 0x78(%RDI),%XMM28 |
0x46f45e VMOVSD 0x80(%RDI),%XMM30 |
0x46f465 VMOVSD 0x88(%RDI),%XMM29 |
0x46f46c VMOVSD (%RDI),%XMM25 |
0x46f472 VMOVUPD 0x8(%RDI),%XMM26 |
0x46f47c MOV %R13D,%R15D |
0x46f47f AND $-0x4,%R15D |
0x46f483 JE 46fb14 |
0x46f489 MOV %R13D,%R14D |
0x46f48c VBROADCASTSD %XMM14,%YMM31 |
0x46f492 VBROADCASTSD %XMM22,%YMM10 |
0x46f498 VBROADCASTSD %XMM26,%YMM0 |
0x46f49e VMOVUPD %YMM0,0xa0(%RSP) |
0x46f4a7 AND $-0x10,%R14D |
0x46f4ab VMOVUPD %XMM3,0x20(%RSP) |
0x46f4b1 VMOVUPD %XMM2,0x10(%RSP) |
0x46f4b7 MOV %R15,0x8(%RSP) |
0x46f4bc JE 46fb1c |
0x46f4c2 MOV %R13D,0x4(%RSP) |
0x46f4c7 LEA -0x1(%R14),%R13D |
0x46f4cb VMOVUPD %YMM14,0x100(%RSP) |
0x46f4d4 VXORPS %XMM0,%XMM0,%XMM0 |
0x46f4d8 VPERMPD $0x55,%YMM14,%YMM0 |
0x46f4de VMOVUPD %YMM0,0x180(%RSP) |
0x46f4e7 VMOVUPD %XMM20,0x90(%RSP) |
0x46f4ef VBROADCASTSD %XMM20,%YMM0 |
0x46f4f5 VMOVUPD %YMM0,0x160(%RSP) |
0x46f4fe VMOVUPD %XMM21,0x80(%RSP) |
0x46f506 VBROADCASTSD %XMM21,%YMM0 |
0x46f50c VMOVUPD %YMM0,0x140(%RSP) |
0x46f515 VMOVUPD %YMM22,0xe0(%RSP) |
0x46f51d VXORPS %XMM0,%XMM0,%XMM0 |
0x46f521 VPERMPD $0x55,%YMM22,%YMM0 |
0x46f528 VMOVUPD %YMM0,0x120(%RSP) |
0x46f531 VMOVUPD %XMM23,0x60(%RSP) |
0x46f539 VBROADCASTSD %XMM23,%YMM19 |
0x46f53f VBROADCASTSD %XMM3,%YMM20 |
0x46f545 VBROADCASTSD %XMM2,%YMM21 |
0x46f54b VMOVUPD %XMM28,0x50(%RSP) |
0x46f553 VBROADCASTSD %XMM28,%YMM22 |
0x46f559 VMOVUPD %XMM30,0x30(%RSP) |
0x46f561 VBROADCASTSD %XMM30,%YMM23 |
0x46f567 VMOVUPD %XMM29,0x40(%RSP) |
0x46f56f VBROADCASTSD %XMM29,%YMM24 |
0x46f575 VMOVUPD %XMM25,0x70(%RSP) |
0x46f57d VBROADCASTSD %XMM25,%YMM25 |
0x46f583 VMOVUPD %YMM26,0xc0(%RSP) |
0x46f58b VPERMPD $0x55,%YMM26,%YMM26 |
0x46f592 XOR %R12D,%R12D |
0x46f595 VPBROADCASTQ 0x8bca1(%RIP),%YMM27 |
0x46f59f VPBROADCASTQ 0x92417(%RIP),%YMM28 |
0x46f5a9 VMOVUPD 0xa0(%RSP),%YMM18 |
0x46f5b1 VMOVUPD %YMM31,0x1c0(%RSP) |
0x46f5b9 VMOVUPD %YMM10,0x1a0(%RSP) |
0x46f5c2 NOPW %CS:(%RAX,%RAX,1) |
(1370) 0x46f5d0 LEA (%R9,%R12,1),%R15D |
(1370) 0x46f5d4 MOVSXD %R15D,%R15 |
(1370) 0x46f5d7 VMOVUPD (%RDX,%R15,8),%YMM0 |
(1370) 0x46f5dd VMOVUPD 0x20(%RDX,%R15,8),%YMM1 |
(1370) 0x46f5e4 VMOVUPD 0x40(%RDX,%R15,8),%YMM2 |
(1370) 0x46f5eb VMOVUPD 0x60(%RDX,%R15,8),%YMM4 |
(1370) 0x46f5f2 VSUBPD %YMM31,%YMM0,%YMM3 |
(1370) 0x46f5f8 VSUBPD %YMM31,%YMM1,%YMM1 |
(1370) 0x46f5fe VSUBPD %YMM31,%YMM2,%YMM2 |
(1370) 0x46f604 VSUBPD %YMM31,%YMM4,%YMM0 |
(1370) 0x46f60a VMOVUPD (%R10,%R15,8),%YMM5 |
(1370) 0x46f610 VMOVUPD 0x20(%R10,%R15,8),%YMM6 |
(1370) 0x46f617 VMOVUPD 0x40(%R10,%R15,8),%YMM7 |
(1370) 0x46f61e VMOVUPD 0x60(%R10,%R15,8),%YMM4 |
(1370) 0x46f625 VMOVUPD 0x180(%RSP),%YMM11 |
(1370) 0x46f62e VSUBPD %YMM11,%YMM4,%YMM4 |
(1370) 0x46f633 VSUBPD %YMM11,%YMM7,%YMM12 |
(1370) 0x46f638 VSUBPD %YMM11,%YMM6,%YMM14 |
(1370) 0x46f63d VMOVUPD (%RSI,%R15,8),%YMM6 |
(1370) 0x46f643 VMOVUPD 0x20(%RSI,%R15,8),%YMM7 |
(1370) 0x46f64a VMOVUPD 0x40(%RSI,%R15,8),%YMM8 |
(1370) 0x46f651 VMOVUPD 0x60(%RSI,%R15,8),%YMM9 |
(1370) 0x46f658 VSUBPD %YMM11,%YMM5,%YMM15 |
(1370) 0x46f65d VMOVUPD 0x160(%RSP),%YMM11 |
(1370) 0x46f666 VSUBPD %YMM11,%YMM6,%YMM5 |
(1370) 0x46f66b VSUBPD %YMM11,%YMM7,%YMM13 |
(1370) 0x46f670 VMOVUPD 0x140(%RSP),%YMM6 |
(1370) 0x46f679 VMULPD %YMM0,%YMM6,%YMM7 |
(1370) 0x46f67d VMULPD %YMM2,%YMM6,%YMM30 |
(1370) 0x46f683 VMULPD %YMM1,%YMM6,%YMM31 |
(1370) 0x46f689 VSUBPD %YMM11,%YMM8,%YMM16 |
(1370) 0x46f68f VMULPD %YMM3,%YMM6,%YMM29 |
(1370) 0x46f695 VFMADD231PD %YMM15,%YMM10,%YMM29 |
(1370) 0x46f69b VFMADD231PD %YMM14,%YMM10,%YMM31 |
(1370) 0x46f6a1 VFMADD231PD %YMM12,%YMM10,%YMM30 |
(1370) 0x46f6a7 VFMADD231PD %YMM4,%YMM10,%YMM7 |
(1370) 0x46f6ac VSUBPD %YMM11,%YMM9,%YMM17 |
(1370) 0x46f6b2 VMOVUPD 0x120(%RSP),%YMM10 |
(1370) 0x46f6bb VFMADD231PD %YMM17,%YMM10,%YMM7 |
(1370) 0x46f6c1 VFMADD231PD %YMM16,%YMM10,%YMM30 |
(1370) 0x46f6c7 VMULPD %YMM3,%YMM19,%YMM11 |
(1370) 0x46f6cd VMULPD %YMM1,%YMM19,%YMM8 |
(1370) 0x46f6d3 VMULPD %YMM2,%YMM19,%YMM9 |
(1370) 0x46f6d9 VFMADD231PD %YMM13,%YMM10,%YMM31 |
(1370) 0x46f6df VMULPD %YMM0,%YMM19,%YMM6 |
(1370) 0x46f6e5 VFMADD231PD %YMM4,%YMM20,%YMM6 |
(1370) 0x46f6eb VFMADD231PD %YMM12,%YMM20,%YMM9 |
(1370) 0x46f6f1 VFMADD231PD %YMM14,%YMM20,%YMM8 |
(1370) 0x46f6f7 VFMADD231PD %YMM15,%YMM20,%YMM11 |
(1370) 0x46f6fd VFMADD231PD %YMM5,%YMM10,%YMM29 |
(1370) 0x46f703 VFMADD231PD %YMM5,%YMM21,%YMM11 |
(1370) 0x46f709 VFMADD231PD %YMM13,%YMM21,%YMM8 |
(1370) 0x46f70f VMULPD %YMM0,%YMM22,%YMM0 |
(1370) 0x46f715 VMULPD %YMM2,%YMM22,%YMM2 |
(1370) 0x46f71b VMULPD %YMM1,%YMM22,%YMM1 |
(1370) 0x46f721 VFMADD231PD %YMM16,%YMM21,%YMM9 |
(1370) 0x46f727 VMULPD %YMM3,%YMM22,%YMM10 |
(1370) 0x46f72d VFMADD231PD %YMM15,%YMM23,%YMM10 |
(1370) 0x46f733 VFMADD231PD %YMM14,%YMM23,%YMM1 |
(1370) 0x46f739 VFMADD231PD %YMM12,%YMM23,%YMM2 |
(1370) 0x46f73f VFMADD231PD %YMM4,%YMM23,%YMM0 |
(1370) 0x46f745 VFMADD231PD %YMM17,%YMM21,%YMM6 |
(1370) 0x46f74b VFMADD231PD %YMM17,%YMM24,%YMM0 |
(1370) 0x46f751 VFMADD231PD %YMM16,%YMM24,%YMM2 |
(1370) 0x46f757 VFMADD231PD %YMM13,%YMM24,%YMM1 |
(1370) 0x46f75d VFMADD231PD %YMM5,%YMM24,%YMM10 |
(1370) 0x46f763 VMOVDQA64 %YMM28,%YMM3 |
(1370) 0x46f769 VPTERNLOGQ $-0x8,%YMM27,%YMM29,%YMM3 |
(1370) 0x46f770 VADDPD %YMM3,%YMM29,%YMM3 |
(1370) 0x46f776 VROUNDPD $0xb,%YMM3,%YMM3 |
(1370) 0x46f77c VMOVDQA64 %YMM28,%YMM4 |
(1370) 0x46f782 VPTERNLOGQ $-0x8,%YMM27,%YMM31,%YMM4 |
(1370) 0x46f789 VADDPD %YMM4,%YMM31,%YMM4 |
(1370) 0x46f78f VMOVDQA64 %YMM28,%YMM5 |
(1370) 0x46f795 VROUNDPD $0xb,%YMM4,%YMM4 |
(1370) 0x46f79b VPTERNLOGQ $-0x8,%YMM27,%YMM30,%YMM5 |
(1370) 0x46f7a2 VADDPD %YMM5,%YMM30,%YMM5 |
(1370) 0x46f7a8 VMOVDQA64 %YMM28,%YMM12 |
(1370) 0x46f7ae VPTERNLOGQ $-0x8,%YMM27,%YMM7,%YMM12 |
(1370) 0x46f7b5 VADDPD %YMM7,%YMM12,%YMM12 |
(1370) 0x46f7b9 VROUNDPD $0xb,%YMM5,%YMM5 |
(1370) 0x46f7bf VROUNDPD $0xb,%YMM12,%YMM12 |
(1370) 0x46f7c5 VMOVDQA64 %YMM28,%YMM13 |
(1370) 0x46f7cb VPTERNLOGQ $-0x8,%YMM27,%YMM6,%YMM13 |
(1370) 0x46f7d2 VADDPD %YMM6,%YMM13,%YMM13 |
(1370) 0x46f7d6 VMOVDQA64 %YMM28,%YMM14 |
(1370) 0x46f7dc VROUNDPD $0xb,%YMM13,%YMM13 |
(1370) 0x46f7e2 VPTERNLOGQ $-0x8,%YMM27,%YMM9,%YMM14 |
(1370) 0x46f7e9 VADDPD %YMM14,%YMM9,%YMM14 |
(1370) 0x46f7ee VMOVDQA64 %YMM28,%YMM15 |
(1370) 0x46f7f4 VPTERNLOGQ $-0x8,%YMM27,%YMM8,%YMM15 |
(1370) 0x46f7fb VADDPD %YMM15,%YMM8,%YMM15 |
(1370) 0x46f800 VROUNDPD $0xb,%YMM14,%YMM14 |
(1370) 0x46f806 VROUNDPD $0xb,%YMM15,%YMM15 |
(1370) 0x46f80c VMOVDQA64 %YMM28,%YMM16 |
(1370) 0x46f812 VPTERNLOGQ $-0x8,%YMM27,%YMM11,%YMM16 |
(1370) 0x46f819 VADDPD %YMM16,%YMM11,%YMM16 |
(1370) 0x46f81f VRNDSCALEPD $0xb,%YMM16,%YMM16 |
(1370) 0x46f826 VSUBPD %YMM12,%YMM7,%YMM7 |
(1370) 0x46f82b VMOVDQA64 %YMM28,%YMM12 |
(1370) 0x46f831 VPTERNLOGQ $-0x8,%YMM27,%YMM10,%YMM12 |
(1370) 0x46f838 VADDPD %YMM12,%YMM10,%YMM12 |
(1370) 0x46f83d VRNDSCALEPD $0xb,%YMM12,%YMM17 |
(1370) 0x46f844 VSUBPD %YMM5,%YMM30,%YMM30 |
(1370) 0x46f84a VSUBPD %YMM4,%YMM31,%YMM31 |
(1370) 0x46f850 VMOVDQA64 %YMM28,%YMM4 |
(1370) 0x46f856 VPTERNLOGQ $-0x8,%YMM27,%YMM1,%YMM4 |
(1370) 0x46f85d VADDPD %YMM4,%YMM1,%YMM4 |
(1370) 0x46f861 VROUNDPD $0xb,%YMM4,%YMM12 |
(1370) 0x46f867 VSUBPD %YMM3,%YMM29,%YMM4 |
(1370) 0x46f86d VSUBPD %YMM16,%YMM11,%YMM11 |
(1370) 0x46f873 VMOVDQA64 %YMM28,%YMM3 |
(1370) 0x46f879 VPTERNLOGQ $-0x8,%YMM27,%YMM2,%YMM3 |
(1370) 0x46f880 VADDPD %YMM3,%YMM2,%YMM3 |
(1370) 0x46f884 VRNDSCALEPD $0xb,%YMM3,%YMM16 |
(1370) 0x46f88b VSUBPD %YMM15,%YMM8,%YMM29 |
(1370) 0x46f891 VSUBPD %YMM14,%YMM9,%YMM5 |
(1370) 0x46f896 VMOVDQA64 %YMM28,%YMM3 |
(1370) 0x46f89c VPTERNLOGQ $-0x8,%YMM27,%YMM0,%YMM3 |
(1370) 0x46f8a3 VADDPD %YMM3,%YMM0,%YMM3 |
(1370) 0x46f8a7 VROUNDPD $0xb,%YMM3,%YMM8 |
(1370) 0x46f8ad VSUBPD %YMM13,%YMM6,%YMM3 |
(1370) 0x46f8b2 VSUBPD %YMM8,%YMM0,%YMM9 |
(1370) 0x46f8b7 VSUBPD %YMM16,%YMM2,%YMM8 |
(1370) 0x46f8bd VMULPD %YMM4,%YMM25,%YMM0 |
(1370) 0x46f8c3 VMULPD %YMM31,%YMM25,%YMM2 |
(1370) 0x46f8c9 VMULPD %YMM30,%YMM25,%YMM6 |
(1370) 0x46f8cf VSUBPD %YMM12,%YMM1,%YMM12 |
(1370) 0x46f8d4 VMULPD %YMM7,%YMM25,%YMM1 |
(1370) 0x46f8da VFMADD231PD %YMM3,%YMM18,%YMM1 |
(1370) 0x46f8e0 VFMADD231PD %YMM5,%YMM18,%YMM6 |
(1370) 0x46f8e6 VFMADD231PD %YMM29,%YMM18,%YMM2 |
(1370) 0x46f8ec VFMADD231PD %YMM11,%YMM18,%YMM0 |
(1370) 0x46f8f2 VSUBPD %YMM17,%YMM10,%YMM13 |
(1370) 0x46f8f8 VFMADD231PD %YMM26,%YMM13,%YMM0 |
(1370) 0x46f8fe VFMADD231PD %YMM26,%YMM12,%YMM2 |
(1370) 0x46f904 VFMADD231PD %YMM26,%YMM8,%YMM6 |
(1370) 0x46f90a VFMADD231PD %YMM26,%YMM9,%YMM1 |
(1370) 0x46f910 VMOVUPD %YMM1,0x60(%R8,%R15,8) |
(1370) 0x46f917 VMOVUPD %YMM6,0x40(%R8,%R15,8) |
(1370) 0x46f91e VMOVUPD %YMM2,0x20(%R8,%R15,8) |
(1370) 0x46f925 VMOVUPD %YMM0,(%R8,%R15,8) |
(1370) 0x46f92b VBROADCASTSD 0x18(%RDI),%YMM14 |
(1370) 0x46f931 VMULPD %YMM7,%YMM14,%YMM10 |
(1370) 0x46f935 VMULPD %YMM4,%YMM14,%YMM15 |
(1370) 0x46f939 VMULPD %YMM31,%YMM14,%YMM16 |
(1370) 0x46f93f VBROADCASTSD 0x20(%RDI),%YMM17 |
(1370) 0x46f946 VMULPD %YMM30,%YMM14,%YMM14 |
(1370) 0x46f94c VFMADD231PD %YMM5,%YMM17,%YMM14 |
(1370) 0x46f952 VFMADD231PD %YMM29,%YMM17,%YMM16 |
(1370) 0x46f958 VFMADD231PD %YMM11,%YMM17,%YMM15 |
(1370) 0x46f95e VFMADD231PD %YMM17,%YMM3,%YMM10 |
(1370) 0x46f964 VBROADCASTSD 0x28(%RDI),%YMM17 |
(1370) 0x46f96b VFMADD231PD %YMM9,%YMM17,%YMM10 |
(1370) 0x46f971 VFMADD231PD %YMM12,%YMM17,%YMM16 |
(1370) 0x46f977 VFMADD231PD %YMM17,%YMM8,%YMM14 |
(1370) 0x46f97d VMOVUPD %YMM14,0x40(%RBX,%R15,8) |
(1370) 0x46f984 VMOVUPD %YMM16,0x20(%RBX,%R15,8) |
(1370) 0x46f98c VFMADD231PD %YMM13,%YMM17,%YMM15 |
(1370) 0x46f992 VMOVUPD %YMM15,(%RBX,%R15,8) |
(1370) 0x46f998 VMOVUPD %YMM10,0x60(%RBX,%R15,8) |
(1370) 0x46f99f VBROADCASTSD 0x30(%RDI),%YMM17 |
(1370) 0x46f9a6 VMULPD %YMM7,%YMM17,%YMM7 |
(1370) 0x46f9ac VMULPD %YMM4,%YMM17,%YMM4 |
(1370) 0x46f9b2 VMULPD %YMM31,%YMM17,%YMM31 |
(1370) 0x46f9b8 VMULPD %YMM30,%YMM17,%YMM17 |
(1370) 0x46f9be VBROADCASTSD 0x38(%RDI),%YMM30 |
(1370) 0x46f9c5 VFMADD231PD %YMM5,%YMM30,%YMM17 |
(1370) 0x46f9cb VFMADD231PD %YMM29,%YMM30,%YMM31 |
(1370) 0x46f9d1 VFMADD231PD %YMM11,%YMM30,%YMM4 |
(1370) 0x46f9d7 VFMADD231PD %YMM3,%YMM30,%YMM7 |
(1370) 0x46f9dd VBROADCASTSD 0x40(%RDI),%YMM3 |
(1370) 0x46f9e3 VFMADD231PD %YMM9,%YMM3,%YMM7 |
(1370) 0x46f9e8 VFMADD231PD %YMM13,%YMM3,%YMM4 |
(1370) 0x46f9ed VFMADD231PD %YMM12,%YMM3,%YMM31 |
(1370) 0x46f9f3 VFMADD231PD %YMM8,%YMM3,%YMM17 |
(1370) 0x46f9f9 VMULPD %YMM2,%YMM2,%YMM2 |
(1370) 0x46f9fd VFMADD231PD %YMM16,%YMM16,%YMM2 |
(1370) 0x46fa03 VMULPD %YMM0,%YMM0,%YMM0 |
(1370) 0x46fa07 VFMADD231PD %YMM15,%YMM15,%YMM0 |
(1370) 0x46fa0c VMULPD %YMM1,%YMM1,%YMM1 |
(1370) 0x46fa10 VFMADD231PD %YMM10,%YMM10,%YMM1 |
(1370) 0x46fa15 VMOVUPD 0x1a0(%RSP),%YMM10 |
(1370) 0x46fa1e VMULPD %YMM6,%YMM6,%YMM3 |
(1370) 0x46fa22 VFMADD231PD %YMM14,%YMM14,%YMM3 |
(1370) 0x46fa27 VMOVUPD %YMM17,0x40(%R11,%R15,8) |
(1370) 0x46fa2f VFMADD231PD %YMM17,%YMM17,%YMM3 |
(1370) 0x46fa35 VMOVUPD %YMM7,0x60(%R11,%R15,8) |
(1370) 0x46fa3c VFMADD231PD %YMM7,%YMM7,%YMM1 |
(1370) 0x46fa41 VMOVUPD %YMM4,(%R11,%R15,8) |
(1370) 0x46fa47 VFMADD231PD %YMM4,%YMM4,%YMM0 |
(1370) 0x46fa4c VMOVUPD %YMM31,0x20(%R11,%R15,8) |
(1370) 0x46fa54 VSQRTPD %YMM3,%YMM3 |
(1370) 0x46fa58 VMOVUPD %YMM3,0x40(%RCX,%R15,8) |
(1370) 0x46fa5f VSQRTPD %YMM1,%YMM1 |
(1370) 0x46fa63 VMOVUPD %YMM1,0x60(%RCX,%R15,8) |
(1370) 0x46fa6a VSQRTPD %YMM0,%YMM0 |
(1370) 0x46fa6e VMOVUPD %YMM0,(%RCX,%R15,8) |
(1370) 0x46fa74 VFMADD231PD %YMM31,%YMM31,%YMM2 |
(1370) 0x46fa7a VMOVUPD 0x1c0(%RSP),%YMM31 |
(1370) 0x46fa82 VSQRTPD %YMM2,%YMM0 |
(1370) 0x46fa86 VMOVUPD %YMM0,0x20(%RCX,%R15,8) |
(1370) 0x46fa8d ADD $0x10,%R12D |
(1370) 0x46fa91 CMP %R13D,%R12D |
(1370) 0x46fa94 JBE 46f5d0 |
0x46fa9a MOV 0x8(%RSP),%R15 |
0x46fa9f CMP %R14D,%R15D |
0x46faa2 VMOVUPD 0x100(%RSP),%YMM14 |
0x46faab VMOVUPD 0x90(%RSP),%XMM20 |
0x46fab3 VMOVUPD 0x80(%RSP),%XMM21 |
0x46fabb VMOVUPD 0xe0(%RSP),%YMM22 |
0x46fac3 VMOVUPD 0x70(%RSP),%XMM25 |
0x46facb VMOVUPD 0xc0(%RSP),%YMM26 |
0x46fad3 MOV 0x4(%RSP),%R13D |
0x46fad8 VMOVUPD 0x60(%RSP),%XMM23 |
0x46fae0 VMOVUPD 0x20(%RSP),%XMM3 |
0x46fae6 VMOVUPD 0x10(%RSP),%XMM2 |
0x46faec VMOVUPD 0x50(%RSP),%XMM28 |
0x46faf4 VMOVUPD 0x40(%RSP),%XMM29 |
0x46fafc VMOVUPD 0x30(%RSP),%XMM30 |
0x46fb04 JNE 46fb1f |
0x46fb06 CMP %R15D,%R13D |
0x46fb09 JNE 46fcc6 |
0x46fb0f JMP 46fe00 |
0x46fb14 XOR %R14D,%R14D |
0x46fb17 JMP 46fcc6 |
0x46fb1c XOR %R14D,%R14D |
0x46fb1f DEC %R15D |
0x46fb22 VPERMPD $0x55,%YMM14,%YMM6 |
0x46fb28 VBROADCASTSD %XMM20,%YMM1 |
0x46fb2e VBROADCASTSD %XMM21,%YMM7 |
0x46fb34 VPERMPD $0x55,%YMM22,%YMM8 |
0x46fb3b VBROADCASTSD %XMM23,%YMM9 |
0x46fb41 VBROADCASTSD %XMM3,%YMM0 |
0x46fb46 VBROADCASTSD %XMM2,%YMM2 |
0x46fb4b VBROADCASTSD %XMM28,%YMM27 |
0x46fb51 VBROADCASTSD %XMM30,%YMM11 |
0x46fb57 VBROADCASTSD %XMM29,%YMM15 |
0x46fb5d VBROADCASTSD %XMM25,%YMM16 |
0x46fb63 VPXORD %XMM17,%XMM17,%XMM17 |
0x46fb69 VPERMPD $0x55,%YMM26,%YMM17 |
0x46fb70 VPBROADCASTQ 0x8b6c6(%RIP),%YMM18 |
0x46fb7a VPBROADCASTQ 0x91e3c(%RIP),%YMM19 |
0x46fb84 VMOVUPD 0xa0(%RSP),%YMM24 |
0x46fb8c NOPL (%RAX) |
(1369) 0x46fb90 LEA (%R9,%R14,1),%R12D |
(1369) 0x46fb94 MOVSXD %R12D,%R12 |
(1369) 0x46fb97 VMOVUPD (%RDX,%R12,8),%YMM3 |
(1369) 0x46fb9d VSUBPD %YMM31,%YMM3,%YMM3 |
(1369) 0x46fba3 VMOVUPD (%R10,%R12,8),%YMM4 |
(1369) 0x46fba9 VMOVUPD (%RSI,%R12,8),%YMM5 |
(1369) 0x46fbaf VSUBPD %YMM6,%YMM4,%YMM4 |
(1369) 0x46fbb3 VSUBPD %YMM1,%YMM5,%YMM5 |
(1369) 0x46fbb7 VMULPD %YMM3,%YMM7,%YMM12 |
(1369) 0x46fbbb VFMADD231PD %YMM4,%YMM10,%YMM12 |
(1369) 0x46fbc0 VMULPD %YMM3,%YMM9,%YMM13 |
(1369) 0x46fbc4 VFMADD231PD %YMM4,%YMM0,%YMM13 |
(1369) 0x46fbc9 VFMADD231PD %YMM5,%YMM8,%YMM12 |
(1369) 0x46fbce VFMADD231PD %YMM5,%YMM2,%YMM13 |
(1369) 0x46fbd3 VMULPD %YMM3,%YMM27,%YMM3 |
(1369) 0x46fbd9 VFMADD231PD %YMM4,%YMM11,%YMM3 |
(1369) 0x46fbde VMOVDQA64 %YMM19,%YMM4 |
(1369) 0x46fbe4 VPTERNLOGQ $-0x8,%YMM18,%YMM12,%YMM4 |
(1369) 0x46fbeb VADDPD %YMM4,%YMM12,%YMM4 |
(1369) 0x46fbef VFMADD231PD %YMM5,%YMM15,%YMM3 |
(1369) 0x46fbf4 VROUNDPD $0xb,%YMM4,%YMM4 |
(1369) 0x46fbfa VMOVDQA64 %YMM19,%YMM5 |
(1369) 0x46fc00 VPTERNLOGQ $-0x8,%YMM18,%YMM13,%YMM5 |
(1369) 0x46fc07 VADDPD %YMM5,%YMM13,%YMM5 |
(1369) 0x46fc0b VROUNDPD $0xb,%YMM5,%YMM5 |
(1369) 0x46fc11 VSUBPD %YMM4,%YMM12,%YMM4 |
(1369) 0x46fc15 VMOVDQA64 %YMM19,%YMM12 |
(1369) 0x46fc1b VPTERNLOGQ $-0x8,%YMM18,%YMM3,%YMM12 |
(1369) 0x46fc22 VADDPD %YMM3,%YMM12,%YMM12 |
(1369) 0x46fc26 VROUNDPD $0xb,%YMM12,%YMM12 |
(1369) 0x46fc2c VSUBPD %YMM5,%YMM13,%YMM5 |
(1369) 0x46fc30 VSUBPD %YMM12,%YMM3,%YMM3 |
(1369) 0x46fc35 VMULPD %YMM4,%YMM16,%YMM12 |
(1369) 0x46fc3b VFMADD231PD %YMM5,%YMM24,%YMM12 |
(1369) 0x46fc41 VFMADD231PD %YMM17,%YMM3,%YMM12 |
(1369) 0x46fc47 VMOVUPD %YMM12,(%R8,%R12,8) |
(1369) 0x46fc4d VMULPD 0x18(%RDI){1to4},%YMM4,%YMM13 |
(1369) 0x46fc54 VFMADD231PD 0x20(%RDI){1to4},%YMM5,%YMM13 |
(1369) 0x46fc5b VFMADD231PD 0x28(%RDI){1to4},%YMM3,%YMM13 |
(1369) 0x46fc62 VMOVUPD %YMM13,(%RBX,%R12,8) |
(1369) 0x46fc68 VMULPD 0x30(%RDI){1to4},%YMM4,%YMM4 |
(1369) 0x46fc6f VFMADD231PD 0x38(%RDI){1to4},%YMM5,%YMM4 |
(1369) 0x46fc76 VFMADD231PD 0x40(%RDI){1to4},%YMM3,%YMM4 |
(1369) 0x46fc7d VMULPD %YMM12,%YMM12,%YMM3 |
(1369) 0x46fc82 VFMADD231PD %YMM13,%YMM13,%YMM3 |
(1369) 0x46fc87 VMOVUPD %YMM4,(%R11,%R12,8) |
(1369) 0x46fc8d VFMADD231PD %YMM4,%YMM4,%YMM3 |
(1369) 0x46fc92 VSQRTPD %YMM3,%YMM3 |
(1369) 0x46fc96 VMOVUPD %YMM3,(%RCX,%R12,8) |
(1369) 0x46fc9c ADD $0x4,%R14D |
(1369) 0x46fca0 CMP %R15D,%R14D |
(1369) 0x46fca3 JBE 46fb90 |
0x46fca9 MOV 0x8(%RSP),%R15 |
0x46fcae MOV %R15D,%R14D |
0x46fcb1 VMOVUPD 0x10(%RSP),%XMM2 |
0x46fcb7 VMOVUPD 0x20(%RSP),%XMM3 |
0x46fcbd CMP %R15D,%R13D |
0x46fcc0 JE 46fe00 |
0x46fcc6 VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 |
0x46fccc VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 |
0x46fcd2 VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 |
0x46fcd8 ADD %R9D,%R14D |
0x46fcdb VMOVQ 0x91cdd(%RIP),%XMM6 |
0x46fce3 VPBROADCASTQ 0x8c6dc(%RIP),%XMM7 |
0x46fcec VPBROADCASTQ 0x91ccb(%RIP),%XMM8 |
0x46fcf5 VPBROADCASTQ 0x8b542(%RIP),%XMM9 |
0x46fcfe XCHG %AX,%AX |
(1368) 0x46fd00 MOVSXD %R14D,%R14 |
(1368) 0x46fd03 VMOVSD (%RSI,%R14,8),%XMM3 |
(1368) 0x46fd09 VMOVSD (%RDX,%R14,8),%XMM4 |
(1368) 0x46fd0f VMOVHPD (%R10,%R14,8),%XMM4,%XMM4 |
(1368) 0x46fd15 VSUBSD %XMM20,%XMM3,%XMM3 |
(1368) 0x46fd1b VSUBPD %XMM14,%XMM4,%XMM4 |
(1368) 0x46fd20 VSHUFPD $0x1,%XMM4,%XMM4,%XMM5 |
(1368) 0x46fd25 VMULPD %XMM5,%XMM1,%XMM5 |
(1368) 0x46fd29 VMOVDDUP %XMM3,%XMM3 |
(1368) 0x46fd2d VPUNPCKHQDQ %XMM3,%XMM4,%XMM10 |
(1368) 0x46fd31 VMULPD %XMM10,%XMM22,%XMM10 |
(1368) 0x46fd37 VFMADD231PD %XMM4,%XMM0,%XMM5 |
(1368) 0x46fd3c VFMADD213SD %XMM10,%XMM21,%XMM4 |
(1368) 0x46fd42 VSHUFPD $0x1,%XMM10,%XMM10,%XMM10 |
(1368) 0x46fd48 VADDSD %XMM4,%XMM10,%XMM4 |
(1368) 0x46fd4c VFMADD231PD %XMM3,%XMM2,%XMM5 |
(1368) 0x46fd51 VMOVAPD %XMM4,%XMM3 |
(1368) 0x46fd55 VPTERNLOGQ $-0x28,%XMM7,%XMM6,%XMM3 |
(1368) 0x46fd5c VADDSD %XMM3,%XMM4,%XMM3 |
(1368) 0x46fd60 VROUNDSD $0xb,%XMM3,%XMM3,%XMM3 |
(1368) 0x46fd66 VSUBSD %XMM3,%XMM4,%XMM3 |
(1368) 0x46fd6a VMOVDQA %XMM8,%XMM4 |
(1368) 0x46fd6e VPTERNLOGQ $-0x8,%XMM9,%XMM5,%XMM4 |
(1368) 0x46fd75 VADDPD %XMM4,%XMM5,%XMM4 |
(1368) 0x46fd79 VROUNDPD $0xb,%XMM4,%XMM4 |
(1368) 0x46fd7f VSUBPD %XMM4,%XMM5,%XMM4 |
(1368) 0x46fd83 VMULPD %XMM4,%XMM26,%XMM5 |
(1368) 0x46fd89 VMOVAPD %XMM3,%XMM10 |
(1368) 0x46fd8d VFMADD213SD %XMM5,%XMM25,%XMM10 |
(1368) 0x46fd93 VSHUFPD $0x1,%XMM5,%XMM5,%XMM5 |
(1368) 0x46fd98 VADDSD %XMM5,%XMM10,%XMM5 |
(1368) 0x46fd9c VMOVSD %XMM5,(%R8,%R14,8) |
(1368) 0x46fda2 VMULSD 0x18(%RDI),%XMM3,%XMM10 |
(1368) 0x46fda7 VFMADD231SD 0x20(%RDI),%XMM4,%XMM10 |
(1368) 0x46fdad VSHUFPD $0x1,%XMM4,%XMM4,%XMM11 |
(1368) 0x46fdb2 VFMADD132SD 0x28(%RDI),%XMM10,%XMM11 |
(1368) 0x46fdb8 VMOVSD %XMM11,(%RBX,%R14,8) |
(1368) 0x46fdbe VMULPD 0x38(%RDI),%XMM4,%XMM4 |
(1368) 0x46fdc3 VMOVAPD %XMM4,%XMM10 |
(1368) 0x46fdc7 VFMADD231SD 0x30(%RDI),%XMM3,%XMM10 |
(1368) 0x46fdcd VSHUFPD $0x1,%XMM4,%XMM4,%XMM3 |
(1368) 0x46fdd2 VADDSD %XMM3,%XMM10,%XMM3 |
(1368) 0x46fdd6 VMOVSD %XMM3,(%R11,%R14,8) |
(1368) 0x46fddc VMULSD %XMM5,%XMM5,%XMM4 |
(1368) 0x46fde0 VFMADD231SD %XMM11,%XMM11,%XMM4 |
(1368) 0x46fde5 VFMADD231SD %XMM3,%XMM3,%XMM4 |
(1368) 0x46fdea VSQRTSD %XMM4,%XMM4,%XMM3 |
(1368) 0x46fdee VMOVSD %XMM3,(%RCX,%R14,8) |
(1368) 0x46fdf4 INC %R14D |
(1368) 0x46fdf7 CMP %R14D,%EAX |
(1368) 0x46fdfa JNE 46fd00 |
0x46fe00 LEA -0x28(%RBP),%RSP |
0x46fe04 POP %RBX |
0x46fe05 POP %R12 |
0x46fe07 POP %R13 |
0x46fe09 POP %R14 |
0x46fe0b POP %R15 |
0x46fe0d POP %RBP |
0x46fe0e VZEROUPPER |
0x46fe11 RET |
0x46fe12 NOPW %CS:(%RAX,%RAX,1) |
0x46fe1c NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►42.12+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableAAOMPTarget.h:283 | exec |
○ | qmcplusplus::ParticleSet::comp[...] | ParticleSet.cpp:343 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►40.98+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableAAOMPTarget.h:275 | exec |
○ | qmcplusplus::ParticleSet::comp[...] | ParticleSet.cpp:343 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►8.42+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableAAOMPTarget.h:185 | exec |
○ | qmcplusplus::ParticleSet::upda[...] | ParticleSet.cpp:242 | exec |
○ | main.extracted.113 | miniqmc.cpp:396 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►4.52+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableABOMPTarget.h:366 | exec |
○ | qmcplusplus::ParticleSet::comp[...] | ParticleSet.cpp:343 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►3.94+ | qmcplusplus::SoaDistanceTableA[...] | SoaDistanceTableABOMPTarget.h:361 | exec |
○ | qmcplusplus::ParticleSet::comp[...] | ParticleSet.cpp:343 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 151 |
nb uops | 152 |
loop length | 861 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 23 |
micro-operation queue | 25.33 cycles |
front end | 25.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
cycles | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.16 |
Stall cycles | 2.16 |
ROB full (events) | 3.06 |
Front-end | 25.33 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 7% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 60% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 46% |
load | 48% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 10% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 26% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 22% |
load | 21% |
store | 35% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x200,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 46fe00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa20> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 46fb14 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x734> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %R15,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 46fb1c <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x73c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x8bca1(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x92417(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0x4(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 46fb1f <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x73f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 46fcc6 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 46fe00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa20> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 46fcc6 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8e6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DEC %R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXORD %XMM17,%XMM17,%XMM17 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPERMPD $0x55,%YMM26,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x8b6c6(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x91e3c(%RIP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 46fe00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa20> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0x91cdd(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x8c6dc(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x91ccb(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8b542(%RIP),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 151 |
nb uops | 152 |
loop length | 861 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 23 |
micro-operation queue | 25.33 cycles |
front end | 25.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
cycles | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.16 |
Stall cycles | 2.16 |
ROB full (events) | 3.06 |
Front-end | 25.33 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 7% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 60% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 46% |
load | 48% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 10% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 26% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 22% |
load | 21% |
store | 35% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x200,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 46fe00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa20> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 46fb14 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x734> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %R15,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 46fb1c <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x73c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x8bca1(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x92417(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0x4(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 46fb1f <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x73f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 46fcc6 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 46fe00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa20> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 46fcc6 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8e6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DEC %R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXORD %XMM17,%XMM17,%XMM17 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPERMPD $0x55,%YMM26,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x8b6c6(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x91e3c(%RIP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 46fe00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa20> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0x91cdd(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x8c6dc(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x91ccb(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8b542(%RIP),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void qmcplusplus::DTD_BConds | 2.02 | 1.62 |
○Loop 1370 - ParticleBConds3DSoa.h:235-256 - exec | 2 | 1.57 |
○Loop 1369 - ParticleBConds3DSoa.h:235-256 - exec | 0 | 0 |
○Loop 1368 - ParticleBConds3DSoa.h:235-255 - exec | 0 | 0 |