Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 1.77% |
---|
Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 1.77% |
---|
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
222: const T x0 = pos[0]; |
223: const T y0 = pos[1]; |
224: const T z0 = pos[2]; |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
0x4632c0 PUSH %RBP |
0x4632c1 MOV %RSP,%RBP |
0x4632c4 PUSH %R15 |
0x4632c6 PUSH %R14 |
0x4632c8 PUSH %R13 |
0x4632ca PUSH %R12 |
0x4632cc PUSH %RBX |
0x4632cd AND $-0x20,%RSP |
0x4632d1 SUB $0x200,%RSP |
0x4632d8 MOV 0x10(%RBP),%EAX |
0x4632db MOV %EAX,%R14D |
0x4632de SUB %R9D,%R14D |
0x4632e1 JLE 463cd0 |
0x4632e7 VMOVUPD (%RSI),%XMM14 |
0x4632eb VMOVSD 0x10(%RSI),%XMM20 |
0x4632f2 MOV 0x8(%RDX),%RSI |
0x4632f6 MOV 0x18(%RDX),%RDX |
0x4632fa LEA (%RDX,%RSI,8),%R10 |
0x4632fe SAL $0x4,%RSI |
0x463302 ADD %RDX,%RSI |
0x463305 MOV 0x8(%R8),%R11 |
0x463309 MOV 0x18(%R8),%R8 |
0x46330d LEA (%R8,%R11,8),%RBX |
0x463311 SAL $0x4,%R11 |
0x463315 ADD %R8,%R11 |
0x463318 VMOVSD 0x48(%RDI),%XMM21 |
0x46331f VMOVUPD 0x50(%RDI),%XMM22 |
0x463326 VMOVSD 0x60(%RDI),%XMM23 |
0x46332d VMOVSD 0x68(%RDI),%XMM3 |
0x463332 VMOVSD 0x70(%RDI),%XMM2 |
0x463337 VMOVSD 0x78(%RDI),%XMM28 |
0x46333e VMOVSD 0x80(%RDI),%XMM30 |
0x463345 VMOVSD 0x88(%RDI),%XMM29 |
0x46334c VMOVSD (%RDI),%XMM25 |
0x463352 VMOVUPD 0x8(%RDI),%XMM26 |
0x46335c MOV %R14D,%R13D |
0x46335f AND $-0x4,%R13D |
0x463363 JE 4639e1 |
0x463369 MOV %R14D,0x4(%RSP) |
0x46336e VBROADCASTSD %XMM14,%YMM31 |
0x463374 VBROADCASTSD %XMM22,%YMM10 |
0x46337a VBROADCASTSD %XMM26,%YMM0 |
0x463380 VMOVUPD %YMM0,0xa0(%RSP) |
0x463389 AND $-0x10,%R14D |
0x46338d VMOVUPD %XMM3,0x20(%RSP) |
0x463393 VMOVUPD %XMM2,0x10(%RSP) |
0x463399 JE 4639e9 |
0x46339f MOV %R13,0x8(%RSP) |
0x4633a4 LEA -0x1(%R14),%R12D |
0x4633a8 VMOVUPD %YMM14,0x100(%RSP) |
0x4633b1 VXORPS %XMM0,%XMM0,%XMM0 |
0x4633b5 VPERMPD $0x55,%YMM14,%YMM0 |
0x4633bb VMOVUPD %YMM0,0x180(%RSP) |
0x4633c4 VMOVUPD %XMM20,0x90(%RSP) |
0x4633cc VBROADCASTSD %XMM20,%YMM0 |
0x4633d2 VMOVUPD %YMM0,0x160(%RSP) |
0x4633db VMOVUPD %XMM21,0x80(%RSP) |
0x4633e3 VBROADCASTSD %XMM21,%YMM0 |
0x4633e9 VMOVUPD %YMM0,0x140(%RSP) |
0x4633f2 VMOVUPD %YMM22,0xe0(%RSP) |
0x4633fa VXORPS %XMM0,%XMM0,%XMM0 |
0x4633fe VPERMPD $0x55,%YMM22,%YMM0 |
0x463405 VMOVUPD %YMM0,0x120(%RSP) |
0x46340e VMOVUPD %XMM23,0x60(%RSP) |
0x463416 VBROADCASTSD %XMM23,%YMM19 |
0x46341c VBROADCASTSD %XMM3,%YMM20 |
0x463422 VBROADCASTSD %XMM2,%YMM21 |
0x463428 VMOVUPD %XMM28,0x50(%RSP) |
0x463430 VBROADCASTSD %XMM28,%YMM22 |
0x463436 VMOVUPD %XMM30,0x30(%RSP) |
0x46343e VBROADCASTSD %XMM30,%YMM23 |
0x463444 VMOVUPD %XMM29,0x40(%RSP) |
0x46344c VBROADCASTSD %XMM29,%YMM24 |
0x463452 VMOVUPD %XMM25,0x70(%RSP) |
0x46345a VBROADCASTSD %XMM25,%YMM25 |
0x463460 VMOVUPD %YMM26,0xc0(%RSP) |
0x463468 VPERMPD $0x55,%YMM26,%YMM26 |
0x46346f XOR %R13D,%R13D |
0x463472 VPBROADCASTQ 0x8018c(%RIP),%YMM27 |
0x46347c VPBROADCASTQ 0x8683a(%RIP),%YMM28 |
0x463486 VMOVUPD 0xa0(%RSP),%YMM18 |
0x46348e VMOVUPD %YMM31,0x1c0(%RSP) |
0x463496 VMOVUPD %YMM10,0x1a0(%RSP) |
0x46349f NOP |
(1244) 0x4634a0 LEA (%R9,%R13,1),%R15D |
(1244) 0x4634a4 MOVSXD %R15D,%R15 |
(1244) 0x4634a7 VMOVUPD (%RDX,%R15,8),%YMM0 |
(1244) 0x4634ad VMOVUPD 0x20(%RDX,%R15,8),%YMM1 |
(1244) 0x4634b4 VMOVUPD 0x40(%RDX,%R15,8),%YMM2 |
(1244) 0x4634bb VMOVUPD 0x60(%RDX,%R15,8),%YMM4 |
(1244) 0x4634c2 VSUBPD %YMM31,%YMM0,%YMM3 |
(1244) 0x4634c8 VSUBPD %YMM31,%YMM1,%YMM1 |
(1244) 0x4634ce VSUBPD %YMM31,%YMM2,%YMM2 |
(1244) 0x4634d4 VSUBPD %YMM31,%YMM4,%YMM0 |
(1244) 0x4634da VMOVUPD (%R10,%R15,8),%YMM5 |
(1244) 0x4634e0 VMOVUPD 0x20(%R10,%R15,8),%YMM6 |
(1244) 0x4634e7 VMOVUPD 0x40(%R10,%R15,8),%YMM7 |
(1244) 0x4634ee VMOVUPD 0x60(%R10,%R15,8),%YMM4 |
(1244) 0x4634f5 VMOVUPD 0x180(%RSP),%YMM11 |
(1244) 0x4634fe VSUBPD %YMM11,%YMM4,%YMM4 |
(1244) 0x463503 VSUBPD %YMM11,%YMM7,%YMM12 |
(1244) 0x463508 VSUBPD %YMM11,%YMM6,%YMM14 |
(1244) 0x46350d VMOVUPD (%RSI,%R15,8),%YMM6 |
(1244) 0x463513 VMOVUPD 0x20(%RSI,%R15,8),%YMM7 |
(1244) 0x46351a VMOVUPD 0x40(%RSI,%R15,8),%YMM8 |
(1244) 0x463521 VMOVUPD 0x60(%RSI,%R15,8),%YMM9 |
(1244) 0x463528 VSUBPD %YMM11,%YMM5,%YMM15 |
(1244) 0x46352d VMOVUPD 0x160(%RSP),%YMM11 |
(1244) 0x463536 VSUBPD %YMM11,%YMM6,%YMM5 |
(1244) 0x46353b VSUBPD %YMM11,%YMM7,%YMM13 |
(1244) 0x463540 VMOVUPD 0x140(%RSP),%YMM6 |
(1244) 0x463549 VMULPD %YMM0,%YMM6,%YMM7 |
(1244) 0x46354d VMULPD %YMM2,%YMM6,%YMM30 |
(1244) 0x463553 VMULPD %YMM1,%YMM6,%YMM31 |
(1244) 0x463559 VSUBPD %YMM11,%YMM8,%YMM16 |
(1244) 0x46355f VMULPD %YMM3,%YMM6,%YMM29 |
(1244) 0x463565 VFMADD231PD %YMM15,%YMM10,%YMM29 |
(1244) 0x46356b VFMADD231PD %YMM14,%YMM10,%YMM31 |
(1244) 0x463571 VFMADD231PD %YMM12,%YMM10,%YMM30 |
(1244) 0x463577 VFMADD231PD %YMM4,%YMM10,%YMM7 |
(1244) 0x46357c VSUBPD %YMM11,%YMM9,%YMM17 |
(1244) 0x463582 VMOVUPD 0x120(%RSP),%YMM10 |
(1244) 0x46358b VFMADD231PD %YMM17,%YMM10,%YMM7 |
(1244) 0x463591 VFMADD231PD %YMM16,%YMM10,%YMM30 |
(1244) 0x463597 VMULPD %YMM3,%YMM19,%YMM11 |
(1244) 0x46359d VMULPD %YMM1,%YMM19,%YMM8 |
(1244) 0x4635a3 VMULPD %YMM2,%YMM19,%YMM9 |
(1244) 0x4635a9 VFMADD231PD %YMM13,%YMM10,%YMM31 |
(1244) 0x4635af VMULPD %YMM0,%YMM19,%YMM6 |
(1244) 0x4635b5 VFMADD231PD %YMM4,%YMM20,%YMM6 |
(1244) 0x4635bb VFMADD231PD %YMM12,%YMM20,%YMM9 |
(1244) 0x4635c1 VFMADD231PD %YMM14,%YMM20,%YMM8 |
(1244) 0x4635c7 VFMADD231PD %YMM15,%YMM20,%YMM11 |
(1244) 0x4635cd VFMADD231PD %YMM5,%YMM10,%YMM29 |
(1244) 0x4635d3 VFMADD231PD %YMM5,%YMM21,%YMM11 |
(1244) 0x4635d9 VFMADD231PD %YMM13,%YMM21,%YMM8 |
(1244) 0x4635df VMULPD %YMM0,%YMM22,%YMM0 |
(1244) 0x4635e5 VMULPD %YMM2,%YMM22,%YMM2 |
(1244) 0x4635eb VMULPD %YMM1,%YMM22,%YMM1 |
(1244) 0x4635f1 VFMADD231PD %YMM16,%YMM21,%YMM9 |
(1244) 0x4635f7 VMULPD %YMM3,%YMM22,%YMM10 |
(1244) 0x4635fd VFMADD231PD %YMM15,%YMM23,%YMM10 |
(1244) 0x463603 VFMADD231PD %YMM14,%YMM23,%YMM1 |
(1244) 0x463609 VFMADD231PD %YMM12,%YMM23,%YMM2 |
(1244) 0x46360f VFMADD231PD %YMM4,%YMM23,%YMM0 |
(1244) 0x463615 VFMADD231PD %YMM17,%YMM21,%YMM6 |
(1244) 0x46361b VFMADD231PD %YMM17,%YMM24,%YMM0 |
(1244) 0x463621 VFMADD231PD %YMM16,%YMM24,%YMM2 |
(1244) 0x463627 VFMADD231PD %YMM13,%YMM24,%YMM1 |
(1244) 0x46362d VFMADD231PD %YMM5,%YMM24,%YMM10 |
(1244) 0x463633 VMOVDQA64 %YMM28,%YMM3 |
(1244) 0x463639 VPTERNLOGQ $-0x8,%YMM27,%YMM29,%YMM3 |
(1244) 0x463640 VADDPD %YMM3,%YMM29,%YMM3 |
(1244) 0x463646 VROUNDPD $0xb,%YMM3,%YMM3 |
(1244) 0x46364c VMOVDQA64 %YMM28,%YMM4 |
(1244) 0x463652 VPTERNLOGQ $-0x8,%YMM27,%YMM31,%YMM4 |
(1244) 0x463659 VADDPD %YMM4,%YMM31,%YMM4 |
(1244) 0x46365f VMOVDQA64 %YMM28,%YMM5 |
(1244) 0x463665 VROUNDPD $0xb,%YMM4,%YMM4 |
(1244) 0x46366b VPTERNLOGQ $-0x8,%YMM27,%YMM30,%YMM5 |
(1244) 0x463672 VADDPD %YMM5,%YMM30,%YMM5 |
(1244) 0x463678 VMOVDQA64 %YMM28,%YMM12 |
(1244) 0x46367e VPTERNLOGQ $-0x8,%YMM27,%YMM7,%YMM12 |
(1244) 0x463685 VADDPD %YMM7,%YMM12,%YMM12 |
(1244) 0x463689 VROUNDPD $0xb,%YMM5,%YMM5 |
(1244) 0x46368f VROUNDPD $0xb,%YMM12,%YMM12 |
(1244) 0x463695 VMOVDQA64 %YMM28,%YMM13 |
(1244) 0x46369b VPTERNLOGQ $-0x8,%YMM27,%YMM6,%YMM13 |
(1244) 0x4636a2 VADDPD %YMM6,%YMM13,%YMM13 |
(1244) 0x4636a6 VMOVDQA64 %YMM28,%YMM14 |
(1244) 0x4636ac VROUNDPD $0xb,%YMM13,%YMM13 |
(1244) 0x4636b2 VPTERNLOGQ $-0x8,%YMM27,%YMM9,%YMM14 |
(1244) 0x4636b9 VADDPD %YMM14,%YMM9,%YMM14 |
(1244) 0x4636be VMOVDQA64 %YMM28,%YMM15 |
(1244) 0x4636c4 VPTERNLOGQ $-0x8,%YMM27,%YMM8,%YMM15 |
(1244) 0x4636cb VADDPD %YMM15,%YMM8,%YMM15 |
(1244) 0x4636d0 VROUNDPD $0xb,%YMM14,%YMM14 |
(1244) 0x4636d6 VROUNDPD $0xb,%YMM15,%YMM15 |
(1244) 0x4636dc VMOVDQA64 %YMM28,%YMM16 |
(1244) 0x4636e2 VPTERNLOGQ $-0x8,%YMM27,%YMM11,%YMM16 |
(1244) 0x4636e9 VADDPD %YMM16,%YMM11,%YMM16 |
(1244) 0x4636ef VRNDSCALEPD $0xb,%YMM16,%YMM16 |
(1244) 0x4636f6 VSUBPD %YMM12,%YMM7,%YMM7 |
(1244) 0x4636fb VMOVDQA64 %YMM28,%YMM12 |
(1244) 0x463701 VPTERNLOGQ $-0x8,%YMM27,%YMM10,%YMM12 |
(1244) 0x463708 VADDPD %YMM12,%YMM10,%YMM12 |
(1244) 0x46370d VRNDSCALEPD $0xb,%YMM12,%YMM17 |
(1244) 0x463714 VSUBPD %YMM5,%YMM30,%YMM30 |
(1244) 0x46371a VSUBPD %YMM4,%YMM31,%YMM31 |
(1244) 0x463720 VMOVDQA64 %YMM28,%YMM4 |
(1244) 0x463726 VPTERNLOGQ $-0x8,%YMM27,%YMM1,%YMM4 |
(1244) 0x46372d VADDPD %YMM4,%YMM1,%YMM4 |
(1244) 0x463731 VROUNDPD $0xb,%YMM4,%YMM12 |
(1244) 0x463737 VSUBPD %YMM3,%YMM29,%YMM4 |
(1244) 0x46373d VSUBPD %YMM16,%YMM11,%YMM11 |
(1244) 0x463743 VMOVDQA64 %YMM28,%YMM3 |
(1244) 0x463749 VPTERNLOGQ $-0x8,%YMM27,%YMM2,%YMM3 |
(1244) 0x463750 VADDPD %YMM3,%YMM2,%YMM3 |
(1244) 0x463754 VRNDSCALEPD $0xb,%YMM3,%YMM16 |
(1244) 0x46375b VSUBPD %YMM15,%YMM8,%YMM29 |
(1244) 0x463761 VSUBPD %YMM14,%YMM9,%YMM5 |
(1244) 0x463766 VMOVDQA64 %YMM28,%YMM3 |
(1244) 0x46376c VPTERNLOGQ $-0x8,%YMM27,%YMM0,%YMM3 |
(1244) 0x463773 VADDPD %YMM3,%YMM0,%YMM3 |
(1244) 0x463777 VROUNDPD $0xb,%YMM3,%YMM8 |
(1244) 0x46377d VSUBPD %YMM13,%YMM6,%YMM3 |
(1244) 0x463782 VSUBPD %YMM8,%YMM0,%YMM9 |
(1244) 0x463787 VSUBPD %YMM16,%YMM2,%YMM8 |
(1244) 0x46378d VMULPD %YMM4,%YMM25,%YMM0 |
(1244) 0x463793 VMULPD %YMM31,%YMM25,%YMM2 |
(1244) 0x463799 VMULPD %YMM30,%YMM25,%YMM6 |
(1244) 0x46379f VSUBPD %YMM12,%YMM1,%YMM12 |
(1244) 0x4637a4 VMULPD %YMM7,%YMM25,%YMM1 |
(1244) 0x4637aa VFMADD231PD %YMM3,%YMM18,%YMM1 |
(1244) 0x4637b0 VFMADD231PD %YMM5,%YMM18,%YMM6 |
(1244) 0x4637b6 VFMADD231PD %YMM29,%YMM18,%YMM2 |
(1244) 0x4637bc VFMADD231PD %YMM11,%YMM18,%YMM0 |
(1244) 0x4637c2 VSUBPD %YMM17,%YMM10,%YMM13 |
(1244) 0x4637c8 VFMADD231PD %YMM26,%YMM13,%YMM0 |
(1244) 0x4637ce VFMADD231PD %YMM26,%YMM12,%YMM2 |
(1244) 0x4637d4 VFMADD231PD %YMM26,%YMM8,%YMM6 |
(1244) 0x4637da VFMADD231PD %YMM26,%YMM9,%YMM1 |
(1244) 0x4637e0 VMOVUPD %YMM1,0x60(%R8,%R15,8) |
(1244) 0x4637e7 VMOVUPD %YMM6,0x40(%R8,%R15,8) |
(1244) 0x4637ee VMOVUPD %YMM2,0x20(%R8,%R15,8) |
(1244) 0x4637f5 VMOVUPD %YMM0,(%R8,%R15,8) |
(1244) 0x4637fb VBROADCASTSD 0x18(%RDI),%YMM14 |
(1244) 0x463801 VMULPD %YMM7,%YMM14,%YMM10 |
(1244) 0x463805 VMULPD %YMM4,%YMM14,%YMM15 |
(1244) 0x463809 VMULPD %YMM31,%YMM14,%YMM16 |
(1244) 0x46380f VBROADCASTSD 0x20(%RDI),%YMM17 |
(1244) 0x463816 VMULPD %YMM30,%YMM14,%YMM14 |
(1244) 0x46381c VFMADD231PD %YMM5,%YMM17,%YMM14 |
(1244) 0x463822 VFMADD231PD %YMM29,%YMM17,%YMM16 |
(1244) 0x463828 VFMADD231PD %YMM11,%YMM17,%YMM15 |
(1244) 0x46382e VFMADD231PD %YMM17,%YMM3,%YMM10 |
(1244) 0x463834 VBROADCASTSD 0x28(%RDI),%YMM17 |
(1244) 0x46383b VFMADD231PD %YMM9,%YMM17,%YMM10 |
(1244) 0x463841 VFMADD231PD %YMM12,%YMM17,%YMM16 |
(1244) 0x463847 VFMADD231PD %YMM17,%YMM8,%YMM14 |
(1244) 0x46384d VMOVUPD %YMM14,0x40(%RBX,%R15,8) |
(1244) 0x463854 VMOVUPD %YMM16,0x20(%RBX,%R15,8) |
(1244) 0x46385c VFMADD231PD %YMM13,%YMM17,%YMM15 |
(1244) 0x463862 VMOVUPD %YMM15,(%RBX,%R15,8) |
(1244) 0x463868 VMOVUPD %YMM10,0x60(%RBX,%R15,8) |
(1244) 0x46386f VBROADCASTSD 0x30(%RDI),%YMM17 |
(1244) 0x463876 VMULPD %YMM7,%YMM17,%YMM7 |
(1244) 0x46387c VMULPD %YMM4,%YMM17,%YMM4 |
(1244) 0x463882 VMULPD %YMM31,%YMM17,%YMM31 |
(1244) 0x463888 VMULPD %YMM30,%YMM17,%YMM17 |
(1244) 0x46388e VBROADCASTSD 0x38(%RDI),%YMM30 |
(1244) 0x463895 VFMADD231PD %YMM5,%YMM30,%YMM17 |
(1244) 0x46389b VFMADD231PD %YMM29,%YMM30,%YMM31 |
(1244) 0x4638a1 VFMADD231PD %YMM11,%YMM30,%YMM4 |
(1244) 0x4638a7 VFMADD231PD %YMM3,%YMM30,%YMM7 |
(1244) 0x4638ad VBROADCASTSD 0x40(%RDI),%YMM3 |
(1244) 0x4638b3 VFMADD231PD %YMM9,%YMM3,%YMM7 |
(1244) 0x4638b8 VFMADD231PD %YMM13,%YMM3,%YMM4 |
(1244) 0x4638bd VFMADD231PD %YMM12,%YMM3,%YMM31 |
(1244) 0x4638c3 VFMADD231PD %YMM8,%YMM3,%YMM17 |
(1244) 0x4638c9 VMULPD %YMM2,%YMM2,%YMM2 |
(1244) 0x4638cd VFMADD231PD %YMM16,%YMM16,%YMM2 |
(1244) 0x4638d3 VMULPD %YMM0,%YMM0,%YMM0 |
(1244) 0x4638d7 VFMADD231PD %YMM15,%YMM15,%YMM0 |
(1244) 0x4638dc VMULPD %YMM1,%YMM1,%YMM1 |
(1244) 0x4638e0 VFMADD231PD %YMM10,%YMM10,%YMM1 |
(1244) 0x4638e5 VMOVUPD 0x1a0(%RSP),%YMM10 |
(1244) 0x4638ee VMULPD %YMM6,%YMM6,%YMM3 |
(1244) 0x4638f2 VFMADD231PD %YMM14,%YMM14,%YMM3 |
(1244) 0x4638f7 VMOVUPD %YMM17,0x40(%R11,%R15,8) |
(1244) 0x4638ff VFMADD231PD %YMM17,%YMM17,%YMM3 |
(1244) 0x463905 VMOVUPD %YMM7,0x60(%R11,%R15,8) |
(1244) 0x46390c VFMADD231PD %YMM7,%YMM7,%YMM1 |
(1244) 0x463911 VMOVUPD %YMM4,(%R11,%R15,8) |
(1244) 0x463917 VFMADD231PD %YMM4,%YMM4,%YMM0 |
(1244) 0x46391c VMOVUPD %YMM31,0x20(%R11,%R15,8) |
(1244) 0x463924 VSQRTPD %YMM3,%YMM3 |
(1244) 0x463928 VMOVUPD %YMM3,0x40(%RCX,%R15,8) |
(1244) 0x46392f VSQRTPD %YMM1,%YMM1 |
(1244) 0x463933 VMOVUPD %YMM1,0x60(%RCX,%R15,8) |
(1244) 0x46393a VSQRTPD %YMM0,%YMM0 |
(1244) 0x46393e VMOVUPD %YMM0,(%RCX,%R15,8) |
(1244) 0x463944 VFMADD231PD %YMM31,%YMM31,%YMM2 |
(1244) 0x46394a VMOVUPD 0x1c0(%RSP),%YMM31 |
(1244) 0x463952 VSQRTPD %YMM2,%YMM0 |
(1244) 0x463956 VMOVUPD %YMM0,0x20(%RCX,%R15,8) |
(1244) 0x46395d ADD $0x10,%R13D |
(1244) 0x463961 CMP %R12D,%R13D |
(1244) 0x463964 JBE 4634a0 |
0x46396a MOV 0x8(%RSP),%R13 |
0x46396f CMP %R14D,%R13D |
0x463972 VMOVUPD 0x100(%RSP),%YMM14 |
0x46397b VMOVUPD 0x90(%RSP),%XMM20 |
0x463983 VMOVUPD 0x80(%RSP),%XMM21 |
0x46398b VMOVUPD 0xe0(%RSP),%YMM22 |
0x463993 VMOVUPD 0x70(%RSP),%XMM25 |
0x46399b VMOVUPD 0xc0(%RSP),%YMM26 |
0x4639a3 VMOVUPD 0x60(%RSP),%XMM23 |
0x4639ab VMOVUPD 0x20(%RSP),%XMM3 |
0x4639b1 VMOVUPD 0x10(%RSP),%XMM2 |
0x4639b7 VMOVUPD 0x50(%RSP),%XMM28 |
0x4639bf VMOVUPD 0x40(%RSP),%XMM29 |
0x4639c7 VMOVUPD 0x30(%RSP),%XMM30 |
0x4639cf JNE 4639ec |
0x4639d1 CMP %R13D,0x4(%RSP) |
0x4639d6 JNE 463b93 |
0x4639dc JMP 463cd0 |
0x4639e1 XOR %R14D,%R14D |
0x4639e4 JMP 463b93 |
0x4639e9 XOR %R14D,%R14D |
0x4639ec LEA -0x1(%R13),%R15D |
0x4639f0 VPERMPD $0x55,%YMM14,%YMM6 |
0x4639f6 VBROADCASTSD %XMM20,%YMM1 |
0x4639fc VBROADCASTSD %XMM21,%YMM7 |
0x463a02 VPERMPD $0x55,%YMM22,%YMM8 |
0x463a09 VBROADCASTSD %XMM23,%YMM9 |
0x463a0f VBROADCASTSD %XMM3,%YMM0 |
0x463a14 VBROADCASTSD %XMM2,%YMM2 |
0x463a19 VBROADCASTSD %XMM28,%YMM27 |
0x463a1f VBROADCASTSD %XMM30,%YMM11 |
0x463a25 VBROADCASTSD %XMM29,%YMM15 |
0x463a2b VBROADCASTSD %XMM25,%YMM16 |
0x463a31 VPXORD %XMM17,%XMM17,%XMM17 |
0x463a37 VPERMPD $0x55,%YMM26,%YMM17 |
0x463a3e VPBROADCASTQ 0x7fbc0(%RIP),%YMM18 |
0x463a48 VPBROADCASTQ 0x8626e(%RIP),%YMM19 |
0x463a52 VMOVUPD 0xa0(%RSP),%YMM24 |
0x463a5a NOPW (%RAX,%RAX,1) |
(1243) 0x463a60 LEA (%R9,%R14,1),%R12D |
(1243) 0x463a64 MOVSXD %R12D,%R12 |
(1243) 0x463a67 VMOVUPD (%RDX,%R12,8),%YMM3 |
(1243) 0x463a6d VSUBPD %YMM31,%YMM3,%YMM3 |
(1243) 0x463a73 VMOVUPD (%R10,%R12,8),%YMM4 |
(1243) 0x463a79 VMOVUPD (%RSI,%R12,8),%YMM5 |
(1243) 0x463a7f VSUBPD %YMM6,%YMM4,%YMM4 |
(1243) 0x463a83 VSUBPD %YMM1,%YMM5,%YMM5 |
(1243) 0x463a87 VMULPD %YMM3,%YMM7,%YMM12 |
(1243) 0x463a8b VFMADD231PD %YMM4,%YMM10,%YMM12 |
(1243) 0x463a90 VMULPD %YMM3,%YMM9,%YMM13 |
(1243) 0x463a94 VFMADD231PD %YMM4,%YMM0,%YMM13 |
(1243) 0x463a99 VFMADD231PD %YMM5,%YMM8,%YMM12 |
(1243) 0x463a9e VFMADD231PD %YMM5,%YMM2,%YMM13 |
(1243) 0x463aa3 VMULPD %YMM3,%YMM27,%YMM3 |
(1243) 0x463aa9 VFMADD231PD %YMM4,%YMM11,%YMM3 |
(1243) 0x463aae VMOVDQA64 %YMM19,%YMM4 |
(1243) 0x463ab4 VPTERNLOGQ $-0x8,%YMM18,%YMM12,%YMM4 |
(1243) 0x463abb VADDPD %YMM4,%YMM12,%YMM4 |
(1243) 0x463abf VFMADD231PD %YMM5,%YMM15,%YMM3 |
(1243) 0x463ac4 VROUNDPD $0xb,%YMM4,%YMM4 |
(1243) 0x463aca VMOVDQA64 %YMM19,%YMM5 |
(1243) 0x463ad0 VPTERNLOGQ $-0x8,%YMM18,%YMM13,%YMM5 |
(1243) 0x463ad7 VADDPD %YMM5,%YMM13,%YMM5 |
(1243) 0x463adb VROUNDPD $0xb,%YMM5,%YMM5 |
(1243) 0x463ae1 VSUBPD %YMM4,%YMM12,%YMM4 |
(1243) 0x463ae5 VMOVDQA64 %YMM19,%YMM12 |
(1243) 0x463aeb VPTERNLOGQ $-0x8,%YMM18,%YMM3,%YMM12 |
(1243) 0x463af2 VADDPD %YMM3,%YMM12,%YMM12 |
(1243) 0x463af6 VROUNDPD $0xb,%YMM12,%YMM12 |
(1243) 0x463afc VSUBPD %YMM5,%YMM13,%YMM5 |
(1243) 0x463b00 VSUBPD %YMM12,%YMM3,%YMM3 |
(1243) 0x463b05 VMULPD %YMM4,%YMM16,%YMM12 |
(1243) 0x463b0b VFMADD231PD %YMM5,%YMM24,%YMM12 |
(1243) 0x463b11 VFMADD231PD %YMM17,%YMM3,%YMM12 |
(1243) 0x463b17 VMOVUPD %YMM12,(%R8,%R12,8) |
(1243) 0x463b1d VMULPD 0x18(%RDI){1to4},%YMM4,%YMM13 |
(1243) 0x463b24 VFMADD231PD 0x20(%RDI){1to4},%YMM5,%YMM13 |
(1243) 0x463b2b VFMADD231PD 0x28(%RDI){1to4},%YMM3,%YMM13 |
(1243) 0x463b32 VMOVUPD %YMM13,(%RBX,%R12,8) |
(1243) 0x463b38 VMULPD 0x30(%RDI){1to4},%YMM4,%YMM4 |
(1243) 0x463b3f VFMADD231PD 0x38(%RDI){1to4},%YMM5,%YMM4 |
(1243) 0x463b46 VFMADD231PD 0x40(%RDI){1to4},%YMM3,%YMM4 |
(1243) 0x463b4d VMULPD %YMM12,%YMM12,%YMM3 |
(1243) 0x463b52 VFMADD231PD %YMM13,%YMM13,%YMM3 |
(1243) 0x463b57 VMOVUPD %YMM4,(%R11,%R12,8) |
(1243) 0x463b5d VFMADD231PD %YMM4,%YMM4,%YMM3 |
(1243) 0x463b62 VSQRTPD %YMM3,%YMM3 |
(1243) 0x463b66 VMOVUPD %YMM3,(%RCX,%R12,8) |
(1243) 0x463b6c ADD $0x4,%R14D |
(1243) 0x463b70 CMP %R15D,%R14D |
(1243) 0x463b73 JBE 463a60 |
0x463b79 MOV %R13D,%R14D |
0x463b7c VMOVUPD 0x10(%RSP),%XMM2 |
0x463b82 VMOVUPD 0x20(%RSP),%XMM3 |
0x463b88 CMP %R13D,0x4(%RSP) |
0x463b8d JE 463cd0 |
0x463b93 VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 |
0x463b99 VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 |
0x463b9f VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 |
0x463ba5 ADD %R9D,%R14D |
0x463ba8 VMOVQ 0x86110(%RIP),%XMM6 |
0x463bb0 VPBROADCASTQ 0x80bdf(%RIP),%XMM7 |
0x463bb9 VPBROADCASTQ 0x860fe(%RIP),%XMM8 |
0x463bc2 VPBROADCASTQ 0x7fa3d(%RIP),%XMM9 |
0x463bcb NOPL (%RAX,%RAX,1) |
(1242) 0x463bd0 MOVSXD %R14D,%R14 |
(1242) 0x463bd3 VMOVSD (%RSI,%R14,8),%XMM3 |
(1242) 0x463bd9 VMOVSD (%RDX,%R14,8),%XMM4 |
(1242) 0x463bdf VMOVHPD (%R10,%R14,8),%XMM4,%XMM4 |
(1242) 0x463be5 VSUBSD %XMM20,%XMM3,%XMM3 |
(1242) 0x463beb VSUBPD %XMM14,%XMM4,%XMM4 |
(1242) 0x463bf0 VSHUFPD $0x1,%XMM4,%XMM4,%XMM5 |
(1242) 0x463bf5 VMULPD %XMM5,%XMM1,%XMM5 |
(1242) 0x463bf9 VMOVDDUP %XMM3,%XMM3 |
(1242) 0x463bfd VPUNPCKHQDQ %XMM3,%XMM4,%XMM10 |
(1242) 0x463c01 VMULPD %XMM10,%XMM22,%XMM10 |
(1242) 0x463c07 VFMADD231PD %XMM4,%XMM0,%XMM5 |
(1242) 0x463c0c VFMADD213SD %XMM10,%XMM21,%XMM4 |
(1242) 0x463c12 VSHUFPD $0x1,%XMM10,%XMM10,%XMM10 |
(1242) 0x463c18 VADDSD %XMM4,%XMM10,%XMM4 |
(1242) 0x463c1c VFMADD231PD %XMM3,%XMM2,%XMM5 |
(1242) 0x463c21 VMOVAPD %XMM4,%XMM3 |
(1242) 0x463c25 VPTERNLOGQ $-0x28,%XMM7,%XMM6,%XMM3 |
(1242) 0x463c2c VADDSD %XMM3,%XMM4,%XMM3 |
(1242) 0x463c30 VROUNDSD $0xb,%XMM3,%XMM3,%XMM3 |
(1242) 0x463c36 VSUBSD %XMM3,%XMM4,%XMM3 |
(1242) 0x463c3a VMOVDQA %XMM8,%XMM4 |
(1242) 0x463c3e VPTERNLOGQ $-0x8,%XMM9,%XMM5,%XMM4 |
(1242) 0x463c45 VADDPD %XMM4,%XMM5,%XMM4 |
(1242) 0x463c49 VROUNDPD $0xb,%XMM4,%XMM4 |
(1242) 0x463c4f VSUBPD %XMM4,%XMM5,%XMM4 |
(1242) 0x463c53 VMULPD %XMM4,%XMM26,%XMM5 |
(1242) 0x463c59 VMOVAPD %XMM3,%XMM10 |
(1242) 0x463c5d VFMADD213SD %XMM5,%XMM25,%XMM10 |
(1242) 0x463c63 VSHUFPD $0x1,%XMM5,%XMM5,%XMM5 |
(1242) 0x463c68 VADDSD %XMM5,%XMM10,%XMM5 |
(1242) 0x463c6c VMOVSD %XMM5,(%R8,%R14,8) |
(1242) 0x463c72 VMULSD 0x18(%RDI),%XMM3,%XMM10 |
(1242) 0x463c77 VFMADD231SD 0x20(%RDI),%XMM4,%XMM10 |
(1242) 0x463c7d VSHUFPD $0x1,%XMM4,%XMM4,%XMM11 |
(1242) 0x463c82 VFMADD132SD 0x28(%RDI),%XMM10,%XMM11 |
(1242) 0x463c88 VMOVSD %XMM11,(%RBX,%R14,8) |
(1242) 0x463c8e VMULPD 0x38(%RDI),%XMM4,%XMM4 |
(1242) 0x463c93 VMOVAPD %XMM4,%XMM10 |
(1242) 0x463c97 VFMADD231SD 0x30(%RDI),%XMM3,%XMM10 |
(1242) 0x463c9d VSHUFPD $0x1,%XMM4,%XMM4,%XMM3 |
(1242) 0x463ca2 VADDSD %XMM3,%XMM10,%XMM3 |
(1242) 0x463ca6 VMOVSD %XMM3,(%R11,%R14,8) |
(1242) 0x463cac VMULSD %XMM5,%XMM5,%XMM4 |
(1242) 0x463cb0 VFMADD231SD %XMM11,%XMM11,%XMM4 |
(1242) 0x463cb5 VFMADD231SD %XMM3,%XMM3,%XMM4 |
(1242) 0x463cba VSQRTSD %XMM4,%XMM4,%XMM3 |
(1242) 0x463cbe VMOVSD %XMM3,(%RCX,%R14,8) |
(1242) 0x463cc4 INC %R14D |
(1242) 0x463cc7 CMP %R14D,%EAX |
(1242) 0x463cca JNE 463bd0 |
0x463cd0 LEA -0x28(%RBP),%RSP |
0x463cd4 POP %RBX |
0x463cd5 POP %R12 |
0x463cd7 POP %R13 |
0x463cd9 POP %R14 |
0x463cdb POP %R15 |
0x463cdd POP %RBP |
0x463cde VZEROUPPER |
0x463ce1 RET |
0x463ce2 NOPW %CS:(%RAX,%RAX,1) |
0x463cec NOPL (%RAX) |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 148 |
nb uops | 149 |
loop length | 845 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 23 |
micro-operation queue | 24.83 cycles |
front end | 24.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
cycles | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.16 |
Stall cycles | 2.66 |
ROB full (events) | 3.99 |
Front-end | 24.83 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 11% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 60% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 51% |
load | 50% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
all | 12% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 26% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 24% |
load | 21% |
store | 35% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x200,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 463cd0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa10> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R14D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4639e1 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x721> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JE 4639e9 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x729> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x8018c(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8683a(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 4639ec <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x72c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 463b93 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8d3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 463cd0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa10> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 463b93 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8d3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R13),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXORD %XMM17,%XMM17,%XMM17 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPERMPD $0x55,%YMM26,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x7fbc0(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8626e(%RIP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 463cd0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa10> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0x86110(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x80bdf(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x860fe(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x7fa3d(%RIP),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 148 |
nb uops | 149 |
loop length | 845 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 23 |
micro-operation queue | 24.83 cycles |
front end | 24.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
cycles | 6.50 | 6.60 | 17.00 | 17.00 | 13.50 | 27.00 | 6.50 | 13.50 | 13.50 | 13.50 | 6.40 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.16 |
Stall cycles | 2.66 |
ROB full (events) | 3.99 |
Front-end | 24.83 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 11% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 60% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 51% |
load | 50% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
all | 12% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 26% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 24% |
load | 21% |
store | 35% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x200,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 463cd0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa10> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R14D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4639e1 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x721> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JE 4639e9 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x729> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x8018c(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8683a(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 4639ec <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x72c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 463b93 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8d3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 463cd0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa10> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 463b93 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8d3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R13),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXORD %XMM17,%XMM17,%XMM17 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPERMPD $0x55,%YMM26,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x7fbc0(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8626e(%RIP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R13D,0x4(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 463cd0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa10> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM23,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM28,%XMM3,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0x86110(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x80bdf(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x860fe(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x7fa3d(%RIP),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void qmcplusplus::DTD_BConds | 1.77 | 1.42 |
○Loop 1244 - ParticleBConds3DSoa.h:235-256 - exec | 1.76 | 1.38 |
○Loop 1242 - ParticleBConds3DSoa.h:235-255 - exec | 0 | 0 |
○Loop 1243 - ParticleBConds3DSoa.h:235-256 - exec | 0 | 0 |