Function: _ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18Ve ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 2.23% |
---|
Function: _ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18Ve ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 2.23% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
222: const T x0 = pos[0]; |
223: const T y0 = pos[1]; |
224: const T z0 = pos[2]; |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
0x46dcb0 PUSH %RBP |
0x46dcb1 MOV %RSP,%RBP |
0x46dcb4 PUSH %R15 |
0x46dcb6 PUSH %R14 |
0x46dcb8 PUSH %R13 |
0x46dcba PUSH %R12 |
0x46dcbc PUSH %RBX |
0x46dcbd AND $-0x20,%RSP |
0x46dcc1 SUB $0x220,%RSP |
0x46dcc8 MOV 0x10(%RBP),%EAX |
0x46dccb MOV %EAX,%R13D |
0x46dcce SUB %R9D,%R13D |
0x46dcd1 JLE 46e6d3 |
0x46dcd7 VMOVUPD (%RSI),%XMM14 |
0x46dcdb VMOVSD 0x10(%RSI),%XMM20 |
0x46dce2 MOV 0x8(%RDX),%RSI |
0x46dce6 MOV 0x18(%RDX),%RDX |
0x46dcea LEA (%RDX,%RSI,8),%R10 |
0x46dcee SAL $0x4,%RSI |
0x46dcf2 ADD %RDX,%RSI |
0x46dcf5 MOV 0x8(%R8),%R11 |
0x46dcf9 MOV 0x18(%R8),%R8 |
0x46dcfd LEA (%R8,%R11,8),%RBX |
0x46dd01 SAL $0x4,%R11 |
0x46dd05 ADD %R8,%R11 |
0x46dd08 VMOVSD 0x48(%RDI),%XMM21 |
0x46dd0f VMOVUPD 0x50(%RDI),%XMM22 |
0x46dd16 VMOVSD 0x60(%RDI),%XMM23 |
0x46dd1d VMOVSD 0x68(%RDI),%XMM3 |
0x46dd22 VMOVSD 0x78(%RDI),%XMM2 |
0x46dd27 VMOVSD 0x80(%RDI),%XMM29 |
0x46dd2e VMOVSD 0x70(%RDI),%XMM28 |
0x46dd35 VMOVSD 0x88(%RDI),%XMM30 |
0x46dd3c VMOVSD (%RDI),%XMM25 |
0x46dd42 VMOVUPD 0x8(%RDI),%XMM26 |
0x46dd4c MOV %R13D,%R15D |
0x46dd4f AND $-0x4,%R15D |
0x46dd53 JE 46e3e5 |
0x46dd59 MOV %R13D,%R14D |
0x46dd5c VBROADCASTSD %XMM14,%YMM31 |
0x46dd62 VBROADCASTSD %XMM22,%YMM10 |
0x46dd68 VBROADCASTSD %XMM26,%YMM0 |
0x46dd6e VMOVUPD %YMM0,0xa0(%RSP) |
0x46dd77 AND $-0x10,%R14D |
0x46dd7b VMOVUPD %XMM3,0x20(%RSP) |
0x46dd81 VMOVUPD %XMM2,0x10(%RSP) |
0x46dd87 MOV %R15,0x8(%RSP) |
0x46dd8c JE 46e3ed |
0x46dd92 MOV %R13D,0x4(%RSP) |
0x46dd97 LEA -0x1(%R14),%R13D |
0x46dd9b VMOVUPD %YMM14,0x100(%RSP) |
0x46dda4 VXORPS %XMM0,%XMM0,%XMM0 |
0x46dda8 VPERMPD $0x55,%YMM14,%YMM0 |
0x46ddae VMOVUPD %YMM0,0x1a0(%RSP) |
0x46ddb7 VMOVUPD %XMM20,0x90(%RSP) |
0x46ddbf VBROADCASTSD %XMM20,%YMM0 |
0x46ddc5 VMOVUPD %YMM0,0x180(%RSP) |
0x46ddce VMOVUPD %XMM21,0x80(%RSP) |
0x46ddd6 VBROADCASTSD %XMM21,%YMM0 |
0x46dddc VMOVUPD %YMM0,0x160(%RSP) |
0x46dde5 VMOVUPD %YMM22,0xe0(%RSP) |
0x46dded VXORPS %XMM0,%XMM0,%XMM0 |
0x46ddf1 VPERMPD $0x55,%YMM22,%YMM0 |
0x46ddf8 VMOVUPD %YMM0,0x140(%RSP) |
0x46de01 VMOVUPD %XMM23,0x60(%RSP) |
0x46de09 VBROADCASTSD %XMM23,%YMM0 |
0x46de0f VMOVUPD %YMM0,0x120(%RSP) |
0x46de18 VBROADCASTSD %XMM3,%YMM20 |
0x46de1e VMOVUPD %XMM28,0x50(%RSP) |
0x46de26 VBROADCASTSD %XMM28,%YMM21 |
0x46de2c VBROADCASTSD %XMM2,%YMM22 |
0x46de32 VMOVUPD %XMM29,0x40(%RSP) |
0x46de3a VBROADCASTSD %XMM29,%YMM23 |
0x46de40 VMOVUPD %XMM30,0x30(%RSP) |
0x46de48 VBROADCASTSD %XMM30,%YMM24 |
0x46de4e VMOVUPD %XMM25,0x70(%RSP) |
0x46de56 VBROADCASTSD %XMM25,%YMM25 |
0x46de5c VMOVUPD %YMM26,0xc0(%RSP) |
0x46de64 VPERMPD $0x55,%YMM26,%YMM26 |
0x46de6b XOR %R12D,%R12D |
0x46de6e VPBROADCASTQ 0x871c8(%RIP),%YMM27 |
0x46de78 VPBROADCASTQ 0x8d89e(%RIP),%YMM28 |
0x46de82 VMOVUPD 0xa0(%RSP),%YMM19 |
0x46de8a VMOVUPD %YMM31,0x1e0(%RSP) |
0x46de92 VMOVUPD %YMM10,0x1c0(%RSP) |
0x46de9b NOPL (%RAX,%RAX,1) |
(1366) 0x46dea0 LEA (%R9,%R12,1),%R15D |
(1366) 0x46dea4 MOVSXD %R15D,%R15 |
(1366) 0x46dea7 VMOVUPD (%RDX,%R15,8),%YMM0 |
(1366) 0x46dead VMOVUPD 0x20(%RDX,%R15,8),%YMM1 |
(1366) 0x46deb4 VMOVUPD 0x40(%RDX,%R15,8),%YMM2 |
(1366) 0x46debb VMOVUPD 0x60(%RDX,%R15,8),%YMM3 |
(1366) 0x46dec2 VSUBPD %YMM31,%YMM0,%YMM0 |
(1366) 0x46dec8 VSUBPD %YMM31,%YMM1,%YMM1 |
(1366) 0x46dece VSUBPD %YMM31,%YMM2,%YMM4 |
(1366) 0x46ded4 VSUBPD %YMM31,%YMM3,%YMM2 |
(1366) 0x46deda VMOVUPD (%R10,%R15,8),%YMM3 |
(1366) 0x46dee0 VMOVUPD 0x20(%R10,%R15,8),%YMM5 |
(1366) 0x46dee7 VMOVUPD 0x40(%R10,%R15,8),%YMM6 |
(1366) 0x46deee VMOVUPD 0x60(%R10,%R15,8),%YMM7 |
(1366) 0x46def5 VMOVUPD 0x1a0(%RSP),%YMM11 |
(1366) 0x46defe VSUBPD %YMM11,%YMM7,%YMM12 |
(1366) 0x46df03 VSUBPD %YMM11,%YMM6,%YMM13 |
(1366) 0x46df08 VSUBPD %YMM11,%YMM5,%YMM5 |
(1366) 0x46df0d VMOVUPD (%RSI,%R15,8),%YMM6 |
(1366) 0x46df13 VMOVUPD 0x20(%RSI,%R15,8),%YMM7 |
(1366) 0x46df1a VMOVUPD 0x40(%RSI,%R15,8),%YMM8 |
(1366) 0x46df21 VMOVUPD 0x60(%RSI,%R15,8),%YMM9 |
(1366) 0x46df28 VSUBPD %YMM11,%YMM3,%YMM3 |
(1366) 0x46df2d VMOVUPD 0x180(%RSP),%YMM11 |
(1366) 0x46df36 VSUBPD %YMM11,%YMM6,%YMM14 |
(1366) 0x46df3b VSUBPD %YMM11,%YMM7,%YMM15 |
(1366) 0x46df40 VMOVUPD 0x160(%RSP),%YMM7 |
(1366) 0x46df49 VMULPD %YMM2,%YMM7,%YMM6 |
(1366) 0x46df4d VMULPD %YMM4,%YMM7,%YMM30 |
(1366) 0x46df53 VMULPD %YMM1,%YMM7,%YMM31 |
(1366) 0x46df59 VSUBPD %YMM11,%YMM8,%YMM16 |
(1366) 0x46df5f VMULPD %YMM0,%YMM7,%YMM29 |
(1366) 0x46df65 VFMADD231PD %YMM3,%YMM10,%YMM29 |
(1366) 0x46df6b VFMADD231PD %YMM5,%YMM10,%YMM31 |
(1366) 0x46df71 VFMADD231PD %YMM13,%YMM10,%YMM30 |
(1366) 0x46df77 VFMADD231PD %YMM12,%YMM10,%YMM6 |
(1366) 0x46df7c VSUBPD %YMM11,%YMM9,%YMM17 |
(1366) 0x46df82 VMOVUPD 0x140(%RSP),%YMM10 |
(1366) 0x46df8b VFMADD231PD %YMM17,%YMM10,%YMM6 |
(1366) 0x46df91 VFMADD231PD %YMM16,%YMM10,%YMM30 |
(1366) 0x46df97 VMOVUPD 0x120(%RSP),%YMM7 |
(1366) 0x46dfa0 VMULPD %YMM0,%YMM7,%YMM11 |
(1366) 0x46dfa4 VMULPD %YMM1,%YMM7,%YMM8 |
(1366) 0x46dfa8 VMULPD %YMM4,%YMM7,%YMM9 |
(1366) 0x46dfac VFMADD231PD %YMM15,%YMM10,%YMM31 |
(1366) 0x46dfb2 VMULPD %YMM2,%YMM7,%YMM7 |
(1366) 0x46dfb6 VFMADD231PD %YMM12,%YMM20,%YMM7 |
(1366) 0x46dfbc VFMADD231PD %YMM13,%YMM20,%YMM9 |
(1366) 0x46dfc2 VFMADD231PD %YMM5,%YMM20,%YMM8 |
(1366) 0x46dfc8 VFMADD231PD %YMM3,%YMM20,%YMM11 |
(1366) 0x46dfce VFMADD231PD %YMM14,%YMM10,%YMM29 |
(1366) 0x46dfd4 VFMADD231PD %YMM14,%YMM21,%YMM11 |
(1366) 0x46dfda VFMADD231PD %YMM15,%YMM21,%YMM8 |
(1366) 0x46dfe0 VMULPD %YMM2,%YMM22,%YMM2 |
(1366) 0x46dfe6 VMULPD %YMM4,%YMM22,%YMM4 |
(1366) 0x46dfec VMULPD %YMM1,%YMM22,%YMM1 |
(1366) 0x46dff2 VFMADD231PD %YMM16,%YMM21,%YMM9 |
(1366) 0x46dff8 VMULPD %YMM0,%YMM22,%YMM10 |
(1366) 0x46dffe VFMADD231PD %YMM3,%YMM23,%YMM10 |
(1366) 0x46e004 VFMADD231PD %YMM5,%YMM23,%YMM1 |
(1366) 0x46e00a VFMADD231PD %YMM13,%YMM23,%YMM4 |
(1366) 0x46e010 VFMADD231PD %YMM12,%YMM23,%YMM2 |
(1366) 0x46e016 VFMADD231PD %YMM17,%YMM21,%YMM7 |
(1366) 0x46e01c VFMADD231PD %YMM17,%YMM24,%YMM2 |
(1366) 0x46e022 VFMADD231PD %YMM16,%YMM24,%YMM4 |
(1366) 0x46e028 VFMADD231PD %YMM15,%YMM24,%YMM1 |
(1366) 0x46e02e VFMADD231PD %YMM14,%YMM24,%YMM10 |
(1366) 0x46e034 VMOVDQA64 %YMM28,%YMM0 |
(1366) 0x46e03a VPTERNLOGQ $-0x8,%YMM27,%YMM29,%YMM0 |
(1366) 0x46e041 VADDPD %YMM0,%YMM29,%YMM0 |
(1366) 0x46e047 VROUNDPD $0xb,%YMM0,%YMM3 |
(1366) 0x46e04d VMOVDQA64 %YMM28,%YMM0 |
(1366) 0x46e053 VPTERNLOGQ $-0x8,%YMM27,%YMM31,%YMM0 |
(1366) 0x46e05a VADDPD %YMM0,%YMM31,%YMM0 |
(1366) 0x46e060 VMOVDQA64 %YMM28,%YMM5 |
(1366) 0x46e066 VROUNDPD $0xb,%YMM0,%YMM12 |
(1366) 0x46e06c VPTERNLOGQ $-0x8,%YMM27,%YMM30,%YMM5 |
(1366) 0x46e073 VADDPD %YMM5,%YMM30,%YMM0 |
(1366) 0x46e079 VMOVDQA64 %YMM28,%YMM5 |
(1366) 0x46e07f VPTERNLOGQ $-0x8,%YMM27,%YMM6,%YMM5 |
(1366) 0x46e086 VADDPD %YMM5,%YMM6,%YMM5 |
(1366) 0x46e08a VROUNDPD $0xb,%YMM0,%YMM13 |
(1366) 0x46e090 VROUNDPD $0xb,%YMM5,%YMM0 |
(1366) 0x46e096 VMOVDQA64 %YMM28,%YMM5 |
(1366) 0x46e09c VPTERNLOGQ $-0x8,%YMM27,%YMM7,%YMM5 |
(1366) 0x46e0a3 VADDPD %YMM5,%YMM7,%YMM5 |
(1366) 0x46e0a7 VMOVDQA64 %YMM28,%YMM14 |
(1366) 0x46e0ad VROUNDPD $0xb,%YMM5,%YMM5 |
(1366) 0x46e0b3 VPTERNLOGQ $-0x8,%YMM27,%YMM9,%YMM14 |
(1366) 0x46e0ba VADDPD %YMM14,%YMM9,%YMM14 |
(1366) 0x46e0bf VMOVDQA64 %YMM28,%YMM15 |
(1366) 0x46e0c5 VPTERNLOGQ $-0x8,%YMM27,%YMM8,%YMM15 |
(1366) 0x46e0cc VADDPD %YMM15,%YMM8,%YMM15 |
(1366) 0x46e0d1 VROUNDPD $0xb,%YMM14,%YMM14 |
(1366) 0x46e0d7 VROUNDPD $0xb,%YMM15,%YMM15 |
(1366) 0x46e0dd VMOVDQA64 %YMM28,%YMM16 |
(1366) 0x46e0e3 VPTERNLOGQ $-0x8,%YMM27,%YMM11,%YMM16 |
(1366) 0x46e0ea VADDPD %YMM16,%YMM11,%YMM16 |
(1366) 0x46e0f0 VRNDSCALEPD $0xb,%YMM16,%YMM16 |
(1366) 0x46e0f7 VSUBPD %YMM0,%YMM6,%YMM0 |
(1366) 0x46e0fb VMOVDQA64 %YMM28,%YMM6 |
(1366) 0x46e101 VPTERNLOGQ $-0x8,%YMM27,%YMM10,%YMM6 |
(1366) 0x46e108 VADDPD %YMM6,%YMM10,%YMM6 |
(1366) 0x46e10c VRNDSCALEPD $0xb,%YMM6,%YMM17 |
(1366) 0x46e113 VSUBPD %YMM13,%YMM30,%YMM30 |
(1366) 0x46e119 VSUBPD %YMM12,%YMM31,%YMM31 |
(1366) 0x46e11f VMOVDQA64 %YMM28,%YMM6 |
(1366) 0x46e125 VPTERNLOGQ $-0x8,%YMM27,%YMM1,%YMM6 |
(1366) 0x46e12c VADDPD %YMM6,%YMM1,%YMM6 |
(1366) 0x46e130 VRNDSCALEPD $0xb,%YMM6,%YMM18 |
(1366) 0x46e137 VSUBPD %YMM3,%YMM29,%YMM13 |
(1366) 0x46e13d VSUBPD %YMM16,%YMM11,%YMM11 |
(1366) 0x46e143 VMOVDQA64 %YMM28,%YMM3 |
(1366) 0x46e149 VPTERNLOGQ $-0x8,%YMM27,%YMM4,%YMM3 |
(1366) 0x46e150 VADDPD %YMM3,%YMM4,%YMM3 |
(1366) 0x46e154 VROUNDPD $0xb,%YMM3,%YMM6 |
(1366) 0x46e15a VSUBPD %YMM15,%YMM8,%YMM3 |
(1366) 0x46e15f VSUBPD %YMM14,%YMM9,%YMM12 |
(1366) 0x46e164 VMOVDQA64 %YMM28,%YMM8 |
(1366) 0x46e16a VPTERNLOGQ $-0x8,%YMM27,%YMM2,%YMM8 |
(1366) 0x46e171 VADDPD %YMM2,%YMM8,%YMM8 |
(1366) 0x46e175 VROUNDPD $0xb,%YMM8,%YMM8 |
(1366) 0x46e17b VSUBPD %YMM5,%YMM7,%YMM5 |
(1366) 0x46e17f VSUBPD %YMM8,%YMM2,%YMM29 |
(1366) 0x46e185 VSUBPD %YMM6,%YMM4,%YMM2 |
(1366) 0x46e189 VMULPD %YMM13,%YMM25,%YMM7 |
(1366) 0x46e18f VMULPD %YMM31,%YMM25,%YMM9 |
(1366) 0x46e195 VMULPD %YMM30,%YMM25,%YMM6 |
(1366) 0x46e19b VSUBPD %YMM18,%YMM1,%YMM1 |
(1366) 0x46e1a1 VMULPD %YMM0,%YMM25,%YMM8 |
(1366) 0x46e1a7 VFMADD231PD %YMM5,%YMM19,%YMM8 |
(1366) 0x46e1ad VFMADD231PD %YMM12,%YMM19,%YMM6 |
(1366) 0x46e1b3 VFMADD231PD %YMM3,%YMM19,%YMM9 |
(1366) 0x46e1b9 VFMADD231PD %YMM11,%YMM19,%YMM7 |
(1366) 0x46e1bf VSUBPD %YMM17,%YMM10,%YMM4 |
(1366) 0x46e1c5 VFMADD231PD %YMM26,%YMM4,%YMM7 |
(1366) 0x46e1cb VFMADD231PD %YMM26,%YMM1,%YMM9 |
(1366) 0x46e1d1 VFMADD231PD %YMM26,%YMM2,%YMM6 |
(1366) 0x46e1d7 VFMADD231PD %YMM26,%YMM29,%YMM8 |
(1366) 0x46e1dd VMOVUPD %YMM8,0x60(%R8,%R15,8) |
(1366) 0x46e1e4 VMOVUPD %YMM6,0x40(%R8,%R15,8) |
(1366) 0x46e1eb VMOVUPD %YMM9,0x20(%R8,%R15,8) |
(1366) 0x46e1f2 VMOVUPD %YMM7,(%R8,%R15,8) |
(1366) 0x46e1f8 VBROADCASTSD 0x18(%RDI),%YMM10 |
(1366) 0x46e1fe VMULPD %YMM0,%YMM10,%YMM14 |
(1366) 0x46e202 VMULPD %YMM13,%YMM10,%YMM15 |
(1366) 0x46e207 VMULPD %YMM31,%YMM10,%YMM16 |
(1366) 0x46e20d VBROADCASTSD 0x20(%RDI),%YMM17 |
(1366) 0x46e214 VMULPD %YMM30,%YMM10,%YMM10 |
(1366) 0x46e21a VFMADD231PD %YMM12,%YMM17,%YMM10 |
(1366) 0x46e220 VFMADD231PD %YMM3,%YMM17,%YMM16 |
(1366) 0x46e226 VFMADD231PD %YMM11,%YMM17,%YMM15 |
(1366) 0x46e22c VFMADD231PD %YMM17,%YMM5,%YMM14 |
(1366) 0x46e232 VBROADCASTSD 0x28(%RDI),%YMM17 |
(1366) 0x46e239 VFMADD231PD %YMM29,%YMM17,%YMM14 |
(1366) 0x46e23f VFMADD231PD %YMM1,%YMM17,%YMM16 |
(1366) 0x46e245 VFMADD231PD %YMM17,%YMM2,%YMM10 |
(1366) 0x46e24b VMOVUPD %YMM10,0x40(%RBX,%R15,8) |
(1366) 0x46e252 VMOVUPD %YMM16,0x20(%RBX,%R15,8) |
(1366) 0x46e25a VFMADD231PD %YMM4,%YMM17,%YMM15 |
(1366) 0x46e260 VMOVUPD %YMM15,(%RBX,%R15,8) |
(1366) 0x46e266 VMOVUPD %YMM14,0x60(%RBX,%R15,8) |
(1366) 0x46e26d VBROADCASTSD 0x30(%RDI),%YMM17 |
(1366) 0x46e274 VMULPD %YMM0,%YMM17,%YMM0 |
(1366) 0x46e27a VMULPD %YMM13,%YMM17,%YMM13 |
(1366) 0x46e280 VMULPD %YMM31,%YMM17,%YMM18 |
(1366) 0x46e286 VMOVUPD 0x1e0(%RSP),%YMM31 |
(1366) 0x46e28e VMULPD %YMM30,%YMM17,%YMM17 |
(1366) 0x46e294 VBROADCASTSD 0x38(%RDI),%YMM30 |
(1366) 0x46e29b VFMADD231PD %YMM12,%YMM30,%YMM17 |
(1366) 0x46e2a1 VFMADD231PD %YMM3,%YMM30,%YMM18 |
(1366) 0x46e2a7 VFMADD231PD %YMM11,%YMM30,%YMM13 |
(1366) 0x46e2ad VFMADD231PD %YMM5,%YMM30,%YMM0 |
(1366) 0x46e2b3 VBROADCASTSD 0x40(%RDI),%YMM3 |
(1366) 0x46e2b9 VFMADD231PD %YMM29,%YMM3,%YMM0 |
(1366) 0x46e2bf VFMADD231PD %YMM4,%YMM3,%YMM13 |
(1366) 0x46e2c4 VFMADD231PD %YMM1,%YMM3,%YMM18 |
(1366) 0x46e2ca VFMADD231PD %YMM2,%YMM3,%YMM17 |
(1366) 0x46e2d0 VMULPD %YMM9,%YMM9,%YMM1 |
(1366) 0x46e2d5 VFMADD231PD %YMM16,%YMM16,%YMM1 |
(1366) 0x46e2db VMULPD %YMM7,%YMM7,%YMM2 |
(1366) 0x46e2df VFMADD231PD %YMM15,%YMM15,%YMM2 |
(1366) 0x46e2e4 VMULPD %YMM8,%YMM8,%YMM3 |
(1366) 0x46e2e9 VFMADD231PD %YMM14,%YMM14,%YMM3 |
(1366) 0x46e2ee VMULPD %YMM6,%YMM6,%YMM4 |
(1366) 0x46e2f2 VFMADD231PD %YMM10,%YMM10,%YMM4 |
(1366) 0x46e2f7 VMOVUPD 0x1c0(%RSP),%YMM10 |
(1366) 0x46e300 VMOVUPD %YMM17,0x40(%R11,%R15,8) |
(1366) 0x46e308 VFMADD231PD %YMM17,%YMM17,%YMM4 |
(1366) 0x46e30e VMOVUPD %YMM0,0x60(%R11,%R15,8) |
(1366) 0x46e315 VFMADD231PD %YMM0,%YMM0,%YMM3 |
(1366) 0x46e31a VMOVUPD %YMM13,(%R11,%R15,8) |
(1366) 0x46e320 VFMADD231PD %YMM13,%YMM13,%YMM2 |
(1366) 0x46e325 VMOVUPD %YMM18,0x20(%R11,%R15,8) |
(1366) 0x46e32d VSQRTPD %YMM4,%YMM0 |
(1366) 0x46e331 VMOVUPD %YMM0,0x40(%RCX,%R15,8) |
(1366) 0x46e338 VSQRTPD %YMM3,%YMM0 |
(1366) 0x46e33c VMOVUPD %YMM0,0x60(%RCX,%R15,8) |
(1366) 0x46e343 VSQRTPD %YMM2,%YMM0 |
(1366) 0x46e347 VMOVUPD %YMM0,(%RCX,%R15,8) |
(1366) 0x46e34d VFMADD231PD %YMM18,%YMM18,%YMM1 |
(1366) 0x46e353 VSQRTPD %YMM1,%YMM0 |
(1366) 0x46e357 VMOVUPD %YMM0,0x20(%RCX,%R15,8) |
(1366) 0x46e35e ADD $0x10,%R12D |
(1366) 0x46e362 CMP %R13D,%R12D |
(1366) 0x46e365 JBE 46dea0 |
0x46e36b MOV 0x8(%RSP),%R15 |
0x46e370 CMP %R14D,%R15D |
0x46e373 VMOVUPD 0x100(%RSP),%YMM14 |
0x46e37c VMOVUPD 0x90(%RSP),%XMM20 |
0x46e384 VMOVUPD 0x80(%RSP),%XMM21 |
0x46e38c VMOVUPD 0xe0(%RSP),%YMM22 |
0x46e394 VMOVUPD 0x70(%RSP),%XMM25 |
0x46e39c VMOVUPD 0xc0(%RSP),%YMM26 |
0x46e3a4 MOV 0x4(%RSP),%R13D |
0x46e3a9 VMOVUPD 0x60(%RSP),%XMM23 |
0x46e3b1 VMOVUPD 0x20(%RSP),%XMM3 |
0x46e3b7 VMOVUPD 0x10(%RSP),%XMM2 |
0x46e3bd VMOVUPD 0x50(%RSP),%XMM28 |
0x46e3c5 VMOVUPD 0x40(%RSP),%XMM29 |
0x46e3cd VMOVUPD 0x30(%RSP),%XMM30 |
0x46e3d5 JNE 46e3f0 |
0x46e3d7 CMP %R15D,%R13D |
0x46e3da JNE 46e593 |
0x46e3e0 JMP 46e6d3 |
0x46e3e5 XOR %R14D,%R14D |
0x46e3e8 JMP 46e593 |
0x46e3ed XOR %R14D,%R14D |
0x46e3f0 DEC %R15D |
0x46e3f3 VPERMPD $0x55,%YMM14,%YMM6 |
0x46e3f9 VBROADCASTSD %XMM20,%YMM7 |
0x46e3ff VBROADCASTSD %XMM21,%YMM8 |
0x46e405 VPERMPD $0x55,%YMM22,%YMM9 |
0x46e40c VBROADCASTSD %XMM23,%YMM27 |
0x46e412 VBROADCASTSD %XMM3,%YMM11 |
0x46e417 VBROADCASTSD %XMM28,%YMM15 |
0x46e41d VBROADCASTSD %XMM2,%YMM16 |
0x46e423 VBROADCASTSD %XMM29,%YMM17 |
0x46e429 VBROADCASTSD %XMM30,%YMM1 |
0x46e42f VBROADCASTSD %XMM25,%YMM18 |
0x46e435 VPERMPD $0x55,%YMM26,%YMM19 |
0x46e43c VPBROADCASTQ 0x86bfb(%RIP),%YMM0 |
0x46e445 VPBROADCASTQ 0x8d2d2(%RIP),%YMM2 |
0x46e44e VMOVUPD 0xa0(%RSP),%YMM24 |
0x46e456 NOPW %CS:(%RAX,%RAX,1) |
(1365) 0x46e460 LEA (%R9,%R14,1),%R12D |
(1365) 0x46e464 MOVSXD %R12D,%R12 |
(1365) 0x46e467 VMOVUPD (%RDX,%R12,8),%YMM3 |
(1365) 0x46e46d VSUBPD %YMM31,%YMM3,%YMM3 |
(1365) 0x46e473 VMOVUPD (%R10,%R12,8),%YMM4 |
(1365) 0x46e479 VMOVUPD (%RSI,%R12,8),%YMM5 |
(1365) 0x46e47f VSUBPD %YMM6,%YMM4,%YMM4 |
(1365) 0x46e483 VSUBPD %YMM7,%YMM5,%YMM5 |
(1365) 0x46e487 VMULPD %YMM3,%YMM8,%YMM12 |
(1365) 0x46e48b VFMADD231PD %YMM4,%YMM10,%YMM12 |
(1365) 0x46e490 VMULPD %YMM3,%YMM27,%YMM13 |
(1365) 0x46e496 VFMADD231PD %YMM4,%YMM11,%YMM13 |
(1365) 0x46e49b VFMADD231PD %YMM5,%YMM9,%YMM12 |
(1365) 0x46e4a0 VFMADD231PD %YMM5,%YMM15,%YMM13 |
(1365) 0x46e4a5 VMULPD %YMM3,%YMM16,%YMM3 |
(1365) 0x46e4ab VFMADD231PD %YMM4,%YMM17,%YMM3 |
(1365) 0x46e4b1 VMOVDQA %YMM2,%YMM4 |
(1365) 0x46e4b5 VPTERNLOGQ $-0x8,%YMM0,%YMM12,%YMM4 |
(1365) 0x46e4bc VADDPD %YMM4,%YMM12,%YMM4 |
(1365) 0x46e4c0 VFMADD231PD %YMM5,%YMM1,%YMM3 |
(1365) 0x46e4c5 VROUNDPD $0xb,%YMM4,%YMM4 |
(1365) 0x46e4cb VMOVDQA %YMM2,%YMM5 |
(1365) 0x46e4cf VPTERNLOGQ $-0x8,%YMM0,%YMM13,%YMM5 |
(1365) 0x46e4d6 VADDPD %YMM5,%YMM13,%YMM5 |
(1365) 0x46e4da VROUNDPD $0xb,%YMM5,%YMM5 |
(1365) 0x46e4e0 VSUBPD %YMM4,%YMM12,%YMM4 |
(1365) 0x46e4e4 VMOVDQA %YMM2,%YMM12 |
(1365) 0x46e4e8 VPTERNLOGQ $-0x8,%YMM0,%YMM3,%YMM12 |
(1365) 0x46e4ef VADDPD %YMM3,%YMM12,%YMM12 |
(1365) 0x46e4f3 VROUNDPD $0xb,%YMM12,%YMM12 |
(1365) 0x46e4f9 VSUBPD %YMM5,%YMM13,%YMM5 |
(1365) 0x46e4fd VSUBPD %YMM12,%YMM3,%YMM3 |
(1365) 0x46e502 VMULPD %YMM4,%YMM18,%YMM12 |
(1365) 0x46e508 VFMADD231PD %YMM5,%YMM24,%YMM12 |
(1365) 0x46e50e VFMADD231PD %YMM19,%YMM3,%YMM12 |
(1365) 0x46e514 VMOVUPD %YMM12,(%R8,%R12,8) |
(1365) 0x46e51a VMULPD 0x18(%RDI){1to4},%YMM4,%YMM13 |
(1365) 0x46e521 VFMADD231PD 0x20(%RDI){1to4},%YMM5,%YMM13 |
(1365) 0x46e528 VFMADD231PD 0x28(%RDI){1to4},%YMM3,%YMM13 |
(1365) 0x46e52f VMOVUPD %YMM13,(%RBX,%R12,8) |
(1365) 0x46e535 VMULPD 0x30(%RDI){1to4},%YMM4,%YMM4 |
(1365) 0x46e53c VFMADD231PD 0x38(%RDI){1to4},%YMM5,%YMM4 |
(1365) 0x46e543 VFMADD231PD 0x40(%RDI){1to4},%YMM3,%YMM4 |
(1365) 0x46e54a VMULPD %YMM12,%YMM12,%YMM3 |
(1365) 0x46e54f VFMADD231PD %YMM13,%YMM13,%YMM3 |
(1365) 0x46e554 VMOVUPD %YMM4,(%R11,%R12,8) |
(1365) 0x46e55a VFMADD231PD %YMM4,%YMM4,%YMM3 |
(1365) 0x46e55f VSQRTPD %YMM3,%YMM3 |
(1365) 0x46e563 VMOVUPD %YMM3,(%RCX,%R12,8) |
(1365) 0x46e569 ADD $0x4,%R14D |
(1365) 0x46e56d CMP %R15D,%R14D |
(1365) 0x46e570 JBE 46e460 |
0x46e576 MOV 0x8(%RSP),%R15 |
0x46e57b MOV %R15D,%R14D |
0x46e57e VMOVUPD 0x10(%RSP),%XMM2 |
0x46e584 VMOVUPD 0x20(%RSP),%XMM3 |
0x46e58a CMP %R15D,%R13D |
0x46e58d JE 46e6d3 |
0x46e593 VPUNPCKLQDQ %XMM29,%XMM23,%XMM1 |
0x46e599 VPUNPCKLQDQ %XMM2,%XMM3,%XMM6 |
0x46e59d VPUNPCKLQDQ %XMM30,%XMM28,%XMM0 |
0x46e5a3 ADD %R9D,%R14D |
0x46e5a6 VMOVQ 0x8d172(%RIP),%XMM2 |
0x46e5ae VPBROADCASTQ 0x87ce1(%RIP),%XMM4 |
0x46e5b7 VPBROADCASTQ 0x8d160(%RIP),%XMM7 |
0x46e5c0 VPBROADCASTQ 0x86a77(%RIP),%XMM8 |
0x46e5c9 NOPL (%RAX) |
(1364) 0x46e5d0 MOVSXD %R14D,%R14 |
(1364) 0x46e5d3 VMOVSD (%RSI,%R14,8),%XMM3 |
(1364) 0x46e5d9 VMOVSD (%RDX,%R14,8),%XMM5 |
(1364) 0x46e5df VMOVHPD (%R10,%R14,8),%XMM5,%XMM5 |
(1364) 0x46e5e5 VSUBSD %XMM20,%XMM3,%XMM3 |
(1364) 0x46e5eb VSUBPD %XMM14,%XMM5,%XMM5 |
(1364) 0x46e5f0 VSHUFPD $0x1,%XMM5,%XMM5,%XMM9 |
(1364) 0x46e5f5 VMULPD %XMM6,%XMM9,%XMM9 |
(1364) 0x46e5f9 VMOVDDUP %XMM3,%XMM3 |
(1364) 0x46e5fd VPUNPCKHQDQ %XMM3,%XMM5,%XMM10 |
(1364) 0x46e601 VMULPD %XMM10,%XMM22,%XMM10 |
(1364) 0x46e607 VFMADD231PD %XMM5,%XMM1,%XMM9 |
(1364) 0x46e60c VFMADD213SD %XMM10,%XMM21,%XMM5 |
(1364) 0x46e612 VSHUFPD $0x1,%XMM10,%XMM10,%XMM10 |
(1364) 0x46e618 VADDSD %XMM5,%XMM10,%XMM5 |
(1364) 0x46e61c VFMADD231PD %XMM3,%XMM0,%XMM9 |
(1364) 0x46e621 VMOVAPD %XMM5,%XMM3 |
(1364) 0x46e625 VPTERNLOGQ $-0x28,%XMM4,%XMM2,%XMM3 |
(1364) 0x46e62c VADDSD %XMM3,%XMM5,%XMM3 |
(1364) 0x46e630 VROUNDSD $0xb,%XMM3,%XMM3,%XMM3 |
(1364) 0x46e636 VSUBSD %XMM3,%XMM5,%XMM3 |
(1364) 0x46e63a VMOVDQA %XMM7,%XMM5 |
(1364) 0x46e63e VPTERNLOGQ $-0x8,%XMM8,%XMM9,%XMM5 |
(1364) 0x46e645 VADDPD %XMM5,%XMM9,%XMM5 |
(1364) 0x46e649 VROUNDPD $0xb,%XMM5,%XMM5 |
(1364) 0x46e64f VSUBPD %XMM5,%XMM9,%XMM5 |
(1364) 0x46e653 VMULPD %XMM5,%XMM26,%XMM9 |
(1364) 0x46e659 VMOVAPD %XMM3,%XMM10 |
(1364) 0x46e65d VFMADD213SD %XMM9,%XMM25,%XMM10 |
(1364) 0x46e663 VSHUFPD $0x1,%XMM9,%XMM9,%XMM9 |
(1364) 0x46e669 VADDSD %XMM9,%XMM10,%XMM9 |
(1364) 0x46e66e VMOVSD %XMM9,(%R8,%R14,8) |
(1364) 0x46e674 VMULSD 0x18(%RDI),%XMM3,%XMM10 |
(1364) 0x46e679 VFMADD231SD 0x20(%RDI),%XMM5,%XMM10 |
(1364) 0x46e67f VSHUFPD $0x1,%XMM5,%XMM5,%XMM11 |
(1364) 0x46e684 VFMADD132SD 0x28(%RDI),%XMM10,%XMM11 |
(1364) 0x46e68a VMOVSD %XMM11,(%RBX,%R14,8) |
(1364) 0x46e690 VMULPD 0x38(%RDI),%XMM5,%XMM5 |
(1364) 0x46e695 VMOVAPD %XMM5,%XMM10 |
(1364) 0x46e699 VFMADD231SD 0x30(%RDI),%XMM3,%XMM10 |
(1364) 0x46e69f VSHUFPD $0x1,%XMM5,%XMM5,%XMM3 |
(1364) 0x46e6a4 VADDSD %XMM3,%XMM10,%XMM3 |
(1364) 0x46e6a8 VMOVSD %XMM3,(%R11,%R14,8) |
(1364) 0x46e6ae VMULSD %XMM9,%XMM9,%XMM5 |
(1364) 0x46e6b3 VFMADD231SD %XMM11,%XMM11,%XMM5 |
(1364) 0x46e6b8 VFMADD231SD %XMM3,%XMM3,%XMM5 |
(1364) 0x46e6bd VSQRTSD %XMM5,%XMM5,%XMM3 |
(1364) 0x46e6c1 VMOVSD %XMM3,(%RCX,%R14,8) |
(1364) 0x46e6c7 INC %R14D |
(1364) 0x46e6ca CMP %R14D,%EAX |
(1364) 0x46e6cd JNE 46e5d0 |
0x46e6d3 LEA -0x28(%RBP),%RSP |
0x46e6d7 POP %RBX |
0x46e6d8 POP %R12 |
0x46e6da POP %R13 |
0x46e6dc POP %R14 |
0x46e6de POP %R15 |
0x46e6e0 POP %RBP |
0x46e6e1 VZEROUPPER |
0x46e6e4 RET |
0x46e6e5 NOPW %CS:(%RAX,%RAX,1) |
0x46e6ef NOP |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 151 |
nb uops | 152 |
loop length | 860 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 18 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 25.33 cycles |
front end | 25.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 6.20 | 17.00 | 17.00 | 14.00 | 27.00 | 6.30 | 14.00 | 14.00 | 14.00 | 6.20 | 17.00 |
cycles | 6.30 | 6.20 | 17.00 | 17.00 | 14.00 | 27.00 | 6.30 | 14.00 | 14.00 | 14.00 | 6.20 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.16 |
Stall cycles | 2.16 |
ROB full (events) | 3.05 |
Front-end | 25.33 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 5% |
all | 61% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 46% |
load | 48% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 19% |
all | 10% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 27% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 22% |
load | 21% |
store | 36% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x220,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 46e6d3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa23> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 46e3e5 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x735> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %R15,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 46e3ed <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x73d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x871c8(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8d89e(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0x4(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 46e3f0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x740> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 46e593 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 46e6d3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa23> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 46e593 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8e3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DEC %R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM26,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x86bfb(%RIP),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8d2d2(%RIP),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 46e6d3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa23> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM23,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM2,%XMM3,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM28,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0x8d172(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x87ce1(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8d160(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x86a77(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 151 |
nb uops | 152 |
loop length | 860 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 18 |
used ymm registers | 25 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 25.33 cycles |
front end | 25.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 6.20 | 17.00 | 17.00 | 14.00 | 27.00 | 6.30 | 14.00 | 14.00 | 14.00 | 6.20 | 17.00 |
cycles | 6.30 | 6.20 | 17.00 | 17.00 | 14.00 | 27.00 | 6.30 | 14.00 | 14.00 | 14.00 | 6.20 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.16 |
Stall cycles | 2.16 |
ROB full (events) | 3.05 |
Front-end | 25.33 |
Dispatch | 27.00 |
Overall L1 | 27.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 5% |
all | 61% |
load | 67% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 27% |
all | 46% |
load | 48% |
store | 90% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 19% |
all | 10% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 27% |
load | 25% |
store | 38% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 22% |
load | 21% |
store | 36% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x220,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 46e6d3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa23> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RSI),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RSI),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R11,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x48(%RDI),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x50(%RDI),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x60(%RDI),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%RDI),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%RDI),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%RDI),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%RDI),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%RDI),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RDI),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x8(%RDI),%XMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R13D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 46e3e5 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x735> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VBROADCASTSD %XMM14,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM26,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
AND $-0x10,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %XMM3,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM2,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %R15,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 46e3ed <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x73d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13D,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVUPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM14,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM20,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM20,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM21,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM22,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPERMPD $0x55,%YMM22,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %XMM23,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM23,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM3,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM28,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM28,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM29,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM29,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM30,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM30,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %XMM25,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %YMM26,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPERMPD $0x55,%YMM26,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x871c8(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8d89e(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x90(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%XMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xe0(%RSP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x70(%RSP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0xc0(%RSP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0x4(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0x60(%RSP),%XMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x50(%RSP),%XMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x40(%RSP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x30(%RSP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 46e3f0 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x740> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 46e593 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 46e6d3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa23> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 46e593 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x8e3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DEC %R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPERMPD $0x55,%YMM14,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM22,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM23,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM28,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM29,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM30,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM25,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMPD $0x55,%YMM26,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x86bfb(%RIP),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8d2d2(%RIP),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVUPD 0xa0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD 0x10(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
CMP %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 46e6d3 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0xa23> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPUNPCKLQDQ %XMM29,%XMM23,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM2,%XMM3,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ %XMM30,%XMM28,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVQ 0x8d172(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x87ce1(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8d160(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x86a77(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii– | 2.23 | 1.61 |
○Loop 1366 - ParticleBConds3DSoa.h:234-255 - exec | 2.21 | 1.55 |
○Loop 1365 - ParticleBConds3DSoa.h:234-255 - exec | 0 | 0 |
○Loop 1364 - ParticleBConds3DSoa.h:235-255 - exec | 0 | 0 |