Function: _ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE | Module: libqmcparticle_omptarget.so | Source: SoaDistanceTableAAOMPTarget.h:179-187 [...] | Coverage: 0.06% |
---|
Function: _ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE | Module: libqmcparticle_omptarget.so | Source: SoaDistanceTableAAOMPTarget.h:179-187 [...] | Coverage: 0.06% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/SoaDistanceTableAAOMPTarget.h: 179 - 187 |
-------------------------------------------------------------------------------- |
179: inline void evaluate(ParticleSet& P) override |
180: { |
181: ScopedTimer local_timer(evaluate_timer_); |
182: |
183: constexpr T BigR = std::numeric_limits<T>::max(); |
184: for (int iat = 1; iat < num_targets_; ++iat) |
185: DTD_BConds<T, D, SC>::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), |
186: displacements_[iat], 0, iat, iat); |
187: } |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_vector.h: 1126 - 1126 |
-------------------------------------------------------------------------------- |
1126: return *(this->_M_impl._M_start + __n); |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/unique_ptr.h: 199 - 199 |
-------------------------------------------------------------------------------- |
199: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 222 - 255 |
-------------------------------------------------------------------------------- |
222: const T x0 = pos[0]; |
223: const T y0 = pos[1]; |
224: const T z0 = pos[2]; |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 248 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
0x25b00 PUSH %RBP |
0x25b01 MOV %RSP,%RBP |
0x25b04 PUSH %R15 |
0x25b06 PUSH %R14 |
0x25b08 PUSH %R13 |
0x25b0a MOV $0x1,%R13D |
0x25b10 PUSH %R12 |
0x25b12 PUSH %RBX |
0x25b13 MOV %RDI,%RBX |
0x25b16 AND $-0x20,%RSP |
0x25b1a SUB $0x20,%RSP |
0x25b1e MOV 0x278(%RDI),%RDI |
0x25b25 MOV %RSI,0x10(%RSP) |
0x25b2a MOV %RDI,0x8(%RSP) |
0x25b2f CALL 8250 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> |
0x25b34 CMPQ $0x1,0x18(%RBX) |
0x25b39 JBE 26049 |
0x25b3f NOP |
(257) 0x25b40 MOV 0x10(%RSP),%R15 |
(257) 0x25b45 MOV 0x60(%RBX),%R14 |
(257) 0x25b49 LEA (%R13,%R13,4),%RAX |
(257) 0x25b4e MOV 0x48(%RBX),%RDX |
(257) 0x25b52 SAL $0x3,%RAX |
(257) 0x25b56 MOV 0x278(%R15),%RDI |
(257) 0x25b5d ADD %RAX,%R14 |
(257) 0x25b60 MOV 0x18(%RDX,%RAX,1),%R12 |
(257) 0x25b65 MOV (%RDI),%RAX |
(257) 0x25b68 CALLQ 0x48(%RAX) |
(257) 0x25b6b MOV 0x40(%R15),%RCX |
(257) 0x25b6f LEA (%R13,%R13,2),%RSI |
(257) 0x25b74 MOV 0x8(%RAX),%RDI |
(257) 0x25b78 LEA -0x1(%R13),%R10D |
(257) 0x25b7c MOV 0x18(%RAX),%R9 |
(257) 0x25b80 MOV %R13D,%R11D |
(257) 0x25b83 LEA (%RCX,%RSI,8),%R8 |
(257) 0x25b87 MOV 0x8(%R14),%RSI |
(257) 0x25b8b MOV 0x18(%R14),%RCX |
(257) 0x25b8f SAL $0x3,%RDI |
(257) 0x25b93 VMOVSD (%R8),%XMM4 |
(257) 0x25b98 VMOVSD 0x8(%R8),%XMM2 |
(257) 0x25b9e SAL $0x3,%RSI |
(257) 0x25ba2 VMOVSD 0x10(%R8),%XMM1 |
(257) 0x25ba8 LEA (%R9,%RDI,1),%R8 |
(257) 0x25bac LEA (%RCX,%RSI,1),%RDX |
(257) 0x25bb0 ADD %R8,%RDI |
(257) 0x25bb3 ADD %RDX,%RSI |
(257) 0x25bb6 CMP $0x2,%R10D |
(257) 0x25bba JBE 26070 |
(257) 0x25bc0 MOV %R13D,%R14D |
(257) 0x25bc3 VBROADCASTSD 0x2e384(%RIP),%YMM6 |
(257) 0x25bcc VBROADCASTSD 0x2e36b(%RIP),%YMM5 |
(257) 0x25bd5 VBROADCASTSD %XMM4,%YMM9 |
(257) 0x25bda SHR $0x2,%R14D |
(257) 0x25bde VBROADCASTSD %XMM2,%YMM8 |
(257) 0x25be3 VBROADCASTSD %XMM1,%YMM7 |
(257) 0x25be8 XOR %R15D,%R15D |
(257) 0x25beb SAL $0x5,%R14 |
(257) 0x25bef NOP |
(258) 0x25bf0 VMOVAPD (%R8,%R15,1),%YMM10 |
(258) 0x25bf6 VMOVAPD (%R9,%R15,1),%YMM0 |
(258) 0x25bfc VMOVAPD %YMM6,%YMM13 |
(258) 0x25c00 VMOVAPD (%RDI,%R15,1),%YMM11 |
(258) 0x25c06 VSUBPD %YMM8,%YMM10,%YMM14 |
(258) 0x25c0b VSUBPD %YMM9,%YMM0,%YMM3 |
(258) 0x25c10 VSUBPD %YMM7,%YMM11,%YMM0 |
(258) 0x25c14 VMULPD 0x168(%RBX){1to4},%YMM14,%YMM15 |
(258) 0x25c1b VMULPD 0x198(%RBX){1to4},%YMM14,%YMM12 |
(258) 0x25c22 VMULPD 0x180(%RBX){1to4},%YMM14,%YMM11 |
(258) 0x25c29 VFMADD231PD 0x160(%RBX){1to4},%YMM3,%YMM15 |
(258) 0x25c30 VFMADD231PD 0x178(%RBX){1to4},%YMM3,%YMM11 |
(258) 0x25c37 VFMADD132PD 0x190(%RBX){1to4},%YMM12,%YMM3 |
(258) 0x25c3e VFMADD231PD 0x170(%RBX){1to4},%YMM0,%YMM15 |
(258) 0x25c45 VFMADD231PD 0x188(%RBX){1to4},%YMM0,%YMM11 |
(258) 0x25c4c VFMADD132PD 0x1a0(%RBX){1to4},%YMM3,%YMM0 |
(258) 0x25c53 VPTERNLOGQ $-0x8,%YMM5,%YMM15,%YMM13 |
(258) 0x25c5a VADDPD %YMM13,%YMM15,%YMM3 |
(258) 0x25c5f VRNDSCALEPD $0x3,%YMM3,%YMM10 |
(258) 0x25c66 VMOVAPD %YMM6,%YMM3 |
(258) 0x25c6a VPTERNLOGQ $-0x8,%YMM5,%YMM0,%YMM3 |
(258) 0x25c71 VSUBPD %YMM10,%YMM15,%YMM14 |
(258) 0x25c76 VMOVAPD %YMM6,%YMM15 |
(258) 0x25c7a VADDPD %YMM3,%YMM0,%YMM10 |
(258) 0x25c7e VPTERNLOGQ $-0x8,%YMM5,%YMM11,%YMM15 |
(258) 0x25c85 VADDPD %YMM15,%YMM11,%YMM12 |
(258) 0x25c8a VRNDSCALEPD $0x3,%YMM10,%YMM15 |
(258) 0x25c91 VRNDSCALEPD $0x3,%YMM12,%YMM13 |
(258) 0x25c98 VSUBPD %YMM15,%YMM0,%YMM0 |
(258) 0x25c9d VSUBPD %YMM13,%YMM11,%YMM11 |
(258) 0x25ca2 VMULPD 0x120(%RBX){1to4},%YMM11,%YMM12 |
(258) 0x25ca9 VFMADD231PD 0x118(%RBX){1to4},%YMM14,%YMM12 |
(258) 0x25cb0 VFMADD231PD 0x128(%RBX){1to4},%YMM0,%YMM12 |
(258) 0x25cb7 VMOVAPD %YMM12,(%RCX,%R15,1) |
(258) 0x25cbd VMULPD 0x138(%RBX){1to4},%YMM11,%YMM13 |
(258) 0x25cc4 VFMADD231PD 0x130(%RBX){1to4},%YMM14,%YMM13 |
(258) 0x25ccb VFMADD231PD 0x140(%RBX){1to4},%YMM0,%YMM13 |
(258) 0x25cd2 VMOVAPD %YMM13,(%RDX,%R15,1) |
(258) 0x25cd8 VMULPD 0x150(%RBX){1to4},%YMM11,%YMM11 |
(258) 0x25cdf VFMADD231PD 0x148(%RBX){1to4},%YMM14,%YMM11 |
(258) 0x25ce6 VFMADD132PD 0x158(%RBX){1to4},%YMM11,%YMM0 |
(258) 0x25ced VMOVAPD %YMM0,(%RSI,%R15,1) |
(258) 0x25cf3 VMOVAPD (%RDX,%R15,1),%YMM3 |
(258) 0x25cf9 VMOVAPD (%RCX,%R15,1),%YMM14 |
(258) 0x25cff VMULPD %YMM3,%YMM3,%YMM10 |
(258) 0x25d03 VFMADD132PD %YMM14,%YMM10,%YMM14 |
(258) 0x25d08 VFMADD132PD %YMM0,%YMM14,%YMM0 |
(258) 0x25d0d VSQRTPD %YMM0,%YMM15 |
(258) 0x25d11 VMOVAPD %YMM15,(%R12,%R15,1) |
(258) 0x25d17 ADD $0x20,%R15 |
(258) 0x25d1b CMP %R15,%R14 |
(258) 0x25d1e JNE 25bf0 |
(257) 0x25d24 MOV %R11D,%R10D |
(257) 0x25d27 AND $-0x4,%R10D |
(257) 0x25d2b MOV %R10D,%EAX |
(257) 0x25d2e TEST $0x3,%R11B |
(257) 0x25d32 JE 26068 |
(257) 0x25d38 VZEROUPPER |
(257) 0x25d3b SUB %R10D,%R11D |
(257) 0x25d3e MOV %R11D,0x1c(%RSP) |
(257) 0x25d43 CMP $0x1,%R11D |
(257) 0x25d47 JE 25edd |
(257) 0x25d4d VMOVAPD (%R8,%R10,8),%XMM5 |
(257) 0x25d53 VMOVAPD (%R9,%R10,8),%XMM6 |
(257) 0x25d59 VMOVDDUP %XMM2,%XMM0 |
(257) 0x25d5d VMOVDDUP %XMM4,%XMM7 |
(257) 0x25d61 VMOVDDUP 0x138(%RBX),%XMM14 |
(257) 0x25d69 VMOVDDUP 0x130(%RBX),%XMM9 |
(257) 0x25d71 VMOVDDUP %XMM1,%XMM3 |
(257) 0x25d75 VMOVDDUP 0x150(%RBX),%XMM13 |
(257) 0x25d7d VMOVDDUP 0x140(%RBX),%XMM8 |
(257) 0x25d85 VMOVDDUP 0x148(%RBX),%XMM12 |
(257) 0x25d8d VMOVDDUP 0x158(%RBX),%XMM11 |
(257) 0x25d95 LEA (,%R10,8),%R11 |
(257) 0x25d9d VSUBPD %XMM0,%XMM5,%XMM15 |
(257) 0x25da1 VSUBPD %XMM7,%XMM6,%XMM10 |
(257) 0x25da5 VMOVAPD (%RDI,%R10,8),%XMM7 |
(257) 0x25dab LEA (%RCX,%R11,1),%R15 |
(257) 0x25daf LEA (%RDX,%R11,1),%R14 |
(257) 0x25db3 VSUBPD %XMM3,%XMM7,%XMM0 |
(257) 0x25db7 VMULPD 0x168(%RBX){1to2},%XMM15,%XMM3 |
(257) 0x25dbe VMULPD 0x180(%RBX){1to2},%XMM15,%XMM5 |
(257) 0x25dc5 VMULPD 0x198(%RBX){1to2},%XMM15,%XMM6 |
(257) 0x25dcc VFMADD231PD 0x160(%RBX){1to2},%XMM10,%XMM3 |
(257) 0x25dd3 VFMADD231PD 0x178(%RBX){1to2},%XMM10,%XMM5 |
(257) 0x25dda VFMADD132PD 0x190(%RBX){1to2},%XMM6,%XMM10 |
(257) 0x25de1 VMOVDDUP 0x2e167(%RIP),%XMM6 |
(257) 0x25de9 VFMADD231PD 0x170(%RBX){1to2},%XMM0,%XMM3 |
(257) 0x25df0 VFMADD231PD 0x188(%RBX){1to2},%XMM0,%XMM5 |
(257) 0x25df7 VFMADD132PD 0x1a0(%RBX){1to2},%XMM10,%XMM0 |
(257) 0x25dfe VPTERNLOGQ $-0x8,0x2e137(%RIP),%XMM3,%XMM6 |
(257) 0x25e09 VADDPD %XMM6,%XMM3,%XMM10 |
(257) 0x25e0d VMOVDDUP 0x2e13b(%RIP),%XMM6 |
(257) 0x25e15 VPTERNLOGQ $-0x8,0x2e120(%RIP),%XMM5,%XMM6 |
(257) 0x25e20 VADDPD %XMM6,%XMM5,%XMM7 |
(257) 0x25e24 VMOVDDUP 0x2e124(%RIP),%XMM6 |
(257) 0x25e2c VPTERNLOGQ $-0x8,0x2e109(%RIP),%XMM0,%XMM6 |
(257) 0x25e37 VRNDSCALEPD $0x3,%XMM10,%XMM15 |
(257) 0x25e3e VRNDSCALEPD $0x3,%XMM7,%XMM10 |
(257) 0x25e45 VSUBPD %XMM15,%XMM3,%XMM3 |
(257) 0x25e4a VADDPD %XMM6,%XMM0,%XMM15 |
(257) 0x25e4e VSUBPD %XMM10,%XMM5,%XMM5 |
(257) 0x25e53 VRNDSCALEPD $0x3,%XMM15,%XMM7 |
(257) 0x25e5a VMULPD %XMM14,%XMM5,%XMM14 |
(257) 0x25e5f VMULPD 0x120(%RBX){1to2},%XMM5,%XMM10 |
(257) 0x25e66 VFMADD132PD %XMM3,%XMM14,%XMM9 |
(257) 0x25e6b VSUBPD %XMM7,%XMM0,%XMM0 |
(257) 0x25e6f VFMADD231PD 0x118(%RBX){1to2},%XMM3,%XMM10 |
(257) 0x25e76 VFMADD132PD %XMM0,%XMM9,%XMM8 |
(257) 0x25e7b VMULPD %XMM13,%XMM5,%XMM9 |
(257) 0x25e80 VFMADD231PD 0x128(%RBX){1to2},%XMM0,%XMM10 |
(257) 0x25e87 VFMADD132PD %XMM12,%XMM9,%XMM3 |
(257) 0x25e8c VMOVAPD %XMM10,(%R15) |
(257) 0x25e91 VMOVAPD %XMM8,(%R14) |
(257) 0x25e96 VFMADD132PD %XMM11,%XMM3,%XMM0 |
(257) 0x25e9b VMOVAPD %XMM0,(%RSI,%R11,1) |
(257) 0x25ea1 VMOVAPD (%R14),%XMM12 |
(257) 0x25ea6 VMOVAPD (%R15),%XMM8 |
(257) 0x25eab VMULPD %XMM12,%XMM12,%XMM13 |
(257) 0x25eb0 VFMADD132PD %XMM8,%XMM13,%XMM8 |
(257) 0x25eb5 VFMADD132PD %XMM0,%XMM8,%XMM0 |
(257) 0x25eba VSQRTPD %XMM0,%XMM11 |
(257) 0x25ebe VMOVAPD %XMM11,(%R12,%R11,1) |
(257) 0x25ec4 MOV 0x1c(%RSP),%R10D |
(257) 0x25ec9 TEST $0x1,%R10B |
(257) 0x25ecd JE 2603c |
(257) 0x25ed3 MOV %R10D,%R11D |
(257) 0x25ed6 AND $-0x2,%R11D |
(257) 0x25eda ADD %R11D,%EAX |
(257) 0x25edd CLTQ |
(257) 0x25edf VMOVSD (%R9,%RAX,8),%XMM3 |
(257) 0x25ee5 LEA (,%RAX,8),%R15 |
(257) 0x25eed ADD %R15,%RCX |
(257) 0x25ef0 ADD %R15,%RDX |
(257) 0x25ef3 VSUBSD %XMM4,%XMM3,%XMM5 |
(257) 0x25ef7 VMOVSD (%R8,%RAX,8),%XMM4 |
(257) 0x25efd VMOVQ 0x2e04b(%RIP),%XMM3 |
(257) 0x25f05 VSUBSD %XMM2,%XMM4,%XMM6 |
(257) 0x25f09 VMOVSD (%RDI,%RAX,8),%XMM2 |
(257) 0x25f0e VSUBSD %XMM1,%XMM2,%XMM15 |
(257) 0x25f12 VMULSD 0x168(%RBX),%XMM6,%XMM1 |
(257) 0x25f1a VMULSD 0x180(%RBX),%XMM6,%XMM0 |
(257) 0x25f22 VMULSD 0x198(%RBX),%XMM6,%XMM7 |
(257) 0x25f2a VFMADD231SD 0x160(%RBX),%XMM5,%XMM1 |
(257) 0x25f33 VFMADD231SD 0x178(%RBX),%XMM5,%XMM0 |
(257) 0x25f3c VFMADD132SD 0x190(%RBX),%XMM7,%XMM5 |
(257) 0x25f45 VFMADD231SD 0x170(%RBX),%XMM15,%XMM1 |
(257) 0x25f4e VFMADD231SD 0x188(%RBX),%XMM15,%XMM0 |
(257) 0x25f57 VFMADD132SD 0x1a0(%RBX),%XMM5,%XMM15 |
(257) 0x25f60 VPTERNLOGQ $-0x8,0x2dfc5(%RIP),%XMM1,%XMM3 |
(257) 0x25f6b VADDSD %XMM3,%XMM1,%XMM10 |
(257) 0x25f6f VMOVQ 0x2dfd9(%RIP),%XMM3 |
(257) 0x25f77 VPTERNLOGQ $-0x8,0x2dfae(%RIP),%XMM0,%XMM3 |
(257) 0x25f82 VADDSD %XMM3,%XMM0,%XMM8 |
(257) 0x25f86 VMOVQ 0x2dfc2(%RIP),%XMM3 |
(257) 0x25f8e VRNDSCALESD $0x3,%XMM10,%XMM10,%XMM14 |
(257) 0x25f95 VSUBSD %XMM14,%XMM1,%XMM9 |
(257) 0x25f9a VPTERNLOGQ $-0x8,0x2df8b(%RIP),%XMM15,%XMM3 |
(257) 0x25fa5 VRNDSCALESD $0x3,%XMM8,%XMM8,%XMM12 |
(257) 0x25fac VSUBSD %XMM12,%XMM0,%XMM13 |
(257) 0x25fb1 VADDSD %XMM3,%XMM15,%XMM11 |
(257) 0x25fb5 VMULSD 0x120(%RBX),%XMM13,%XMM6 |
(257) 0x25fbd VRNDSCALESD $0x3,%XMM11,%XMM11,%XMM5 |
(257) 0x25fc4 VSUBSD %XMM5,%XMM15,%XMM4 |
(257) 0x25fc8 VFMADD231SD 0x118(%RBX),%XMM9,%XMM6 |
(257) 0x25fd1 VFMADD231SD 0x128(%RBX),%XMM4,%XMM6 |
(257) 0x25fda VMOVSD %XMM6,(%RCX) |
(257) 0x25fde VMULSD 0x138(%RBX),%XMM13,%XMM2 |
(257) 0x25fe6 VFMADD231SD 0x130(%RBX),%XMM9,%XMM2 |
(257) 0x25fef VFMADD231SD 0x140(%RBX),%XMM4,%XMM2 |
(257) 0x25ff8 VMOVSD %XMM2,(%RDX) |
(257) 0x25ffc VMULSD 0x150(%RBX),%XMM13,%XMM15 |
(257) 0x26004 VFMADD132SD 0x148(%RBX),%XMM15,%XMM9 |
(257) 0x2600d VFMADD132SD 0x158(%RBX),%XMM9,%XMM4 |
(257) 0x26016 VMOVSD %XMM4,(%RSI,%R15,1) |
(257) 0x2601c VMOVSD (%RDX),%XMM0 |
(257) 0x26020 VMOVSD (%RCX),%XMM1 |
(257) 0x26024 VMULSD %XMM0,%XMM0,%XMM7 |
(257) 0x26028 VFMADD132SD %XMM1,%XMM7,%XMM1 |
(257) 0x2602d VFMADD132SD %XMM4,%XMM1,%XMM4 |
(257) 0x26032 VSQRTSD %XMM4,%XMM4,%XMM4 |
(257) 0x26036 VMOVSD %XMM4,(%R12,%RAX,8) |
(257) 0x2603c INC %R13 |
(257) 0x2603f CMP 0x18(%RBX),%R13 |
(257) 0x26043 JB 25b40 |
0x26049 MOV 0x8(%RSP),%RDI |
0x2604e CALL 8280 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> |
0x26053 LEA -0x28(%RBP),%RSP |
0x26057 POP %RBX |
0x26058 POP %R12 |
0x2605a POP %R13 |
0x2605c POP %R14 |
0x2605e POP %R15 |
0x26060 POP %RBP |
0x26061 RET |
0x26062 NOPW (%RAX,%RAX,1) |
(257) 0x26068 VZEROUPPER |
(257) 0x2606b JMP 2603c |
0x2606d NOPL (%RAX) |
(257) 0x26070 XOR %R10D,%R10D |
(257) 0x26073 XOR %EAX,%EAX |
(257) 0x26075 JMP 25d3b |
0x2607a MOV %RAX,%RBX |
0x2607d JMP a1d6 |
0x26082 NOPW %CS:(%RAX,%RAX,1) |
0x2608d NOPL (%RAX) |
Path / |
Source file and lines | SoaDistanceTableAAOMPTarget.h:179-187 |
Module | libqmcparticle_omptarget.so |
nb instructions | 34 |
nb uops | 36 |
loop length | 120 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 6.00 cycles |
front end | 6.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 1.00 | 3.33 | 3.33 | 5.00 | 1.00 | 1.00 | 5.00 | 5.00 | 5.00 | 1.00 | 3.33 |
cycles | 1.00 | 1.00 | 3.33 | 3.33 | 5.00 | 1.00 | 1.00 | 5.00 | 5.00 | 5.00 | 1.00 | 3.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.77 |
Stall cycles | 0.00 |
Front-end | 6.00 |
Dispatch | 5.00 |
Overall L1 | 6.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV $0x1,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x278(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 8250 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMPQ $0x1,0x18(%RBX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JBE 26049 <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE+0x549> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 8280 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP a1d6 <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE.cold> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | SoaDistanceTableAAOMPTarget.h:179-187 |
Module | libqmcparticle_omptarget.so |
nb instructions | 34 |
nb uops | 36 |
loop length | 120 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 6.00 cycles |
front end | 6.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 1.00 | 3.33 | 3.33 | 5.00 | 1.00 | 1.00 | 5.00 | 5.00 | 5.00 | 1.00 | 3.33 |
cycles | 1.00 | 1.00 | 3.33 | 3.33 | 5.00 | 1.00 | 1.00 | 5.00 | 5.00 | 5.00 | 1.00 | 3.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.77 |
Stall cycles | 0.00 |
Front-end | 6.00 |
Dispatch | 5.00 |
Overall L1 | 6.00 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV $0x1,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x278(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 8250 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMPQ $0x1,0x18(%RBX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JBE 26049 <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE+0x549> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 8280 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP a1d6 <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE.cold> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE– | 0.06 | 0.06 |
▼Loop 257 - SoaDistanceTableAAOMPTarget.h:184-187 - libqmcparticle_omptarget.so– | 0 | 0 |
○Loop 258 - ParticleBConds3DSoa.h:237-255 - libqmcparticle_omptarget.so | 0.06 | 0.05 |