Function: miniqmcreference::TwoBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::computeU3(qmcpl ... | Module: libqmcwfs.so | Source: TwoBodyJastrowRef.h:254-279 [...] | Coverage: 0.24% |
---|
Function: miniqmcreference::TwoBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::computeU3(qmcpl ... | Module: libqmcwfs.so | Source: TwoBodyJastrowRef.h:254-279 [...] | Coverage: 0.24% |
---|
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_algobase.h: 238 - 1128 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
[...] |
931: *__first = __tmp; |
[...] |
1123: if (__n <= 0) |
1124: return __first; |
1125: |
1126: __glibcxx_requires_can_increment(__first, __n); |
1127: |
1128: std::__fill_a(__first, __first + __n, __value); |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 313 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_vector.h: 1143 - 1143 |
-------------------------------------------------------------------------------- |
1143: return *(this->_M_impl._M_start + __n); |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 254 - 279 |
-------------------------------------------------------------------------------- |
254: inline void TwoBodyJastrowRef<FT>::computeU3(const ParticleSet& P, |
[...] |
262: const int jelmax = triangle ? iat : N; |
[...] |
268: const int igt = P.GroupID[iat] * NumGroups; |
269: for (int jg = 0; jg < NumGroups; ++jg) |
270: { |
271: const FuncType& f2(*F[igt + jg]); |
[...] |
279: } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 221 - 223 |
-------------------------------------------------------------------------------- |
221: inline Type_t& operator[](size_t i) |
222: { |
223: return X[i]; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 276 - 336 |
-------------------------------------------------------------------------------- |
276: real_type dSquareDeltaRinv = DeltaRInv * DeltaRInv; |
277: constexpr real_type cOne(1); |
278: |
279: // START_MARK_FIRST(); |
280: |
281: ASSUME_ALIGNED(distIndices); |
282: ASSUME_ALIGNED(distArrayCompressed); |
283: int iCount = 0; |
284: int iLimit = iEnd - iStart; |
[...] |
291: for (int jat = 0; jat < iLimit; jat++) |
292: { |
293: real_type r = distArray[jat]; |
294: if (r < cutoff_radius && iStart + jat != iat) |
295: { |
296: distIndices[iCount] = jat; |
297: distArrayCompressed[iCount] = r; |
298: iCount++; |
[...] |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
0x4bc20 PUSH %RBP |
0x4bc21 MOV %RDI,%R11 |
0x4bc24 MOV %RSI,%R10 |
0x4bc27 MOV %RSP,%RBP |
0x4bc2a PUSH %R15 |
0x4bc2c PUSH %R14 |
0x4bc2e PUSH %R13 |
0x4bc30 MOV %EDX,%R13D |
0x4bc33 PUSH %R12 |
0x4bc35 MOV %R9,%R12 |
0x4bc38 PUSH %RBX |
0x4bc39 MOV %R8,%RBX |
0x4bc3c AND $-0x20,%RSP |
0x4bc40 SUB $0xa0,%RSP |
0x4bc47 CMPB $0,0x18(%RBP) |
0x4bc4b MOV 0x10(%RBP),%R14 |
0x4bc4f MOV %RCX,(%RSP) |
0x4bc53 MOV %EDX,0x18(%RSP) |
0x4bc57 JNE 4bc63 |
0x4bc59 MOV 0x90(%RDI),%EAX |
0x4bc5f MOV %EAX,0x18(%RSP) |
0x4bc63 MOV 0x18(%RSP),%EDX |
0x4bc67 TEST %EDX,%EDX |
0x4bc69 JLE 4bcb5 |
0x4bc6b MOVSXD 0x18(%RSP),%R15 |
0x4bc70 XOR %ESI,%ESI |
0x4bc72 MOV %RBX,%RDI |
0x4bc75 MOV %R10,0x60(%RSP) |
0x4bc7a MOV %R11,0x80(%RSP) |
0x4bc82 SAL $0x3,%R15 |
0x4bc86 MOV %R15,%RDX |
0x4bc89 CALL 80c0 <memset@plt> |
0x4bc8e MOV %R15,%RDX |
0x4bc91 XOR %ESI,%ESI |
0x4bc93 MOV %R12,%RDI |
0x4bc96 CALL 80c0 <memset@plt> |
0x4bc9b MOV %R15,%RDX |
0x4bc9e XOR %ESI,%ESI |
0x4bca0 MOV %R14,%RDI |
0x4bca3 CALL 80c0 <memset@plt> |
0x4bca8 MOV 0x80(%RSP),%R11 |
0x4bcb0 MOV 0x60(%RSP),%R10 |
0x4bcb5 MOV 0x18(%R10),%RSI |
0x4bcb9 MOVSXD %R13D,%RCX |
0x4bcbc MOV 0xa0(%R11),%RDI |
0x4bcc3 MOV (%RSI,%RCX,4),%R8D |
0x4bcc7 MOV %RDI,0x10(%RSP) |
0x4bccc IMUL %EDI,%R8D |
0x4bcd0 MOVSXD %R8D,%R9 |
0x4bcd3 TEST %RDI,%RDI |
0x4bcd6 JE 4c273 |
0x4bcdc MOV 0x200(%R11),%RDX |
0x4bce3 MOV 0x268(%R10),%RAX |
0x4bcea MOV %R13D,0x1c(%RSP) |
0x4bcef MOV 0x1e8(%R11),%R10 |
0x4bcf6 MOV 0x1d0(%R11),%RDI |
0x4bcfd LEA (%RDX,%R9,8),%R11 |
0x4bd01 MOV 0x18(%RAX),%R15 |
0x4bd05 VMOVSD 0x31eb3(%RIP),%XMM15 |
0x4bd0d XOR %R9D,%R9D |
0x4bd10 MOV %R11,0x8(%RSP) |
0x4bd15 NOPL (%RAX) |
(484) 0x4bd18 MOV 0x8(%RSP),%R13 |
(484) 0x4bd1d MOVSXD (%R15,%R9,4),%R8 |
(484) 0x4bd21 MOV 0x18(%RSP),%ECX |
(484) 0x4bd25 MOV (%R13,%R9,8),%RAX |
(484) 0x4bd2a INC %R9 |
(484) 0x4bd2d MOV %R8,%R11 |
(484) 0x4bd30 MOV (%R15,%R9,4),%R13D |
(484) 0x4bd34 VMOVSD 0x238(%RAX),%XMM7 |
(484) 0x4bd3c CMP %R13D,%ECX |
(484) 0x4bd3f VMULSD %XMM7,%XMM7,%XMM14 |
(484) 0x4bd43 CMOVLE %ECX,%R13D |
(484) 0x4bd47 SUB %R8D,%R13D |
(484) 0x4bd4a TEST %R13D,%R13D |
(484) 0x4bd4d JLE 4c265 |
(484) 0x4bd53 MOV (%RSP),%RDX |
(484) 0x4bd57 MOVSXD %R13D,%RSI |
(484) 0x4bd5a VMOVSD 0x8(%RAX),%XMM0 |
(484) 0x4bd5f XOR %ECX,%ECX |
(484) 0x4bd61 MOV %RSI,0x80(%RSP) |
(484) 0x4bd69 LEA (%RDX,%R8,8),%RSI |
(484) 0x4bd6d XOR %EDX,%EDX |
(484) 0x4bd6f AND $0x7,%R13D |
(484) 0x4bd73 JE 4bebb |
(484) 0x4bd79 CMP $0x1,%R13 |
(484) 0x4bd7d JE 4be84 |
(484) 0x4bd83 CMP $0x2,%R13 |
(484) 0x4bd87 JE 4be5c |
(484) 0x4bd8d CMP $0x3,%R13 |
(484) 0x4bd91 JE 4be34 |
(484) 0x4bd97 CMP $0x4,%R13 |
(484) 0x4bd9b JE 4be0c |
(484) 0x4bd9d CMP $0x5,%R13 |
(484) 0x4bda1 JE 4bde4 |
(484) 0x4bda3 CMP $0x6,%R13 |
(484) 0x4bda7 JE 4bdbc |
(484) 0x4bda9 VMOVSD (%RSI),%XMM1 |
(484) 0x4bdad VCOMISD %XMM1,%XMM0 |
(484) 0x4bdb1 JA 4c282 |
(484) 0x4bdb7 MOV $0x1,%EDX |
(484) 0x4bdbc VMOVSD (%RSI,%RDX,8),%XMM2 |
(484) 0x4bdc1 VCOMISD %XMM2,%XMM0 |
(484) 0x4bdc5 JBE 4bde1 |
(484) 0x4bdc7 LEA (%R11,%RDX,1),%R13D |
(484) 0x4bdcb CMP %R13D,0x1c(%RSP) |
(484) 0x4bdd0 JE 4bde1 |
(484) 0x4bdd2 MOVSXD %ECX,%R13 |
(484) 0x4bdd5 INC %ECX |
(484) 0x4bdd7 MOV %EDX,(%R10,%R13,4) |
(484) 0x4bddb VMOVSD %XMM2,(%RDI,%R13,8) |
(484) 0x4bde1 INC %RDX |
(484) 0x4bde4 VMOVSD (%RSI,%RDX,8),%XMM3 |
(484) 0x4bde9 VCOMISD %XMM3,%XMM0 |
(484) 0x4bded JBE 4be09 |
(484) 0x4bdef LEA (%R11,%RDX,1),%R13D |
(484) 0x4bdf3 CMP %R13D,0x1c(%RSP) |
(484) 0x4bdf8 JE 4be09 |
(484) 0x4bdfa MOVSXD %ECX,%R13 |
(484) 0x4bdfd INC %ECX |
(484) 0x4bdff MOV %EDX,(%R10,%R13,4) |
(484) 0x4be03 VMOVSD %XMM3,(%RDI,%R13,8) |
(484) 0x4be09 INC %RDX |
(484) 0x4be0c VMOVSD (%RSI,%RDX,8),%XMM4 |
(484) 0x4be11 VCOMISD %XMM4,%XMM0 |
(484) 0x4be15 JBE 4be31 |
(484) 0x4be17 LEA (%R11,%RDX,1),%R13D |
(484) 0x4be1b CMP %R13D,0x1c(%RSP) |
(484) 0x4be20 JE 4be31 |
(484) 0x4be22 MOVSXD %ECX,%R13 |
(484) 0x4be25 INC %ECX |
(484) 0x4be27 MOV %EDX,(%R10,%R13,4) |
(484) 0x4be2b VMOVSD %XMM4,(%RDI,%R13,8) |
(484) 0x4be31 INC %RDX |
(484) 0x4be34 VMOVSD (%RSI,%RDX,8),%XMM5 |
(484) 0x4be39 VCOMISD %XMM5,%XMM0 |
(484) 0x4be3d JBE 4be59 |
(484) 0x4be3f LEA (%R11,%RDX,1),%R13D |
(484) 0x4be43 CMP %R13D,0x1c(%RSP) |
(484) 0x4be48 JE 4be59 |
(484) 0x4be4a MOVSXD %ECX,%R13 |
(484) 0x4be4d INC %ECX |
(484) 0x4be4f MOV %EDX,(%R10,%R13,4) |
(484) 0x4be53 VMOVSD %XMM5,(%RDI,%R13,8) |
(484) 0x4be59 INC %RDX |
(484) 0x4be5c VMOVSD (%RSI,%RDX,8),%XMM6 |
(484) 0x4be61 VCOMISD %XMM6,%XMM0 |
(484) 0x4be65 JBE 4be81 |
(484) 0x4be67 LEA (%R11,%RDX,1),%R13D |
(484) 0x4be6b CMP %R13D,0x1c(%RSP) |
(484) 0x4be70 JE 4be81 |
(484) 0x4be72 MOVSXD %ECX,%R13 |
(484) 0x4be75 INC %ECX |
(484) 0x4be77 MOV %EDX,(%R10,%R13,4) |
(484) 0x4be7b VMOVSD %XMM6,(%RDI,%R13,8) |
(484) 0x4be81 INC %RDX |
(484) 0x4be84 VMOVSD (%RSI,%RDX,8),%XMM8 |
(484) 0x4be89 VCOMISD %XMM8,%XMM0 |
(484) 0x4be8e JBE 4beaa |
(484) 0x4be90 LEA (%R11,%RDX,1),%R13D |
(484) 0x4be94 CMP %R13D,0x1c(%RSP) |
(484) 0x4be99 JE 4beaa |
(484) 0x4be9b MOVSXD %ECX,%R13 |
(484) 0x4be9e INC %ECX |
(484) 0x4bea0 MOV %EDX,(%R10,%R13,4) |
(484) 0x4bea4 VMOVSD %XMM8,(%RDI,%R13,8) |
(484) 0x4beaa INC %RDX |
(484) 0x4bead CMP %RDX,0x80(%RSP) |
(484) 0x4beb5 JE 4c01d |
(484) 0x4bebb MOV %RAX,0x60(%RSP) |
(485) 0x4bec0 VMOVSD (%RSI,%RDX,8),%XMM9 |
(485) 0x4bec5 VCOMISD %XMM9,%XMM0 |
(485) 0x4beca JBE 4bee5 |
(485) 0x4becc LEA (%R11,%RDX,1),%EAX |
(485) 0x4bed0 CMP %EAX,0x1c(%RSP) |
(485) 0x4bed4 JE 4bee5 |
(485) 0x4bed6 MOVSXD %ECX,%R13 |
(485) 0x4bed9 INC %ECX |
(485) 0x4bedb MOV %EDX,(%R10,%R13,4) |
(485) 0x4bedf VMOVSD %XMM9,(%RDI,%R13,8) |
(485) 0x4bee5 INC %RDX |
(485) 0x4bee8 VMOVSD (%RSI,%RDX,8),%XMM10 |
(485) 0x4beed VCOMISD %XMM10,%XMM0 |
(485) 0x4bef2 JBE 4bf0d |
(485) 0x4bef4 LEA (%R11,%RDX,1),%EAX |
(485) 0x4bef8 CMP %EAX,0x1c(%RSP) |
(485) 0x4befc JE 4bf0d |
(485) 0x4befe MOVSXD %ECX,%R13 |
(485) 0x4bf01 INC %ECX |
(485) 0x4bf03 MOV %EDX,(%R10,%R13,4) |
(485) 0x4bf07 VMOVSD %XMM10,(%RDI,%R13,8) |
(485) 0x4bf0d LEA 0x1(%RDX),%RAX |
(485) 0x4bf11 VMOVSD (%RSI,%RAX,8),%XMM11 |
(485) 0x4bf16 VCOMISD %XMM11,%XMM0 |
(485) 0x4bf1b JBE 4bf37 |
(485) 0x4bf1d LEA (%R11,%RAX,1),%R13D |
(485) 0x4bf21 CMP %R13D,0x1c(%RSP) |
(485) 0x4bf26 JE 4bf37 |
(485) 0x4bf28 MOVSXD %ECX,%R13 |
(485) 0x4bf2b INC %ECX |
(485) 0x4bf2d MOV %EAX,(%R10,%R13,4) |
(485) 0x4bf31 VMOVSD %XMM11,(%RDI,%R13,8) |
(485) 0x4bf37 LEA 0x2(%RDX),%RAX |
(485) 0x4bf3b VMOVSD (%RSI,%RAX,8),%XMM12 |
(485) 0x4bf40 VCOMISD %XMM12,%XMM0 |
(485) 0x4bf45 JBE 4bf61 |
(485) 0x4bf47 LEA (%R11,%RAX,1),%R13D |
(485) 0x4bf4b CMP %R13D,0x1c(%RSP) |
(485) 0x4bf50 JE 4bf61 |
(485) 0x4bf52 MOVSXD %ECX,%R13 |
(485) 0x4bf55 INC %ECX |
(485) 0x4bf57 MOV %EAX,(%R10,%R13,4) |
(485) 0x4bf5b VMOVSD %XMM12,(%RDI,%R13,8) |
(485) 0x4bf61 LEA 0x3(%RDX),%RAX |
(485) 0x4bf65 VMOVSD (%RSI,%RAX,8),%XMM13 |
(485) 0x4bf6a VCOMISD %XMM13,%XMM0 |
(485) 0x4bf6f JBE 4bf8b |
(485) 0x4bf71 LEA (%R11,%RAX,1),%R13D |
(485) 0x4bf75 CMP %R13D,0x1c(%RSP) |
(485) 0x4bf7a JE 4bf8b |
(485) 0x4bf7c MOVSXD %ECX,%R13 |
(485) 0x4bf7f INC %ECX |
(485) 0x4bf81 MOV %EAX,(%R10,%R13,4) |
(485) 0x4bf85 VMOVSD %XMM13,(%RDI,%R13,8) |
(485) 0x4bf8b LEA 0x4(%RDX),%RAX |
(485) 0x4bf8f VMOVSD (%RSI,%RAX,8),%XMM1 |
(485) 0x4bf94 VCOMISD %XMM1,%XMM0 |
(485) 0x4bf98 JBE 4bfb4 |
(485) 0x4bf9a LEA (%R11,%RAX,1),%R13D |
(485) 0x4bf9e CMP %R13D,0x1c(%RSP) |
(485) 0x4bfa3 JE 4bfb4 |
(485) 0x4bfa5 MOVSXD %ECX,%R13 |
(485) 0x4bfa8 INC %ECX |
(485) 0x4bfaa MOV %EAX,(%R10,%R13,4) |
(485) 0x4bfae VMOVSD %XMM1,(%RDI,%R13,8) |
(485) 0x4bfb4 LEA 0x5(%RDX),%RAX |
(485) 0x4bfb8 VMOVSD (%RSI,%RAX,8),%XMM2 |
(485) 0x4bfbd VCOMISD %XMM2,%XMM0 |
(485) 0x4bfc1 JBE 4bfdd |
(485) 0x4bfc3 LEA (%R11,%RAX,1),%R13D |
(485) 0x4bfc7 CMP %R13D,0x1c(%RSP) |
(485) 0x4bfcc JE 4bfdd |
(485) 0x4bfce MOVSXD %ECX,%R13 |
(485) 0x4bfd1 INC %ECX |
(485) 0x4bfd3 MOV %EAX,(%R10,%R13,4) |
(485) 0x4bfd7 VMOVSD %XMM2,(%RDI,%R13,8) |
(485) 0x4bfdd LEA 0x6(%RDX),%RAX |
(485) 0x4bfe1 VMOVSD (%RSI,%RAX,8),%XMM3 |
(485) 0x4bfe6 VCOMISD %XMM3,%XMM0 |
(485) 0x4bfea JBE 4c006 |
(485) 0x4bfec LEA (%R11,%RAX,1),%R13D |
(485) 0x4bff0 CMP %R13D,0x1c(%RSP) |
(485) 0x4bff5 JE 4c006 |
(485) 0x4bff7 MOVSXD %ECX,%R13 |
(485) 0x4bffa INC %ECX |
(485) 0x4bffc MOV %EAX,(%R10,%R13,4) |
(485) 0x4c000 VMOVSD %XMM3,(%RDI,%R13,8) |
(485) 0x4c006 ADD $0x7,%RDX |
(485) 0x4c00a CMP %RDX,0x80(%RSP) |
(485) 0x4c012 JNE 4bec0 |
(484) 0x4c018 MOV 0x60(%RSP),%RAX |
(484) 0x4c01d TEST %ECX,%ECX |
(484) 0x4c01f JLE 4c265 |
(484) 0x4c025 VMOVSD 0x168(%RAX),%XMM0 |
(484) 0x4c02d VMOVSD 0x128(%RAX),%XMM5 |
(484) 0x4c035 MOVSXD %ECX,%RCX |
(484) 0x4c038 XOR %EDX,%EDX |
(484) 0x4c03a VMOVSD 0x170(%RAX),%XMM8 |
(484) 0x4c042 VMOVSD 0x130(%RAX),%XMM10 |
(484) 0x4c04a VMOVSD 0xe0(%RAX),%XMM1 |
(484) 0x4c052 VMOVSD 0xa0(%RAX),%XMM3 |
(484) 0x4c05a VMOVHPD 0x188(%RAX),%XMM0,%XMM4 |
(484) 0x4c062 VMOVHPD 0x148(%RAX),%XMM5,%XMM6 |
(484) 0x4c06a VMOVHPD 0x190(%RAX),%XMM8,%XMM9 |
(484) 0x4c072 VMOVHPD 0x150(%RAX),%XMM10,%XMM11 |
(484) 0x4c07a VINSERTF128 $0x1,%XMM4,%YMM6,%YMM13 |
(484) 0x4c080 VMOVSD 0xe8(%RAX),%XMM4 |
(484) 0x4c088 VMOVHPD 0x100(%RAX),%XMM1,%XMM2 |
(484) 0x4c090 VMOVHPD 0xc0(%RAX),%XMM3,%XMM0 |
(484) 0x4c098 VMOVSD 0xa8(%RAX),%XMM6 |
(484) 0x4c0a0 VINSERTF128 $0x1,%XMM9,%YMM11,%YMM12 |
(484) 0x4c0a6 VMOVSD 0xf0(%RAX),%XMM9 |
(484) 0x4c0ae VINSERTF128 $0x1,%XMM2,%YMM0,%YMM11 |
(484) 0x4c0b4 VMOVSD 0xb0(%RAX),%XMM2 |
(484) 0x4c0bc VMOVHPD 0x108(%RAX),%XMM4,%XMM5 |
(484) 0x4c0c4 VMOVHPD 0xc8(%RAX),%XMM6,%XMM8 |
(484) 0x4c0cc VMOVSD 0x58(%RAX),%XMM0 |
(484) 0x4c0d1 MOV 0x218(%RAX),%R11 |
(484) 0x4c0d8 VMOVHPD 0xd0(%RAX),%XMM2,%XMM3 |
(484) 0x4c0e0 VMOVHPD 0x110(%RAX),%XMM9,%XMM1 |
(484) 0x4c0e8 VINSERTF128 $0x1,%XMM5,%YMM8,%YMM10 |
(484) 0x4c0ee VMOVSD 0x18(%RAX),%XMM5 |
(484) 0x4c0f3 VINSERTF128 $0x1,%XMM1,%YMM3,%YMM9 |
(484) 0x4c0f9 VMOVSD 0x60(%RAX),%XMM1 |
(484) 0x4c0fe VMOVSD 0x20(%RAX),%XMM3 |
(484) 0x4c103 VMOVHPD 0x78(%RAX),%XMM0,%XMM4 |
(484) 0x4c108 VMOVHPD 0x38(%RAX),%XMM5,%XMM6 |
(484) 0x4c10d VMOVSD 0x68(%RAX),%XMM5 |
(484) 0x4c112 VMOVHPD 0x80(%RAX),%XMM1,%XMM2 |
(484) 0x4c11a VMOVHPD 0x40(%RAX),%XMM3,%XMM0 |
(484) 0x4c11f VINSERTF128 $0x1,%XMM4,%YMM6,%YMM8 |
(484) 0x4c125 VMOVAPD %YMM8,0x80(%RSP) |
(484) 0x4c12e VINSERTF128 $0x1,%XMM2,%YMM0,%YMM4 |
(484) 0x4c134 VMOVAPD %YMM4,0x20(%RSP) |
(484) 0x4c13a VMOVHPD 0x88(%RAX),%XMM5,%XMM6 |
(484) 0x4c142 VMOVSD 0x28(%RAX),%XMM8 |
(484) 0x4c147 VMOVSD 0x70(%RAX),%XMM3 |
(484) 0x4c14c VMOVSD 0x30(%RAX),%XMM4 |
(484) 0x4c151 VMOVHPD 0x48(%RAX),%XMM8,%XMM1 |
(484) 0x4c156 VMOVHPD 0x90(%RAX),%XMM3,%XMM0 |
(484) 0x4c15e VMOVHPD 0x50(%RAX),%XMM4,%XMM5 |
(484) 0x4c163 VINSERTF128 $0x1,%XMM6,%YMM1,%YMM2 |
(484) 0x4c169 VINSERTF128 $0x1,%XMM0,%YMM5,%YMM6 |
(484) 0x4c16f VMOVAPD %YMM2,0x60(%RSP) |
(484) 0x4c175 VMOVAPD %YMM6,0x40(%RSP) |
(484) 0x4c17b NOPL (%RAX,%RAX,1) |
(486) 0x4c180 VMOVSD (%RDI,%RDX,8),%XMM2 |
(486) 0x4c185 VMOVAPD %YMM10,%YMM3 |
(486) 0x4c189 MOVSXD (%R10,%RDX,4),%RAX |
(486) 0x4c18d INC %RDX |
(486) 0x4c190 VMULSD %XMM2,%XMM7,%XMM8 |
(486) 0x4c194 ADD %R8,%RAX |
(486) 0x4c197 VDIVSD %XMM2,%XMM15,%XMM2 |
(486) 0x4c19b VROUNDSD $0xb,%XMM8,%XMM8,%XMM1 |
(486) 0x4c1a1 VCVTTSD2SI %XMM8,%ESI |
(486) 0x4c1a6 VSUBSD %XMM1,%XMM8,%XMM0 |
(486) 0x4c1aa VMOVAPD 0x60(%RSP),%YMM1 |
(486) 0x4c1b0 VMULSD %XMM0,%XMM0,%XMM6 |
(486) 0x4c1b4 VBROADCASTSD %XMM0,%YMM4 |
(486) 0x4c1b9 VFMADD132PD %YMM4,%YMM9,%YMM3 |
(486) 0x4c1be VFMADD213PD 0x40(%RSP),%YMM4,%YMM1 |
(486) 0x4c1c5 MOVSXD %ESI,%R13 |
(486) 0x4c1c8 VFMADD132PD %YMM13,%YMM12,%YMM4 |
(486) 0x4c1cd VMOVUPD (%R11,%R13,8),%YMM8 |
(486) 0x4c1d3 VMULSD %XMM6,%XMM0,%XMM0 |
(486) 0x4c1d7 VBROADCASTSD %XMM6,%YMM5 |
(486) 0x4c1dc VFMADD231PD %YMM5,%YMM11,%YMM3 |
(486) 0x4c1e1 VMULPD %YMM4,%YMM8,%YMM4 |
(486) 0x4c1e5 VBROADCASTSD %XMM0,%YMM6 |
(486) 0x4c1ea VMULPD 0x80(%RSP),%YMM6,%YMM0 |
(486) 0x4c1f3 VMULPD %YMM3,%YMM8,%YMM3 |
(486) 0x4c1f7 VFMADD231PD 0x20(%RSP),%YMM5,%YMM0 |
(486) 0x4c1fe VEXTRACTF128 $0x1,%YMM4,%XMM5 |
(486) 0x4c204 VADDPD %YMM0,%YMM1,%YMM1 |
(486) 0x4c208 VMULPD %YMM8,%YMM1,%YMM6 |
(486) 0x4c20d VADDPD %XMM4,%XMM5,%XMM8 |
(486) 0x4c211 VEXTRACTF128 $0x1,%YMM3,%XMM5 |
(486) 0x4c217 VADDPD %XMM3,%XMM5,%XMM3 |
(486) 0x4c21b VUNPCKHPD %XMM8,%XMM8,%XMM0 |
(486) 0x4c220 VADDPD %XMM8,%XMM0,%XMM1 |
(486) 0x4c225 VUNPCKHPD %XMM3,%XMM3,%XMM8 |
(486) 0x4c229 VADDPD %XMM3,%XMM8,%XMM0 |
(486) 0x4c22d VEXTRACTF128 $0x1,%YMM6,%XMM5 |
(486) 0x4c233 VADDPD %XMM6,%XMM5,%XMM6 |
(486) 0x4c237 VMULSD %XMM1,%XMM14,%XMM4 |
(486) 0x4c23b VMULSD %XMM0,%XMM7,%XMM1 |
(486) 0x4c23f VUNPCKHPD %XMM6,%XMM6,%XMM3 |
(486) 0x4c243 VADDPD %XMM6,%XMM3,%XMM8 |
(486) 0x4c247 VMOVSD %XMM4,(%R14,%RAX,8) |
(486) 0x4c24d VMULSD %XMM1,%XMM2,%XMM4 |
(486) 0x4c251 VMOVLPD %XMM8,(%RBX,%RAX,8) |
(486) 0x4c256 VMOVSD %XMM4,(%R12,%RAX,8) |
(486) 0x4c25c CMP %RCX,%RDX |
(486) 0x4c25f JNE 4c180 |
(484) 0x4c265 CMP %R9,0x10(%RSP) |
(484) 0x4c26a JNE 4bd18 |
0x4c270 VZEROUPPER |
0x4c273 LEA -0x28(%RBP),%RSP |
0x4c277 POP %RBX |
0x4c278 POP %R12 |
0x4c27a POP %R13 |
0x4c27c POP %R14 |
0x4c27e POP %R15 |
0x4c280 POP %RBP |
0x4c281 RET |
(484) 0x4c282 CMP %R8D,0x1c(%RSP) |
(484) 0x4c287 JE 4bdb7 |
(484) 0x4c28d MOV %EDX,(%R10) |
(484) 0x4c290 MOV $0x1,%ECX |
(484) 0x4c295 VMOVSD %XMM1,(%RDI) |
(484) 0x4c299 JMP 4bdb7 |
0x4c29e XCHG %AX,%AX |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.08+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:315 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | libqmcwfs.so |
○ | main._omp_fn.1 | stl_vector.h:1121 | exec |
○ | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
►1.92+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:315 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::acc[...] | NewTimer.h:249 | libqmcwfs.so |
○ | main._omp_fn.1 | stl_vector.h:1121 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | TwoBodyJastrowRef.h:254-279 |
Module | libqmcwfs.so |
nb instructions | 72 |
nb uops | 76 |
loop length | 268 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.70 | 2.60 | 7.67 | 7.67 | 8.50 | 2.60 | 2.50 | 8.50 | 8.50 | 8.50 | 2.60 | 7.67 |
cycles | 2.70 | 2.67 | 7.67 | 7.67 | 8.50 | 2.60 | 2.50 | 8.50 | 8.50 | 8.50 | 2.60 | 7.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.19 |
Stall cycles | 0.00 |
Front-end | 12.67 |
Dispatch | 8.50 |
Overall L1 | 12.67 |
all | 5% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 4% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 10% |
load | 9% |
store | 10% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 9% |
store | 10% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xa0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMPB $0,0x18(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JNE 4bc63 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x43> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x90(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4bcb5 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x95> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x18(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80c0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80c0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80c0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x80(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R10),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R13D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0xa0(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%RCX,4),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOVSXD %R8D,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 4c273 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x653> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x200(%R11),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x268(%R10),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13D,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1e8(%R11),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x1d0(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R9,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x31eb3(%RIP),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | TwoBodyJastrowRef.h:254-279 |
Module | libqmcwfs.so |
nb instructions | 72 |
nb uops | 76 |
loop length | 268 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.70 | 2.60 | 7.67 | 7.67 | 8.50 | 2.60 | 2.50 | 8.50 | 8.50 | 8.50 | 2.60 | 7.67 |
cycles | 2.70 | 2.67 | 7.67 | 7.67 | 8.50 | 2.60 | 2.50 | 8.50 | 8.50 | 8.50 | 2.60 | 7.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.19 |
Stall cycles | 0.00 |
Front-end | 12.67 |
Dispatch | 8.50 |
Overall L1 | 12.67 |
all | 5% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 4% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 10% |
load | 9% |
store | 10% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 9% |
store | 10% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xa0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMPB $0,0x18(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JNE 4bc63 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x43> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x90(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4bcb5 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x95> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x18(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80c0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80c0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80c0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x80(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R10),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R13D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0xa0(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%RCX,4),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOVSXD %R8D,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 4c273 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x653> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x200(%R11),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x268(%R10),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13D,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1e8(%R11),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x1d0(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R9,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x31eb3(%RIP),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::TwoBodyJastrowRef | 0.24 | 0.21 |
▼Loop 484 - TwoBodyJastrowRef.h:269-271 - libqmcwfs.so– | 0.01 | 0.01 |
○Loop 485 - BsplineFunctor.h:291-298 - libqmcwfs.so | 0.19 | 0.16 |
○Loop 486 - BsplineFunctor.h:305-336 - libqmcwfs.so | 0.03 | 0.03 |