Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLo ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:231-256 [...] | Coverage: 0.07% |
---|
Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLo ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:231-256 [...] | Coverage: 0.07% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 156 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
156: res += a[i] * b[i]; |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 62 - 62 |
-------------------------------------------------------------------------------- |
62: X[d] = T(0); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 217 - 217 |
-------------------------------------------------------------------------------- |
217: inline Type_t* operator[](size_type i) { return X.data() + i * D2; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 248 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 231 - 256 |
-------------------------------------------------------------------------------- |
231: typename DiracDeterminantRef<DU_TYPE>::RealType DiracDeterminantRef<DU_TYPE>::evaluateLog( |
232: ParticleSet& P, |
233: ParticleSet::ParticleGradient& G, |
234: ParticleSet::ParticleLaplacian& L) |
235: { |
236: recompute(P); |
237: |
238: if (NumPtcls == 1) |
239: { |
240: ValueType y = psiM(0, 0); |
241: GradType rv = y * dpsiM(0, 0); |
242: G[FirstIndex] += rv; |
243: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
244: } |
245: else |
246: { |
247: for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++) |
248: { |
249: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
250: mValueType lap = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
251: G[iat] += rv; |
252: L[iat] += lap - dot(rv, rv); |
253: } |
254: } |
255: return LogValue; |
256: } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
0x77850 PUSH %RBP |
0x77851 MOV %RSP,%RBP |
0x77854 PUSH %R15 |
0x77856 PUSH %R14 |
0x77858 PUSH %R13 |
0x7785a MOV %RDX,%R13 |
0x7785d PUSH %R12 |
0x7785f MOV %RDI,%R12 |
0x77862 PUSH %RBX |
0x77863 MOV %RCX,%RBX |
0x77866 AND $-0x20,%RSP |
0x7786a SUB $0x60,%RSP |
0x7786e CALL 77720 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> |
0x77873 MOV 0x484(%R12),%EDX |
0x7787b CMP $0x1,%EDX |
0x7787e JE 78066 |
0x77884 MOVSXD 0x478(%R12),%RAX |
0x7788c TEST %EDX,%EDX |
0x7788e JLE 78025 |
0x77894 MOV 0x118(%R12),%R15 |
0x7789c MOV 0x480(%R12),%EDI |
0x778a4 LEA (%RAX,%RAX,2),%R8 |
0x778a8 MOVSXD %EDX,%RDX |
0x778ab MOV 0xd8(%R12),%RSI |
0x778b3 MOV 0x18(%R13),%RCX |
0x778b7 MOV %RDX,0x30(%RSP) |
0x778bc LEA (%R15,%R15,2),%R10 |
0x778c0 MOV 0x18(%RBX),%RBX |
0x778c4 MOV 0x140(%R12),%R11 |
0x778cc MOV %EDI,0x5c(%RSP) |
0x778d0 LEA (,%R10,8),%R13 |
0x778d8 MOV %RSI,0x20(%RSP) |
0x778dd MOV 0x100(%R12),%R14 |
0x778e5 LEA (%RCX,%R8,8),%R9 |
0x778e9 MOV %R13,0x10(%RSP) |
0x778ee MOV %EDI,%R13D |
0x778f1 LEA (%RBX,%RAX,8),%RAX |
0x778f5 LEA -0x1(%RDI),%EDI |
0x778f8 MOV %R13D,%ESI |
0x778fb MOV %EDI,0x3c(%RSP) |
0x778ff VMOVDQA 0x10ab9(%RIP),%YMM8 |
0x77907 AND $-0x4,%R13D |
0x7790b SHR $0x2,%ESI |
0x7790e MOV %RAX,0x28(%RSP) |
0x77913 VMOVDQA 0x10ac5(%RIP),%YMM7 |
0x7791b XOR %EBX,%EBX |
0x7791d MOV %R12,0x8(%RSP) |
0x77922 SAL $0x5,%RSI |
0x77926 VMOVDQA 0x10ad2(%RIP),%YMM6 |
0x7792e XOR %R10D,%R10D |
0x77931 VMOVDQA 0x10ae7(%RIP),%YMM5 |
0x77939 VMOVDQA 0x1085f(%RIP),%YMM4 |
0x77941 MOV %R15,0x18(%RSP) |
0x77946 MOV %R11,%R15 |
0x77949 VMOVDQA 0x1086f(%RIP),%YMM3 |
0x77951 MOV %RSI,0x50(%RSP) |
0x77956 XOR %ESI,%ESI |
0x77958 NOPL (%RAX,%RAX,1) |
(747) 0x77960 MOV 0x5c(%RSP),%R8D |
(747) 0x77965 LEA (%R14,%RBX,8),%R12 |
(747) 0x77969 MOV %R12,%RAX |
(747) 0x7796c TEST %R8D,%R8D |
(747) 0x7796f JLE 780ea |
(747) 0x77975 CMPL $0x2,0x3c(%RSP) |
(747) 0x7797a JBE 78107 |
(747) 0x77980 MOV 0x50(%RSP),%RDI |
(747) 0x77985 VXORPD %XMM10,%XMM10,%XMM10 |
(747) 0x7798a MOV %R15,%RCX |
(747) 0x7798d VMOVAPD %YMM10,%YMM11 |
(747) 0x77992 VMOVAPD %YMM10,%YMM0 |
(747) 0x77996 LEA (%R12,%RDI,1),%R8 |
(747) 0x7799a SUB $0x20,%RDI |
(747) 0x7799e SHR $0x5,%RDI |
(747) 0x779a2 INC %RDI |
(747) 0x779a5 AND $0x3,%EDI |
(747) 0x779a8 JE 77ad2 |
(747) 0x779ae CMP $0x1,%RDI |
(747) 0x779b2 JE 77a71 |
(747) 0x779b8 CMP $0x2,%RDI |
(747) 0x779bc JE 77a19 |
(747) 0x779be VMOVUPD (%R15),%YMM0 |
(747) 0x779c3 VMOVUPD 0x20(%R15),%YMM2 |
(747) 0x779c9 ADD $0x20,%R12 |
(747) 0x779cd LEA 0x60(%R15),%RCX |
(747) 0x779d1 VMOVUPD 0x40(%R15),%YMM13 |
(747) 0x779d7 VMOVUPD -0x20(%R12),%YMM15 |
(747) 0x779de VMOVAPD %YMM0,%YMM11 |
(747) 0x779e2 VMOVAPD %YMM0,%YMM12 |
(747) 0x779e6 VPERMT2PD %YMM2,%YMM4,%YMM0 |
(747) 0x779ec VPERMT2PD %YMM2,%YMM8,%YMM11 |
(747) 0x779f2 VPERMT2PD %YMM2,%YMM6,%YMM12 |
(747) 0x779f8 VPERMT2PD %YMM13,%YMM3,%YMM0 |
(747) 0x779fe VPERMT2PD %YMM13,%YMM7,%YMM11 |
(747) 0x77a04 VFMADD132PD %YMM15,%YMM10,%YMM0 |
(747) 0x77a09 VPERMT2PD %YMM13,%YMM5,%YMM12 |
(747) 0x77a0f VFMADD132PD %YMM15,%YMM10,%YMM11 |
(747) 0x77a14 VFMADD231PD %YMM12,%YMM15,%YMM10 |
(747) 0x77a19 VMOVUPD (%RCX),%YMM1 |
(747) 0x77a1d VMOVUPD 0x20(%RCX),%YMM14 |
(747) 0x77a22 ADD $0x20,%R12 |
(747) 0x77a26 ADD $0x60,%RCX |
(747) 0x77a2a VMOVUPD -0x20(%RCX),%YMM15 |
(747) 0x77a2f VMOVUPD -0x20(%R12),%YMM9 |
(747) 0x77a36 VMOVAPD %YMM1,%YMM13 |
(747) 0x77a3a VMOVAPD %YMM1,%YMM2 |
(747) 0x77a3e VPERMT2PD %YMM14,%YMM4,%YMM1 |
(747) 0x77a44 VPERMT2PD %YMM14,%YMM8,%YMM13 |
(747) 0x77a4a VPERMT2PD %YMM14,%YMM6,%YMM2 |
(747) 0x77a50 VPERMT2PD %YMM15,%YMM3,%YMM1 |
(747) 0x77a56 VPERMT2PD %YMM15,%YMM7,%YMM13 |
(747) 0x77a5c VFMADD231PD %YMM1,%YMM9,%YMM0 |
(747) 0x77a61 VPERMT2PD %YMM15,%YMM5,%YMM2 |
(747) 0x77a67 VFMADD231PD %YMM13,%YMM9,%YMM11 |
(747) 0x77a6c VFMADD231PD %YMM2,%YMM9,%YMM10 |
(747) 0x77a71 VMOVUPD (%RCX),%YMM1 |
(747) 0x77a75 VMOVUPD 0x20(%RCX),%YMM14 |
(747) 0x77a7a ADD $0x20,%R12 |
(747) 0x77a7e ADD $0x60,%RCX |
(747) 0x77a82 VMOVUPD -0x20(%RCX),%YMM15 |
(747) 0x77a87 VMOVUPD -0x20(%R12),%YMM9 |
(747) 0x77a8e VMOVAPD %YMM1,%YMM12 |
(747) 0x77a92 VMOVAPD %YMM1,%YMM13 |
(747) 0x77a96 VPERMT2PD %YMM14,%YMM4,%YMM1 |
(747) 0x77a9c VPERMT2PD %YMM14,%YMM8,%YMM12 |
(747) 0x77aa2 VPERMT2PD %YMM14,%YMM6,%YMM13 |
(747) 0x77aa8 VPERMT2PD %YMM15,%YMM3,%YMM1 |
(747) 0x77aae VPERMT2PD %YMM15,%YMM7,%YMM12 |
(747) 0x77ab4 VFMADD231PD %YMM1,%YMM9,%YMM0 |
(747) 0x77ab9 VPERMT2PD %YMM15,%YMM5,%YMM13 |
(747) 0x77abf VFMADD231PD %YMM12,%YMM9,%YMM11 |
(747) 0x77ac4 VFMADD231PD %YMM13,%YMM9,%YMM10 |
(747) 0x77ac9 CMP %R8,%R12 |
(747) 0x77acc JE 77c44 |
(749) 0x77ad2 VMOVUPD (%RCX),%YMM1 |
(749) 0x77ad6 VMOVUPD 0x20(%RCX),%YMM14 |
(749) 0x77adb SUB $-0x80,%R12 |
(749) 0x77adf ADD $0x180,%RCX |
(749) 0x77ae6 VMOVUPD -0x140(%RCX),%YMM15 |
(749) 0x77aee VMOVUPD -0x80(%R12),%YMM2 |
(749) 0x77af5 VMOVAPD %YMM1,%YMM9 |
(749) 0x77af9 VMOVAPD %YMM1,%YMM12 |
(749) 0x77afd VPERMT2PD %YMM14,%YMM4,%YMM1 |
(749) 0x77b03 VPERMT2PD %YMM14,%YMM8,%YMM9 |
(749) 0x77b09 VPERMT2PD %YMM14,%YMM6,%YMM12 |
(749) 0x77b0f VPERMT2PD %YMM15,%YMM3,%YMM1 |
(749) 0x77b15 VMOVUPD -0xe0(%RCX),%YMM14 |
(749) 0x77b1d VPERMT2PD %YMM15,%YMM7,%YMM9 |
(749) 0x77b23 VFMADD231PD %YMM1,%YMM2,%YMM0 |
(749) 0x77b28 VPERMT2PD %YMM15,%YMM5,%YMM12 |
(749) 0x77b2e VMOVUPD -0x100(%RCX),%YMM1 |
(749) 0x77b36 VFMADD231PD %YMM9,%YMM2,%YMM11 |
(749) 0x77b3b VFMADD231PD %YMM12,%YMM2,%YMM10 |
(749) 0x77b40 VMOVUPD -0x120(%RCX),%YMM2 |
(749) 0x77b48 VMOVUPD -0x60(%R12),%YMM9 |
(749) 0x77b4f VMOVUPD -0x80(%RCX),%YMM15 |
(749) 0x77b54 VMOVAPD %YMM2,%YMM12 |
(749) 0x77b58 VMOVAPD %YMM2,%YMM13 |
(749) 0x77b5c VPERMT2PD %YMM1,%YMM4,%YMM2 |
(749) 0x77b62 VPERMT2PD %YMM1,%YMM8,%YMM12 |
(749) 0x77b68 VPERMT2PD %YMM1,%YMM6,%YMM13 |
(749) 0x77b6e VPERMT2PD %YMM14,%YMM3,%YMM2 |
(749) 0x77b74 VMOVUPD -0xc0(%RCX),%YMM1 |
(749) 0x77b7c VPERMT2PD %YMM14,%YMM7,%YMM12 |
(749) 0x77b82 VFMADD132PD %YMM9,%YMM0,%YMM2 |
(749) 0x77b87 VPERMT2PD %YMM14,%YMM5,%YMM13 |
(749) 0x77b8d VMOVUPD -0xa0(%RCX),%YMM0 |
(749) 0x77b95 VFMADD132PD %YMM9,%YMM11,%YMM12 |
(749) 0x77b9a VMOVAPD %YMM9,%YMM11 |
(749) 0x77b9f VMOVAPD %YMM1,%YMM14 |
(749) 0x77ba3 VMOVUPD -0x40(%R12),%YMM9 |
(749) 0x77baa VFMADD132PD %YMM13,%YMM10,%YMM11 |
(749) 0x77baf VMOVAPD %YMM1,%YMM10 |
(749) 0x77bb3 VPERMT2PD %YMM0,%YMM4,%YMM1 |
(749) 0x77bb9 VMOVUPD -0x20(%RCX),%YMM13 |
(749) 0x77bbe VPERMT2PD %YMM0,%YMM8,%YMM10 |
(749) 0x77bc4 VPERMT2PD %YMM0,%YMM6,%YMM14 |
(749) 0x77bca VPERMT2PD %YMM15,%YMM3,%YMM1 |
(749) 0x77bd0 VMOVUPD -0x60(%RCX),%YMM0 |
(749) 0x77bd5 VPERMT2PD %YMM15,%YMM7,%YMM10 |
(749) 0x77bdb VPERMT2PD %YMM15,%YMM5,%YMM14 |
(749) 0x77be1 VMOVUPD -0x40(%RCX),%YMM15 |
(749) 0x77be6 VFMADD132PD %YMM9,%YMM2,%YMM1 |
(749) 0x77beb VMOVAPD %YMM10,%YMM2 |
(749) 0x77bef VMOVUPD -0x20(%R12),%YMM10 |
(749) 0x77bf6 VFMADD132PD %YMM9,%YMM12,%YMM2 |
(749) 0x77bfb VMOVAPD %YMM0,%YMM12 |
(749) 0x77bff VFMADD132PD %YMM14,%YMM11,%YMM9 |
(749) 0x77c04 VMOVAPD %YMM0,%YMM11 |
(749) 0x77c08 VPERMT2PD %YMM15,%YMM6,%YMM12 |
(749) 0x77c0e VPERMT2PD %YMM15,%YMM8,%YMM11 |
(749) 0x77c14 VPERMT2PD %YMM15,%YMM4,%YMM0 |
(749) 0x77c1a VPERMT2PD %YMM13,%YMM5,%YMM12 |
(749) 0x77c20 VPERMT2PD %YMM13,%YMM7,%YMM11 |
(749) 0x77c26 VPERMT2PD %YMM13,%YMM3,%YMM0 |
(749) 0x77c2c VFMADD132PD %YMM10,%YMM1,%YMM0 |
(749) 0x77c31 VFMADD132PD %YMM10,%YMM2,%YMM11 |
(749) 0x77c36 VFMADD132PD %YMM12,%YMM9,%YMM10 |
(749) 0x77c3b CMP %R8,%R12 |
(749) 0x77c3e JNE 77ad2 |
(747) 0x77c44 VEXTRACTF64X2 $0x1,%YMM10,%XMM15 |
(747) 0x77c4b VEXTRACTF64X2 $0x1,%YMM11,%XMM16 |
(747) 0x77c52 VADDPD %XMM10,%XMM15,%XMM14 |
(747) 0x77c57 VADDPD %XMM11,%XMM16,%XMM1 |
(747) 0x77c5d VUNPCKHPD %XMM14,%XMM14,%XMM9 |
(747) 0x77c62 VUNPCKHPD %XMM1,%XMM1,%XMM2 |
(747) 0x77c66 VADDPD %XMM14,%XMM9,%XMM9 |
(747) 0x77c6b VEXTRACTF64X2 $0x1,%YMM0,%XMM14 |
(747) 0x77c72 VADDPD %XMM1,%XMM2,%XMM13 |
(747) 0x77c76 VADDPD %XMM0,%XMM14,%XMM12 |
(747) 0x77c7a VUNPCKHPD %XMM12,%XMM12,%XMM1 |
(747) 0x77c7f VADDPD %XMM12,%XMM1,%XMM2 |
(747) 0x77c84 VMOVSD %XMM2,%XMM2,%XMM12 |
(747) 0x77c88 VUNPCKLPD %XMM13,%XMM2,%XMM2 |
(747) 0x77c8d CMP %R13D,0x5c(%RSP) |
(747) 0x77c92 JE 7803b |
(747) 0x77c98 VADDPD %XMM14,%XMM0,%XMM14 |
(747) 0x77c9d VADDPD %XMM16,%XMM11,%XMM11 |
(747) 0x77ca3 MOV %R13D,%ECX |
(747) 0x77ca6 MOV %R13D,%EDX |
(747) 0x77ca9 VADDPD %XMM15,%XMM10,%XMM10 |
(747) 0x77cae MOV 0x5c(%RSP),%R12D |
(747) 0x77cb3 SUB %ECX,%R12D |
(747) 0x77cb6 CMP $0x1,%R12D |
(747) 0x77cba JE 77d35 |
(747) 0x77cbc LEA (%RSI,%RCX,1),%RDI |
(747) 0x77cc0 ADD %RBX,%RCX |
(747) 0x77cc3 LEA (%RDI,%RDI,2),%R8 |
(747) 0x77cc7 VMOVUPD (%R14,%RCX,8),%XMM15 |
(747) 0x77ccd LEA (%R11,%R8,8),%RDI |
(747) 0x77cd1 VMOVUPD (%RDI),%XMM0 |
(747) 0x77cd5 VMOVUPD 0x10(%RDI),%XMM2 |
(747) 0x77cda VMOVUPD 0x20(%RDI),%XMM13 |
(747) 0x77cdf VPERMILPD $0x1,%XMM0,%XMM9 |
(747) 0x77ce5 VUNPCKLPD %XMM0,%XMM2,%XMM12 |
(747) 0x77ce9 VBLENDPD $0x2,%XMM2,%XMM0,%XMM0 |
(747) 0x77cef VUNPCKLPD %XMM13,%XMM9,%XMM1 |
(747) 0x77cf4 VFMADD132PD %XMM15,%XMM14,%XMM0 |
(747) 0x77cf9 VBLENDPD $0x2,%XMM13,%XMM12,%XMM13 |
(747) 0x77cff VFMADD132PD %XMM15,%XMM11,%XMM1 |
(747) 0x77d04 VFMADD132PD %XMM13,%XMM10,%XMM15 |
(747) 0x77d09 VUNPCKHPD %XMM15,%XMM15,%XMM11 |
(747) 0x77d0e VUNPCKHPD %XMM1,%XMM1,%XMM10 |
(747) 0x77d12 VADDPD %XMM15,%XMM11,%XMM9 |
(747) 0x77d17 VUNPCKHPD %XMM0,%XMM0,%XMM15 |
(747) 0x77d1b VADDPD %XMM1,%XMM10,%XMM14 |
(747) 0x77d1f VADDPD %XMM0,%XMM15,%XMM2 |
(747) 0x77d23 VUNPCKLPD %XMM14,%XMM2,%XMM2 |
(747) 0x77d28 TEST $0x1,%R12B |
(747) 0x77d2c JE 77d59 |
(747) 0x77d2e AND $-0x2,%R12D |
(747) 0x77d32 ADD %R12D,%EDX |
(747) 0x77d35 MOVSXD %EDX,%RDX |
(747) 0x77d38 LEA (%RSI,%RDX,1),%RCX |
(747) 0x77d3c VMOVSD (%RAX,%RDX,8),%XMM12 |
(747) 0x77d41 VMOVDDUP (%RAX,%RDX,8),%XMM1 |
(747) 0x77d46 LEA (%RCX,%RCX,2),%R12 |
(747) 0x77d4a LEA (%R11,%R12,8),%R8 |
(747) 0x77d4e VFMADD231PD (%R8),%XMM1,%XMM2 |
(747) 0x77d53 VFMADD231SD 0x10(%R8),%XMM12,%XMM9 |
(747) 0x77d59 MOV 0x8(%RSP),%RDX |
(747) 0x77d5e VUNPCKHPD %XMM2,%XMM2,%XMM13 |
(747) 0x77d62 VMOVSD %XMM2,%XMM2,%XMM12 |
(747) 0x77d66 MOV 0x158(%RDX),%RCX |
(747) 0x77d6d MOV 0x180(%RDX),%RDI |
(747) 0x77d74 IMUL %R10,%RCX |
(747) 0x77d78 CMPL $0x2,0x3c(%RSP) |
(747) 0x77d7d MOV %RCX,0x40(%RSP) |
(747) 0x77d82 LEA (,%RCX,8),%R8 |
(747) 0x77d8a JBE 78125 |
(747) 0x77d90 MOV 0x50(%RSP),%RCX |
(747) 0x77d95 LEA (%RDI,%R8,1),%R12 |
(747) 0x77d99 XOR %EDX,%EDX |
(747) 0x77d9b VXORPD %XMM0,%XMM0,%XMM0 |
(747) 0x77d9f SUB $0x20,%RCX |
(747) 0x77da3 SHR $0x5,%RCX |
(747) 0x77da7 INC %RCX |
(747) 0x77daa AND $0x7,%ECX |
(747) 0x77dad JE 77e6c |
(747) 0x77db3 CMP $0x1,%RCX |
(747) 0x77db7 JE 77e4d |
(747) 0x77dbd CMP $0x2,%RCX |
(747) 0x77dc1 JE 77e39 |
(747) 0x77dc3 CMP $0x3,%RCX |
(747) 0x77dc7 JE 77e25 |
(747) 0x77dc9 CMP $0x4,%RCX |
(747) 0x77dcd JE 77e11 |
(747) 0x77dcf CMP $0x5,%RCX |
(747) 0x77dd3 JE 77dfe |
(747) 0x77dd5 CMP $0x6,%RCX |
(747) 0x77dd9 JE 77dea |
(747) 0x77ddb VMOVUPD (%R12),%YMM11 |
(747) 0x77de1 MOV $0x20,%EDX |
(747) 0x77de6 VMULPD (%RAX),%YMM11,%YMM0 |
(747) 0x77dea VMOVUPD (%R12,%RDX,1),%YMM10 |
(747) 0x77df0 VMULPD (%RAX,%RDX,1),%YMM10,%YMM14 |
(747) 0x77df5 ADD $0x20,%RDX |
(747) 0x77df9 VADDPD %YMM14,%YMM0,%YMM0 |
(747) 0x77dfe VMOVUPD (%R12,%RDX,1),%YMM15 |
(747) 0x77e04 VMULPD (%RAX,%RDX,1),%YMM15,%YMM1 |
(747) 0x77e09 ADD $0x20,%RDX |
(747) 0x77e0d VADDPD %YMM1,%YMM0,%YMM0 |
(747) 0x77e11 VMOVUPD (%R12,%RDX,1),%YMM11 |
(747) 0x77e17 VMULPD (%RAX,%RDX,1),%YMM11,%YMM10 |
(747) 0x77e1c ADD $0x20,%RDX |
(747) 0x77e20 VADDPD %YMM10,%YMM0,%YMM0 |
(747) 0x77e25 VMOVUPD (%R12,%RDX,1),%YMM14 |
(747) 0x77e2b VMULPD (%RAX,%RDX,1),%YMM14,%YMM15 |
(747) 0x77e30 ADD $0x20,%RDX |
(747) 0x77e34 VADDPD %YMM15,%YMM0,%YMM0 |
(747) 0x77e39 VMOVUPD (%R12,%RDX,1),%YMM1 |
(747) 0x77e3f VMULPD (%RAX,%RDX,1),%YMM1,%YMM11 |
(747) 0x77e44 ADD $0x20,%RDX |
(747) 0x77e48 VADDPD %YMM11,%YMM0,%YMM0 |
(747) 0x77e4d VMOVUPD (%R12,%RDX,1),%YMM10 |
(747) 0x77e53 VMULPD (%RAX,%RDX,1),%YMM10,%YMM14 |
(747) 0x77e58 ADD $0x20,%RDX |
(747) 0x77e5c VADDPD %YMM14,%YMM0,%YMM0 |
(747) 0x77e61 CMP %RDX,0x50(%RSP) |
(747) 0x77e66 JE 77f20 |
(748) 0x77e6c VMOVUPD (%R12,%RDX,1),%YMM15 |
(748) 0x77e72 VMOVUPD 0x20(%R12,%RDX,1),%YMM10 |
(748) 0x77e79 VMULPD (%RAX,%RDX,1),%YMM15,%YMM1 |
(748) 0x77e7e VMOVUPD 0x40(%R12,%RDX,1),%YMM15 |
(748) 0x77e85 VMULPD 0x20(%RAX,%RDX,1),%YMM10,%YMM14 |
(748) 0x77e8b VMOVUPD 0x60(%R12,%RDX,1),%YMM10 |
(748) 0x77e92 VADDPD %YMM1,%YMM0,%YMM11 |
(748) 0x77e96 VMULPD 0x40(%RAX,%RDX,1),%YMM15,%YMM1 |
(748) 0x77e9c VMOVUPD 0x80(%R12,%RDX,1),%YMM15 |
(748) 0x77ea6 VADDPD %YMM14,%YMM11,%YMM0 |
(748) 0x77eab VMULPD 0x60(%RAX,%RDX,1),%YMM10,%YMM14 |
(748) 0x77eb1 VMOVUPD 0xa0(%R12,%RDX,1),%YMM10 |
(748) 0x77ebb VADDPD %YMM1,%YMM0,%YMM11 |
(748) 0x77ebf VMULPD 0x80(%RAX,%RDX,1),%YMM15,%YMM1 |
(748) 0x77ec8 VMOVUPD 0xc0(%R12,%RDX,1),%YMM15 |
(748) 0x77ed2 VADDPD %YMM14,%YMM11,%YMM0 |
(748) 0x77ed7 VMULPD 0xa0(%RAX,%RDX,1),%YMM10,%YMM14 |
(748) 0x77ee0 VMOVUPD 0xe0(%R12,%RDX,1),%YMM10 |
(748) 0x77eea VADDPD %YMM1,%YMM0,%YMM11 |
(748) 0x77eee VMULPD 0xc0(%RAX,%RDX,1),%YMM15,%YMM1 |
(748) 0x77ef7 VADDPD %YMM14,%YMM11,%YMM0 |
(748) 0x77efc VMULPD 0xe0(%RAX,%RDX,1),%YMM10,%YMM14 |
(748) 0x77f05 ADD $0x100,%RDX |
(748) 0x77f0c VADDPD %YMM1,%YMM0,%YMM11 |
(748) 0x77f10 VADDPD %YMM14,%YMM11,%YMM0 |
(748) 0x77f15 CMP %RDX,0x50(%RSP) |
(748) 0x77f1a JNE 77e6c |
(747) 0x77f20 VEXTRACTF64X2 $0x1,%YMM0,%XMM15 |
(747) 0x77f27 VADDPD %XMM0,%XMM15,%XMM11 |
(747) 0x77f2b VUNPCKHPD %XMM11,%XMM11,%XMM1 |
(747) 0x77f30 VADDPD %XMM11,%XMM1,%XMM1 |
(747) 0x77f35 CMP %R13D,0x5c(%RSP) |
(747) 0x77f3a JE 77fae |
(747) 0x77f3c VADDPD %XMM15,%XMM0,%XMM0 |
(747) 0x77f41 MOV %R13D,%ECX |
(747) 0x77f44 MOV %R13D,%EDX |
(747) 0x77f47 MOV 0x5c(%RSP),%R12D |
(747) 0x77f4c SUB %ECX,%R12D |
(747) 0x77f4f MOV %R12D,0x58(%RSP) |
(747) 0x77f54 CMP $0x1,%R12D |
(747) 0x77f58 JE 77f9b |
(747) 0x77f5a LEA (%RBX,%RCX,1),%R12 |
(747) 0x77f5e MOV %R12,0x48(%RSP) |
(747) 0x77f63 MOV 0x40(%RSP),%R12 |
(747) 0x77f68 ADD %RCX,%R12 |
(747) 0x77f6b MOV %R12,%RCX |
(747) 0x77f6e MOV 0x48(%RSP),%R12 |
(747) 0x77f73 VMOVUPD (%R14,%R12,8),%XMM10 |
(747) 0x77f79 MOV 0x58(%RSP),%R12D |
(747) 0x77f7e VFMADD231PD (%RDI,%RCX,8),%XMM10,%XMM0 |
(747) 0x77f84 MOV %R12D,%ECX |
(747) 0x77f87 AND $0x1,%ECX |
(747) 0x77f8a VUNPCKHPD %XMM0,%XMM0,%XMM14 |
(747) 0x77f8e VADDPD %XMM0,%XMM14,%XMM1 |
(747) 0x77f92 JE 77fae |
(747) 0x77f94 AND $-0x2,%R12D |
(747) 0x77f98 ADD %R12D,%EDX |
(747) 0x77f9b MOVSXD %EDX,%RDX |
(747) 0x77f9e LEA (%RDI,%RDX,8),%RDI |
(747) 0x77fa2 VMOVSD (%RDI,%R8,1),%XMM0 |
(747) 0x77fa8 VFMADD231SD (%RAX,%RDX,8),%XMM0,%XMM1 |
(747) 0x77fae VMULSD %XMM12,%XMM12,%XMM12 |
(747) 0x77fb3 VMULSD %XMM13,%XMM13,%XMM13 |
(747) 0x77fb8 VMULSD %XMM9,%XMM9,%XMM0 |
(747) 0x77fbd VADDPD (%R9),%XMM2,%XMM2 |
(747) 0x77fc2 VADDSD 0x10(%R9),%XMM9,%XMM9 |
(747) 0x77fc8 ADD $0x18,%R9 |
(747) 0x77fcc MOV 0x28(%RSP),%RAX |
(747) 0x77fd1 VADDSD %XMM12,%XMM13,%XMM13 |
(747) 0x77fd6 MOV 0x10(%RSP),%R8 |
(747) 0x77fdb MOV 0x18(%RSP),%R12 |
(747) 0x77fe0 MOV 0x20(%RSP),%RCX |
(747) 0x77fe5 VMOVUPD %XMM2,-0x18(%R9) |
(747) 0x77feb ADD %R8,%R15 |
(747) 0x77fee VMOVSD %XMM9,-0x8(%R9) |
(747) 0x77ff4 ADD %R12,%RSI |
(747) 0x77ff7 ADD %RCX,%RBX |
(747) 0x77ffa VADDSD (%RAX,%R10,8),%XMM1,%XMM15 |
(747) 0x78000 VSUBSD %XMM13,%XMM15,%XMM12 |
(747) 0x78005 VSUBSD %XMM0,%XMM12,%XMM11 |
(747) 0x78009 VMOVSD %XMM11,(%RAX,%R10,8) |
(747) 0x7800f INC %R10 |
(747) 0x78012 CMP %R10,0x30(%RSP) |
(747) 0x78017 JNE 77960 |
0x7801d MOV 0x8(%RSP),%R12 |
0x78022 VZEROUPPER |
0x78025 VMOVSD 0x10(%R12),%XMM0 |
0x7802c LEA -0x28(%RBP),%RSP |
0x78030 POP %RBX |
0x78031 POP %R12 |
0x78033 POP %R13 |
0x78035 POP %R14 |
0x78037 POP %R15 |
0x78039 POP %RBP |
0x7803a RET |
(747) 0x7803b MOV 0x8(%RSP),%R12 |
(747) 0x78040 MOV 0x158(%R12),%RDX |
(747) 0x78048 MOV 0x180(%R12),%RDI |
(747) 0x78050 IMUL %R10,%RDX |
(747) 0x78054 MOV %RDX,0x40(%RSP) |
(747) 0x78059 LEA (,%RDX,8),%R8 |
(747) 0x78061 JMP 77d90 |
0x78066 MOV 0x100(%R12),%R11 |
0x7806e MOV 0x140(%R12),%R14 |
0x78076 MOVSXD 0x478(%R12),%R9 |
0x7807e MOV 0x18(%R13),%R13 |
0x78082 VMOVSD (%R11),%XMM8 |
0x78087 MOV 0x18(%RBX),%RBX |
0x7808b LEA (%R9,%R9,2),%R15 |
0x7808f MOV 0x180(%R12),%R10 |
0x78097 VMOVDDUP %XMM8,%XMM7 |
0x7809c LEA (%R13,%R15,8),%RSI |
0x780a1 LEA (%RBX,%R9,8),%RDX |
0x780a5 VMULPD (%R14),%XMM7,%XMM6 |
0x780aa VMULSD 0x10(%R14),%XMM8,%XMM10 |
0x780b0 VUNPCKHPD %XMM6,%XMM6,%XMM5 |
0x780b4 VADDPD (%RSI),%XMM6,%XMM4 |
0x780b8 VMOVSD %XMM6,%XMM6,%XMM3 |
0x780bc VMULSD %XMM5,%XMM5,%XMM14 |
0x780c0 VADDSD 0x10(%RSI),%XMM10,%XMM1 |
0x780c5 VMOVUPD %XMM4,(%RSI) |
0x780c9 VMOVSD %XMM1,0x10(%RSI) |
0x780ce VFNMADD213SD (%RDX),%XMM10,%XMM10 |
0x780d3 VFMADD132SD %XMM6,%XMM14,%XMM3 |
0x780d8 VSUBSD %XMM3,%XMM10,%XMM0 |
0x780dc VFMADD132SD (%R10),%XMM0,%XMM8 |
0x780e1 VMOVSD %XMM8,(%RDX) |
0x780e5 JMP 78025 |
(747) 0x780ea VXORPD %XMM0,%XMM0,%XMM0 |
(747) 0x780ee VXORPD %XMM2,%XMM2,%XMM2 |
(747) 0x780f2 VMOVSD %XMM0,%XMM0,%XMM13 |
(747) 0x780f6 VMOVSD %XMM0,%XMM0,%XMM12 |
(747) 0x780fa VMOVSD %XMM0,%XMM0,%XMM9 |
(747) 0x780fe VMOVSD %XMM0,%XMM0,%XMM1 |
(747) 0x78102 JMP 77fbd |
(747) 0x78107 VXORPD %XMM2,%XMM2,%XMM2 |
(747) 0x7810b XOR %ECX,%ECX |
(747) 0x7810d VXORPD %XMM9,%XMM9,%XMM9 |
(747) 0x78112 XOR %EDX,%EDX |
(747) 0x78114 VMOVAPD %XMM2,%XMM10 |
(747) 0x78118 VMOVAPD %XMM2,%XMM11 |
(747) 0x7811c VMOVAPD %XMM2,%XMM14 |
(747) 0x78120 JMP 77cae |
(747) 0x78125 VXORPD %XMM0,%XMM0,%XMM0 |
(747) 0x78129 XOR %ECX,%ECX |
(747) 0x7812b VXORPD %XMM1,%XMM1,%XMM1 |
(747) 0x7812f XOR %EDX,%EDX |
(747) 0x78131 JMP 77f47 |
0x78136 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | DiracDeterminantRef.cpp:231-256 |
Module | libqmcwfs.so |
nb instructions | 96 |
nb uops | 98 |
loop length | 444 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 6 |
used zmm registers | 0 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 4.83 | 12.33 | 12.33 | 9.50 | 4.67 | 4.30 | 9.50 | 9.50 | 9.50 | 4.20 | 12.33 |
cycles | 5.00 | 4.83 | 12.33 | 12.33 | 9.50 | 4.67 | 4.30 | 9.50 | 9.50 | 9.50 | 4.20 | 12.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 16.36-16.39 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 12.33 |
Overall L1 | 16.33 |
all | 41% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 17% |
load | 25% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 29% |
load | 57% |
store | 8% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 25% |
load | 50% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 14% |
load | 15% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 20% |
load | 30% |
store | 12% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x60,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 77720 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x484(%R12),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 78066 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x816> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x478(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 78025 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x7d5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x118(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x480(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0xd8(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R15,%R15,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDI,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x100(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%RBX,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%RDI),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R13D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDI,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0x10ab9(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x2,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0x10ac5(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x5,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0x10ad2(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA 0x10ae7(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x1085f(%RIP),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R15,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQA 0x1086f(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RSI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVSD 0x10(%R12),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x100(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x478(%R12),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R9,2),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x180(%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP %XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%R13,%R15,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RBX,%R9,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULPD (%R14),%XMM7,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMULSD 0x10(%R14),%XMM8,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM6,%XMM6,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD (%RSI),%XMM6,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM6,%XMM6,%XMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD %XMM5,%XMM5,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD 0x10(%RSI),%XMM10,%XMM1 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVUPD %XMM4,(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVSD %XMM1,0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VFNMADD213SD (%RDX),%XMM10,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD132SD %XMM6,%XMM14,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM3,%XMM10,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD (%R10),%XMM0,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM8,(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 78025 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x7d5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | DiracDeterminantRef.cpp:231-256 |
Module | libqmcwfs.so |
nb instructions | 96 |
nb uops | 98 |
loop length | 444 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 6 |
used zmm registers | 0 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 4.83 | 12.33 | 12.33 | 9.50 | 4.67 | 4.30 | 9.50 | 9.50 | 9.50 | 4.20 | 12.33 |
cycles | 5.00 | 4.83 | 12.33 | 12.33 | 9.50 | 4.67 | 4.30 | 9.50 | 9.50 | 9.50 | 4.20 | 12.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 16.36-16.39 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 12.33 |
Overall L1 | 16.33 |
all | 41% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 17% |
load | 25% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 29% |
load | 57% |
store | 8% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 25% |
load | 50% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 14% |
load | 15% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 20% |
load | 30% |
store | 12% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x60,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 77720 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x484(%R12),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 78066 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x816> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x478(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 78025 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x7d5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x118(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x480(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0xd8(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R15,%R15,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDI,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x100(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%RBX,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%RDI),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R13D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDI,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0x10ab9(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
AND $-0x4,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x2,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0x10ac5(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x5,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0x10ad2(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA 0x10ae7(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x1085f(%RIP),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R15,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQA 0x1086f(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RSI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVSD 0x10(%R12),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x100(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x478(%R12),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R9,2),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x180(%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP %XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%R13,%R15,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RBX,%R9,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULPD (%R14),%XMM7,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMULSD 0x10(%R14),%XMM8,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM6,%XMM6,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD (%RSI),%XMM6,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM6,%XMM6,%XMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD %XMM5,%XMM5,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD 0x10(%RSI),%XMM10,%XMM1 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVUPD %XMM4,(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVSD %XMM1,0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VFNMADD213SD (%RDX),%XMM10,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD132SD %XMM6,%XMM14,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM3,%XMM10,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD (%R10),%XMM0,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM8,(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 78025 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x7d5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE– | 0.07 | 0.07 |
▼Loop 747 - DiracDeterminantRef.cpp:247-252 - libqmcwfs.so– | 0 | 0 |
○Loop 749 - OperatorTags.h:63-94 - libqmcwfs.so | 0.05 | 0.04 |
○Loop 748 - inner_product.hpp:82-83 - libqmcwfs.so | 0.02 | 0.01 |