Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:231-256 [...] | Coverage: 0.09% |
---|
Function: miniqmcreference::DiracDeterminantRef<qmcplusplus::DelayedUpdate<double, double> >::evalua ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:231-256 [...] | Coverage: 0.09% |
---|
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 156 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
156: res += a[i] * b[i]; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 217 - 217 |
-------------------------------------------------------------------------------- |
217: inline Type_t* operator[](size_type i) { return X.data() + i * D2; } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 62 - 62 |
-------------------------------------------------------------------------------- |
62: X[d] = T(0); |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 248 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 231 - 256 |
-------------------------------------------------------------------------------- |
231: typename DiracDeterminantRef<DU_TYPE>::RealType DiracDeterminantRef<DU_TYPE>::evaluateLog( |
232: ParticleSet& P, |
233: ParticleSet::ParticleGradient& G, |
234: ParticleSet::ParticleLaplacian& L) |
235: { |
236: recompute(P); |
237: |
238: if (NumPtcls == 1) |
239: { |
240: ValueType y = psiM(0, 0); |
241: GradType rv = y * dpsiM(0, 0); |
242: G[FirstIndex] += rv; |
243: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
244: } |
245: else |
246: { |
247: for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++) |
248: { |
249: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
250: mValueType lap = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
251: G[iat] += rv; |
252: L[iat] += lap - dot(rv, rv); |
253: } |
254: } |
255: return LogValue; |
256: } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
0x6de60 PUSH %RBP |
0x6de61 MOV %RSP,%RBP |
0x6de64 PUSH %R15 |
0x6de66 MOV %RDI,%R15 |
0x6de69 PUSH %R14 |
0x6de6b PUSH %R13 |
0x6de6d PUSH %R12 |
0x6de6f MOV %RDX,%R12 |
0x6de72 PUSH %RBX |
0x6de73 MOV %RCX,%RBX |
0x6de76 AND $-0x20,%RSP |
0x6de7a SUB $0x60,%RSP |
0x6de7e CALL 6dd30 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> |
0x6de83 MOV 0x484(%R15),%EDX |
0x6de8a CMP $0x1,%EDX |
0x6de8d JE 6e4d2 |
0x6de93 MOVSXD 0x478(%R15),%RAX |
0x6de9a TEST %EDX,%EDX |
0x6de9c JLE 6e494 |
0x6dea2 MOV 0x18(%RBX),%RBX |
0x6dea6 MOV 0x18(%R12),%RCX |
0x6deab LEA (%RAX,%RAX,2),%R8 |
0x6deaf MOVSXD %EDX,%RDX |
0x6deb2 MOV 0x118(%R15),%R10 |
0x6deb9 MOV 0x480(%R15),%EDI |
0x6dec0 MOV %RDX,0x30(%RSP) |
0x6dec5 MOV 0xd8(%R15),%RSI |
0x6decc LEA (%RBX,%RAX,8),%RAX |
0x6ded0 LEA (%RCX,%R8,8),%R9 |
0x6ded4 XOR %EBX,%EBX |
0x6ded6 LEA (%R10,%R10,2),%R13 |
0x6deda MOV 0x140(%R15),%R11 |
0x6dee1 MOV 0x100(%R15),%R14 |
0x6dee8 MOV %RAX,0x28(%RSP) |
0x6deed LEA (,%R13,8),%R12 |
0x6def5 MOV %RSI,0x20(%RSP) |
0x6defa MOV %EDI,%ESI |
0x6defc SHR $0x2,%ESI |
0x6deff MOV %R12,0x10(%RSP) |
0x6df04 MOV %R11,%R13 |
0x6df07 SAL $0x5,%RSI |
0x6df0b MOV %EDI,0x5c(%RSP) |
0x6df0f MOV %R10,0x18(%RSP) |
0x6df14 LEA -0x1(%RDI),%R10D |
0x6df18 AND $-0x4,%EDI |
0x6df1b MOV %R10D,0x3c(%RSP) |
0x6df20 XOR %R10D,%R10D |
0x6df23 MOV %RSI,0x50(%RSP) |
0x6df28 XOR %ESI,%ESI |
0x6df2a MOV %R15,0x8(%RSP) |
0x6df2f MOV %R14,%R15 |
0x6df32 MOV %EDI,%R14D |
0x6df35 NOPL (%RAX) |
(732) 0x6df38 MOV 0x5c(%RSP),%EDI |
(732) 0x6df3c LEA (%R15,%RBX,8),%RCX |
(732) 0x6df40 MOV %RCX,%RDX |
(732) 0x6df43 TEST %EDI,%EDI |
(732) 0x6df45 JLE 6e557 |
(732) 0x6df4b CMPL $0x2,0x3c(%RSP) |
(732) 0x6df50 JBE 6e574 |
(732) 0x6df56 MOV 0x50(%RSP),%RDI |
(732) 0x6df5b VXORPD %XMM11,%XMM11,%XMM11 |
(732) 0x6df60 MOV %R13,%R12 |
(732) 0x6df63 VMOVAPD %YMM11,%YMM0 |
(732) 0x6df67 VMOVAPD %YMM11,%YMM6 |
(732) 0x6df6b LEA (%RCX,%RDI,1),%R8 |
(732) 0x6df6f AND $0x20,%EDI |
(732) 0x6df72 JE 6e000 |
(732) 0x6df78 VMOVUPD (%R13),%YMM8 |
(732) 0x6df7e VMOVUPD 0x20(%R13),%YMM10 |
(732) 0x6df84 ADD $0x20,%RCX |
(732) 0x6df88 LEA 0x60(%R13),%R12 |
(732) 0x6df8c VMOVUPD 0x40(%R13),%YMM12 |
(732) 0x6df92 VPALIGNR $0x8,%YMM10,%YMM8,%YMM1 |
(732) 0x6df98 VPERMPD $0x2c,%YMM8,%YMM3 |
(732) 0x6df9e VBLENDPD $0xc,%YMM10,%YMM8,%YMM13 |
(732) 0x6dfa4 VBLENDPD $0x4,%YMM10,%YMM3,%YMM9 |
(732) 0x6dfaa VPERMPD $0x53,%YMM1,%YMM4 |
(732) 0x6dfb0 VPERM2F128 $0x2,%YMM10,%YMM8,%YMM14 |
(732) 0x6dfb6 VPERMILPD $0x2,%YMM12,%YMM2 |
(732) 0x6dfbc VINSERTF128 $0x1,%XMM12,%YMM4,%YMM5 |
(732) 0x6dfc2 VPERM2F128 $0x21,%YMM12,%YMM9,%YMM6 |
(732) 0x6dfc8 VSHUFPD $0x5,%YMM14,%YMM13,%YMM15 |
(732) 0x6dfce VBLENDPD $0x7,%YMM5,%YMM12,%YMM7 |
(732) 0x6dfd4 VBLENDPD $0x8,%YMM2,%YMM15,%YMM0 |
(732) 0x6dfda VBLENDPD $0x8,%YMM6,%YMM9,%YMM6 |
(732) 0x6dfe0 VFMADD132PD -0x20(%RCX),%YMM11,%YMM0 |
(732) 0x6dfe6 VFMADD132PD -0x20(%RCX),%YMM11,%YMM6 |
(732) 0x6dfec VFMADD231PD -0x20(%RCX),%YMM7,%YMM11 |
(732) 0x6dff2 CMP %R8,%RCX |
(732) 0x6dff5 JE 6e0fc |
(732) 0x6dffb NOPL (%RAX,%RAX,1) |
(734) 0x6e000 VMOVUPD (%R12),%YMM8 |
(734) 0x6e006 VMOVUPD 0x20(%R12),%YMM10 |
(734) 0x6e00d ADD $0x40,%RCX |
(734) 0x6e011 ADD $0xc0,%R12 |
(734) 0x6e018 VMOVUPD -0x80(%R12),%YMM12 |
(734) 0x6e01f VBLENDPD $0xc,%YMM10,%YMM8,%YMM13 |
(734) 0x6e025 VPERM2F128 $0x2,%YMM10,%YMM8,%YMM14 |
(734) 0x6e02b VPERMPD $0x2c,%YMM8,%YMM3 |
(734) 0x6e031 VPALIGNR $0x8,%YMM10,%YMM8,%YMM1 |
(734) 0x6e037 VSHUFPD $0x5,%YMM14,%YMM13,%YMM15 |
(734) 0x6e03d VBLENDPD $0x4,%YMM10,%YMM3,%YMM8 |
(734) 0x6e043 VMOVUPD -0x60(%R12),%YMM13 |
(734) 0x6e04a VPERMPD $0x53,%YMM1,%YMM5 |
(734) 0x6e050 VMOVUPD -0x40(%R12),%YMM14 |
(734) 0x6e057 VPERMILPD $0x2,%YMM12,%YMM2 |
(734) 0x6e05d VPERM2F128 $0x21,%YMM12,%YMM8,%YMM10 |
(734) 0x6e063 VINSERTF128 $0x1,%XMM12,%YMM5,%YMM7 |
(734) 0x6e069 VBLENDPD $0x8,%YMM2,%YMM15,%YMM4 |
(734) 0x6e06f VMOVUPD -0x20(%R12),%YMM15 |
(734) 0x6e076 VBLENDPD $0x7,%YMM7,%YMM12,%YMM9 |
(734) 0x6e07c VBLENDPD $0x8,%YMM10,%YMM8,%YMM12 |
(734) 0x6e082 VPERM2F128 $0x2,%YMM14,%YMM13,%YMM2 |
(734) 0x6e088 VFMADD231PD -0x40(%RCX),%YMM12,%YMM6 |
(734) 0x6e08e VFMADD231PD -0x40(%RCX),%YMM9,%YMM11 |
(734) 0x6e094 VPERMPD $0x2c,%YMM13,%YMM12 |
(734) 0x6e09a VPALIGNR $0x8,%YMM14,%YMM13,%YMM9 |
(734) 0x6e0a0 VFMADD231PD -0x40(%RCX),%YMM4,%YMM0 |
(734) 0x6e0a6 VBLENDPD $0xc,%YMM14,%YMM13,%YMM4 |
(734) 0x6e0ac VPERMPD $0x53,%YMM9,%YMM3 |
(734) 0x6e0b2 VBLENDPD $0x4,%YMM14,%YMM12,%YMM13 |
(734) 0x6e0b8 VPERMILPD $0x2,%YMM15,%YMM5 |
(734) 0x6e0be VSHUFPD $0x5,%YMM2,%YMM4,%YMM1 |
(734) 0x6e0c3 VINSERTF128 $0x1,%XMM15,%YMM3,%YMM8 |
(734) 0x6e0c9 VPERM2F128 $0x21,%YMM15,%YMM13,%YMM14 |
(734) 0x6e0cf VBLENDPD $0x8,%YMM5,%YMM1,%YMM7 |
(734) 0x6e0d5 VBLENDPD $0x7,%YMM8,%YMM15,%YMM10 |
(734) 0x6e0db VBLENDPD $0x8,%YMM14,%YMM13,%YMM15 |
(734) 0x6e0e1 VFMADD231PD -0x20(%RCX),%YMM7,%YMM0 |
(734) 0x6e0e7 VFMADD231PD -0x20(%RCX),%YMM15,%YMM6 |
(734) 0x6e0ed VFMADD231PD -0x20(%RCX),%YMM10,%YMM11 |
(734) 0x6e0f3 CMP %R8,%RCX |
(734) 0x6e0f6 JNE 6e000 |
(732) 0x6e0fc VEXTRACTF128 $0x1,%YMM0,%XMM8 |
(732) 0x6e102 VEXTRACTF128 $0x1,%YMM6,%XMM9 |
(732) 0x6e108 VEXTRACTF128 $0x1,%YMM11,%XMM7 |
(732) 0x6e10e VADDPD %XMM0,%XMM8,%XMM1 |
(732) 0x6e112 VADDPD %XMM6,%XMM9,%XMM12 |
(732) 0x6e116 VADDPD %XMM11,%XMM7,%XMM4 |
(732) 0x6e11b VUNPCKHPD %XMM1,%XMM1,%XMM5 |
(732) 0x6e11f VUNPCKHPD %XMM12,%XMM12,%XMM13 |
(732) 0x6e124 VADDPD %XMM12,%XMM13,%XMM14 |
(732) 0x6e129 VADDPD %XMM1,%XMM5,%XMM10 |
(732) 0x6e12d VUNPCKHPD %XMM4,%XMM4,%XMM2 |
(732) 0x6e131 VADDPD %XMM4,%XMM2,%XMM3 |
(732) 0x6e135 VMOVSD %XMM14,%XMM14,%XMM15 |
(732) 0x6e13a VUNPCKLPD %XMM10,%XMM14,%XMM2 |
(732) 0x6e13f CMP %R14D,0x5c(%RSP) |
(732) 0x6e144 JE 6e4a9 |
(732) 0x6e14a VADDPD %XMM9,%XMM6,%XMM6 |
(732) 0x6e14f VADDPD %XMM7,%XMM11,%XMM7 |
(732) 0x6e153 MOV %R14D,%R12D |
(732) 0x6e156 MOV %R14D,%EAX |
(732) 0x6e159 VADDPD %XMM8,%XMM0,%XMM9 |
(732) 0x6e15e MOV 0x5c(%RSP),%EDI |
(732) 0x6e162 SUB %R12D,%EDI |
(732) 0x6e165 CMP $0x1,%EDI |
(732) 0x6e168 JE 6e1e1 |
(732) 0x6e16a LEA (%RSI,%R12,1),%RCX |
(732) 0x6e16e ADD %RBX,%R12 |
(732) 0x6e171 LEA (%RCX,%RCX,2),%R8 |
(732) 0x6e175 VMOVUPD (%R15,%R12,8),%XMM11 |
(732) 0x6e17b LEA (%R11,%R8,8),%RCX |
(732) 0x6e17f VMOVUPD (%RCX),%XMM2 |
(732) 0x6e183 VMOVUPD 0x10(%RCX),%XMM4 |
(732) 0x6e188 VMOVUPD 0x20(%RCX),%XMM8 |
(732) 0x6e18d VPERMILPD $0x1,%XMM2,%XMM0 |
(732) 0x6e193 VBLENDPD $0x2,%XMM4,%XMM2,%XMM1 |
(732) 0x6e199 VUNPCKLPD %XMM2,%XMM4,%XMM3 |
(732) 0x6e19d VUNPCKLPD %XMM8,%XMM0,%XMM5 |
(732) 0x6e1a2 VFMADD132PD %XMM11,%XMM6,%XMM1 |
(732) 0x6e1a7 VBLENDPD $0x2,%XMM8,%XMM3,%XMM10 |
(732) 0x6e1ad VFMADD132PD %XMM11,%XMM9,%XMM5 |
(732) 0x6e1b2 VFMADD132PD %XMM10,%XMM7,%XMM11 |
(732) 0x6e1b7 VUNPCKHPD %XMM1,%XMM1,%XMM12 |
(732) 0x6e1bb VUNPCKHPD %XMM5,%XMM5,%XMM7 |
(732) 0x6e1bf VADDPD %XMM1,%XMM12,%XMM13 |
(732) 0x6e1c3 VUNPCKHPD %XMM11,%XMM11,%XMM6 |
(732) 0x6e1c8 VADDPD %XMM5,%XMM7,%XMM9 |
(732) 0x6e1cc VADDPD %XMM11,%XMM6,%XMM3 |
(732) 0x6e1d1 VUNPCKLPD %XMM9,%XMM13,%XMM2 |
(732) 0x6e1d6 TEST $0x1,%DIL |
(732) 0x6e1da JE 6e204 |
(732) 0x6e1dc AND $-0x2,%EDI |
(732) 0x6e1df ADD %EDI,%EAX |
(732) 0x6e1e1 CLTQ |
(732) 0x6e1e3 LEA (%RSI,%RAX,1),%R12 |
(732) 0x6e1e7 VMOVSD (%RDX,%RAX,8),%XMM14 |
(732) 0x6e1ec LEA (%R12,%R12,2),%RDI |
(732) 0x6e1f0 LEA (%R11,%RDI,8),%R8 |
(732) 0x6e1f4 VMOVDDUP %XMM14,%XMM15 |
(732) 0x6e1f9 VFMADD231SD 0x10(%R8),%XMM14,%XMM3 |
(732) 0x6e1ff VFMADD231PD (%R8),%XMM15,%XMM2 |
(732) 0x6e204 MOV 0x8(%RSP),%RCX |
(732) 0x6e209 VUNPCKHPD %XMM2,%XMM2,%XMM10 |
(732) 0x6e20d VMOVSD %XMM2,%XMM2,%XMM15 |
(732) 0x6e211 MOV 0x158(%RCX),%RAX |
(732) 0x6e218 MOV 0x180(%RCX),%RDI |
(732) 0x6e21f IMUL %R10,%RAX |
(732) 0x6e223 CMPL $0x2,0x3c(%RSP) |
(732) 0x6e228 MOV %RAX,0x40(%RSP) |
(732) 0x6e22d LEA (,%RAX,8),%R8 |
(732) 0x6e235 JBE 6e592 |
(732) 0x6e23b MOV 0x50(%RSP),%RCX |
(732) 0x6e240 LEA (%RDI,%R8,1),%R12 |
(732) 0x6e244 XOR %EAX,%EAX |
(732) 0x6e246 VXORPD %XMM4,%XMM4,%XMM4 |
(732) 0x6e24a SUB $0x20,%RCX |
(732) 0x6e24e SHR $0x5,%RCX |
(732) 0x6e252 INC %RCX |
(732) 0x6e255 AND $0x7,%ECX |
(732) 0x6e258 JE 6e2fd |
(732) 0x6e25e CMP $0x1,%RCX |
(732) 0x6e262 JE 6e2e2 |
(732) 0x6e264 CMP $0x2,%RCX |
(732) 0x6e268 JE 6e2d2 |
(732) 0x6e26a CMP $0x3,%RCX |
(732) 0x6e26e JE 6e2c2 |
(732) 0x6e270 CMP $0x4,%RCX |
(732) 0x6e274 JE 6e2b2 |
(732) 0x6e276 CMP $0x5,%RCX |
(732) 0x6e27a JE 6e2a2 |
(732) 0x6e27c CMP $0x6,%RCX |
(732) 0x6e280 JE 6e292 |
(732) 0x6e282 VMOVUPD (%R12),%YMM8 |
(732) 0x6e288 VFMADD231PD (%RDX),%YMM8,%YMM4 |
(732) 0x6e28d MOV $0x20,%EAX |
(732) 0x6e292 VMOVUPD (%R12,%RAX,1),%YMM0 |
(732) 0x6e298 VFMADD231PD (%RDX,%RAX,1),%YMM0,%YMM4 |
(732) 0x6e29e ADD $0x20,%RAX |
(732) 0x6e2a2 VMOVUPD (%R12,%RAX,1),%YMM5 |
(732) 0x6e2a8 VFMADD231PD (%RDX,%RAX,1),%YMM5,%YMM4 |
(732) 0x6e2ae ADD $0x20,%RAX |
(732) 0x6e2b2 VMOVUPD (%R12,%RAX,1),%YMM1 |
(732) 0x6e2b8 VFMADD231PD (%RDX,%RAX,1),%YMM1,%YMM4 |
(732) 0x6e2be ADD $0x20,%RAX |
(732) 0x6e2c2 VMOVUPD (%R12,%RAX,1),%YMM6 |
(732) 0x6e2c8 VFMADD231PD (%RDX,%RAX,1),%YMM6,%YMM4 |
(732) 0x6e2ce ADD $0x20,%RAX |
(732) 0x6e2d2 VMOVUPD (%R12,%RAX,1),%YMM7 |
(732) 0x6e2d8 VFMADD231PD (%RDX,%RAX,1),%YMM7,%YMM4 |
(732) 0x6e2de ADD $0x20,%RAX |
(732) 0x6e2e2 VMOVUPD (%R12,%RAX,1),%YMM9 |
(732) 0x6e2e8 VFMADD231PD (%RDX,%RAX,1),%YMM9,%YMM4 |
(732) 0x6e2ee ADD $0x20,%RAX |
(732) 0x6e2f2 CMP %RAX,0x50(%RSP) |
(732) 0x6e2f7 JE 6e394 |
(733) 0x6e2fd VMOVUPD (%R12,%RAX,1),%YMM12 |
(733) 0x6e303 VFMADD231PD (%RDX,%RAX,1),%YMM12,%YMM4 |
(733) 0x6e309 VMOVUPD 0x20(%R12,%RAX,1),%YMM13 |
(733) 0x6e310 VFMADD231PD 0x20(%RDX,%RAX,1),%YMM13,%YMM4 |
(733) 0x6e317 VMOVUPD 0x40(%R12,%RAX,1),%YMM14 |
(733) 0x6e31e VFMADD231PD 0x40(%RDX,%RAX,1),%YMM14,%YMM4 |
(733) 0x6e325 VMOVUPD 0x60(%R12,%RAX,1),%YMM11 |
(733) 0x6e32c VFMADD231PD 0x60(%RDX,%RAX,1),%YMM11,%YMM4 |
(733) 0x6e333 VMOVUPD 0x80(%R12,%RAX,1),%YMM8 |
(733) 0x6e33d VFMADD231PD 0x80(%RDX,%RAX,1),%YMM8,%YMM4 |
(733) 0x6e347 VMOVUPD 0xa0(%R12,%RAX,1),%YMM0 |
(733) 0x6e351 VFMADD231PD 0xa0(%RDX,%RAX,1),%YMM0,%YMM4 |
(733) 0x6e35b VMOVUPD 0xc0(%R12,%RAX,1),%YMM5 |
(733) 0x6e365 VFMADD231PD 0xc0(%RDX,%RAX,1),%YMM5,%YMM4 |
(733) 0x6e36f VMOVUPD 0xe0(%R12,%RAX,1),%YMM1 |
(733) 0x6e379 VFMADD231PD 0xe0(%RDX,%RAX,1),%YMM1,%YMM4 |
(733) 0x6e383 ADD $0x100,%RAX |
(733) 0x6e389 CMP %RAX,0x50(%RSP) |
(733) 0x6e38e JNE 6e2fd |
(732) 0x6e394 VEXTRACTF128 $0x1,%YMM4,%XMM7 |
(732) 0x6e39a VADDPD %XMM4,%XMM7,%XMM6 |
(732) 0x6e39e VUNPCKHPD %XMM6,%XMM6,%XMM9 |
(732) 0x6e3a2 VADDPD %XMM6,%XMM9,%XMM0 |
(732) 0x6e3a6 CMP %R14D,0x5c(%RSP) |
(732) 0x6e3ab JE 6e41f |
(732) 0x6e3ad VADDPD %XMM7,%XMM4,%XMM11 |
(732) 0x6e3b1 MOV %R14D,%ECX |
(732) 0x6e3b4 MOV %R14D,%EAX |
(732) 0x6e3b7 MOV 0x5c(%RSP),%R12D |
(732) 0x6e3bc SUB %ECX,%R12D |
(732) 0x6e3bf MOV %R12D,0x58(%RSP) |
(732) 0x6e3c4 CMP $0x1,%R12D |
(732) 0x6e3c8 JE 6e40d |
(732) 0x6e3ca LEA (%RBX,%RCX,1),%R12 |
(732) 0x6e3ce MOV %R12,0x48(%RSP) |
(732) 0x6e3d3 MOV 0x40(%RSP),%R12 |
(732) 0x6e3d8 ADD %RCX,%R12 |
(732) 0x6e3db MOV %R12,%RCX |
(732) 0x6e3de MOV 0x48(%RSP),%R12 |
(732) 0x6e3e3 VMOVUPD (%R15,%R12,8),%XMM4 |
(732) 0x6e3e9 VFMADD231PD (%RDI,%RCX,8),%XMM4,%XMM11 |
(732) 0x6e3ef MOV 0x58(%RSP),%R12D |
(732) 0x6e3f4 MOV %R12D,%ECX |
(732) 0x6e3f7 AND $0x1,%ECX |
(732) 0x6e3fa VUNPCKHPD %XMM11,%XMM11,%XMM12 |
(732) 0x6e3ff VADDPD %XMM11,%XMM12,%XMM0 |
(732) 0x6e404 JE 6e41f |
(732) 0x6e406 AND $-0x2,%R12D |
(732) 0x6e40a ADD %R12D,%EAX |
(732) 0x6e40d CLTQ |
(732) 0x6e40f LEA (%RDI,%RAX,8),%RDI |
(732) 0x6e413 VMOVSD (%RDI,%R8,1),%XMM13 |
(732) 0x6e419 VFMADD231SD (%RDX,%RAX,8),%XMM13,%XMM0 |
(732) 0x6e41f VMULSD %XMM15,%XMM15,%XMM5 |
(732) 0x6e424 VMULSD %XMM10,%XMM10,%XMM4 |
(732) 0x6e429 VMULSD %XMM3,%XMM3,%XMM1 |
(732) 0x6e42d VADDPD (%R9),%XMM2,%XMM2 |
(732) 0x6e432 VADDSD 0x10(%R9),%XMM3,%XMM3 |
(732) 0x6e438 ADD $0x18,%R9 |
(732) 0x6e43c MOV 0x28(%RSP),%RDX |
(732) 0x6e441 VADDSD %XMM5,%XMM4,%XMM15 |
(732) 0x6e445 MOV 0x10(%RSP),%R8 |
(732) 0x6e44a MOV 0x18(%RSP),%RAX |
(732) 0x6e44f MOV 0x20(%RSP),%R12 |
(732) 0x6e454 VMOVUPD %XMM2,-0x18(%R9) |
(732) 0x6e45a ADD %R8,%R13 |
(732) 0x6e45d VMOVSD %XMM3,-0x8(%R9) |
(732) 0x6e463 VADDSD (%RDX,%R10,8),%XMM0,%XMM10 |
(732) 0x6e469 ADD %RAX,%RSI |
(732) 0x6e46c ADD %R12,%RBX |
(732) 0x6e46f VSUBSD %XMM15,%XMM10,%XMM14 |
(732) 0x6e474 VSUBSD %XMM1,%XMM14,%XMM11 |
(732) 0x6e478 VMOVSD %XMM11,(%RDX,%R10,8) |
(732) 0x6e47e INC %R10 |
(732) 0x6e481 CMP %R10,0x30(%RSP) |
(732) 0x6e486 JNE 6df38 |
0x6e48c MOV 0x8(%RSP),%R15 |
0x6e491 VZEROUPPER |
0x6e494 VMOVSD 0x10(%R15),%XMM0 |
0x6e49a LEA -0x28(%RBP),%RSP |
0x6e49e POP %RBX |
0x6e49f POP %R12 |
0x6e4a1 POP %R13 |
0x6e4a3 POP %R14 |
0x6e4a5 POP %R15 |
0x6e4a7 POP %RBP |
0x6e4a8 RET |
(732) 0x6e4a9 MOV 0x8(%RSP),%RCX |
(732) 0x6e4ae MOV 0x158(%RCX),%RAX |
(732) 0x6e4b5 MOV 0x180(%RCX),%RDI |
(732) 0x6e4bc IMUL %R10,%RAX |
(732) 0x6e4c0 MOV %RAX,0x40(%RSP) |
(732) 0x6e4c5 LEA (,%RAX,8),%R8 |
(732) 0x6e4cd JMP 6e23b |
0x6e4d2 MOV 0x100(%R15),%R11 |
0x6e4d9 MOV 0x140(%R15),%R9 |
0x6e4e0 MOVSXD 0x478(%R15),%R13 |
0x6e4e7 MOV 0x18(%R12),%R10 |
0x6e4ec VMOVSD (%R11),%XMM8 |
0x6e4f1 VMULSD 0x10(%R9),%XMM8,%XMM9 |
0x6e4f7 LEA (%R13,%R13,2),%RSI |
0x6e4fc MOV 0x18(%RBX),%RBX |
0x6e500 MOV 0x180(%R15),%RCX |
0x6e507 VMOVDDUP %XMM8,%XMM0 |
0x6e50c LEA (%R10,%RSI,8),%R14 |
0x6e510 VMULPD (%R9),%XMM0,%XMM5 |
0x6e515 VADDSD 0x10(%R14),%XMM9,%XMM4 |
0x6e51b LEA (%RBX,%R13,8),%RDI |
0x6e51f VADDPD (%R14),%XMM5,%XMM1 |
0x6e524 VMOVSD %XMM4,0x10(%R14) |
0x6e52a VMOVUPD %XMM1,(%R14) |
0x6e52f VFNMADD213SD (%RDI),%XMM9,%XMM9 |
0x6e534 VUNPCKHPD %XMM5,%XMM5,%XMM7 |
0x6e538 VMOVSD %XMM5,%XMM5,%XMM6 |
0x6e53c VMULSD %XMM7,%XMM7,%XMM12 |
0x6e540 VFMADD132SD %XMM5,%XMM12,%XMM6 |
0x6e545 VSUBSD %XMM6,%XMM9,%XMM13 |
0x6e549 VFMADD132SD (%RCX),%XMM13,%XMM8 |
0x6e54e VMOVSD %XMM8,(%RDI) |
0x6e552 JMP 6e494 |
(732) 0x6e557 VXORPD %XMM1,%XMM1,%XMM1 |
(732) 0x6e55b VXORPD %XMM2,%XMM2,%XMM2 |
(732) 0x6e55f VMOVSD %XMM1,%XMM1,%XMM4 |
(732) 0x6e563 VMOVSD %XMM1,%XMM1,%XMM5 |
(732) 0x6e567 VMOVSD %XMM1,%XMM1,%XMM3 |
(732) 0x6e56b VMOVSD %XMM1,%XMM1,%XMM0 |
(732) 0x6e56f JMP 6e42d |
(732) 0x6e574 VXORPD %XMM2,%XMM2,%XMM2 |
(732) 0x6e578 XOR %R12D,%R12D |
(732) 0x6e57b VXORPD %XMM3,%XMM3,%XMM3 |
(732) 0x6e57f XOR %EAX,%EAX |
(732) 0x6e581 VMOVAPD %XMM2,%XMM7 |
(732) 0x6e585 VMOVAPD %XMM2,%XMM9 |
(732) 0x6e589 VMOVAPD %XMM2,%XMM6 |
(732) 0x6e58d JMP 6e15e |
(732) 0x6e592 VXORPD %XMM11,%XMM11,%XMM11 |
(732) 0x6e597 XOR %ECX,%ECX |
(732) 0x6e599 VXORPD %XMM0,%XMM0,%XMM0 |
(732) 0x6e59d XOR %EAX,%EAX |
(732) 0x6e59f JMP 6e3b7 |
0x6e5a4 NOPW %CS:(%RAX,%RAX,1) |
0x6e5af NOP |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►50.03+ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:171 | libqmcwfs.so |
○ | main._omp_fn.0 | miniqmc.cpp:397 | exec |
○ | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
►48.05+ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:171 | libqmcwfs.so |
○ | main._omp_fn.0 | miniqmc.cpp:397 | exec |
○ | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
►1.05+ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:171 | libqmcwfs.so |
○ | main._omp_fn.0 | miniqmc.cpp:397 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | DiracDeterminantRef.cpp:231-256 |
Module | libqmcwfs.so |
nb instructions | 92 |
nb uops | 94 |
loop length | 390 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 15.67 cycles |
front end | 15.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 4.83 | 10.33 | 10.33 | 9.50 | 4.67 | 4.30 | 9.50 | 9.50 | 9.50 | 4.20 | 10.33 |
cycles | 5.00 | 4.83 | 10.33 | 10.33 | 9.50 | 4.67 | 4.30 | 9.50 | 9.50 | 9.50 | 4.20 | 10.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 15.70-15.74 |
Stall cycles | 0.00 |
Front-end | 15.67 |
Dispatch | 10.33 |
Overall L1 | 15.67 |
all | 9% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 17% |
load | 25% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 14% |
load | 25% |
store | 8% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 11% |
load | NA (no load vectorizable/vectorized instructions) |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 14% |
load | 15% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 15% |
store | 12% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x60,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 6dd30 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x484(%R15),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6e4d2 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x672> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x478(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 6e494 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x634> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x118(%R15),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x480(%R15),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xd8(%R15),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RBX,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RCX,%R8,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R10,%R10,2),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x140(%R15),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%R15),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R13,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R12,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x5,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EDI,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%RDI),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
AND $-0x4,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R10D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDI,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVSD 0x10(%R15),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x100(%R15),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%R15),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x478(%R15),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x10(%R9),%XMM8,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
LEA (%R13,%R13,2),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x180(%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP %XMM8,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%R10,%RSI,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULPD (%R9),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VADDSD 0x10(%R14),%XMM9,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
LEA (%RBX,%R13,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VADDPD (%R14),%XMM5,%XMM1 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM4,0x10(%R14) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM1,(%R14) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFNMADD213SD (%RDI),%XMM9,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM5,%XMM5,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM5,%XMM5,%XMM6 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD %XMM7,%XMM7,%XMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM5,%XMM12,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM6,%XMM9,%XMM13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD (%RCX),%XMM13,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM8,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 6e494 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x634> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | DiracDeterminantRef.cpp:231-256 |
Module | libqmcwfs.so |
nb instructions | 92 |
nb uops | 94 |
loop length | 390 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 15.67 cycles |
front end | 15.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 4.83 | 10.33 | 10.33 | 9.50 | 4.67 | 4.30 | 9.50 | 9.50 | 9.50 | 4.20 | 10.33 |
cycles | 5.00 | 4.83 | 10.33 | 10.33 | 9.50 | 4.67 | 4.30 | 9.50 | 9.50 | 9.50 | 4.20 | 10.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 15.70-15.74 |
Stall cycles | 0.00 |
Front-end | 15.67 |
Dispatch | 10.33 |
Overall L1 | 15.67 |
all | 9% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 17% |
load | 25% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 14% |
load | 25% |
store | 8% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 11% |
load | NA (no load vectorizable/vectorized instructions) |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 14% |
load | 15% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 15% |
store | 12% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x60,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 6dd30 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x484(%R15),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6e4d2 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x672> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x478(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 6e494 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x634> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x118(%R15),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x480(%R15),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xd8(%R15),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RBX,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RCX,%R8,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R10,%R10,2),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x140(%R15),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%R15),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R13,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R12,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x5,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EDI,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%RDI),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
AND $-0x4,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R10D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDI,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVSD 0x10(%R15),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x100(%R15),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%R15),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x478(%R15),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x10(%R9),%XMM8,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
LEA (%R13,%R13,2),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x180(%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP %XMM8,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%R10,%RSI,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULPD (%R9),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VADDSD 0x10(%R14),%XMM9,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
LEA (%RBX,%R13,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VADDPD (%R14),%XMM5,%XMM1 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM4,0x10(%R14) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM1,(%R14) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFNMADD213SD (%RDI),%XMM9,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM5,%XMM5,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM5,%XMM5,%XMM6 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD %XMM7,%XMM7,%XMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM5,%XMM12,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM6,%XMM9,%XMM13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD (%RCX),%XMM13,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM8,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 6e494 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x634> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::DiracDeterminantRef | 0.09 | 0.08 |
▼Loop 732 - DiracDeterminantRef.cpp:247-252 - libqmcwfs.so– | 0 | 0 |
○Loop 734 - OperatorTags.h:63-94 - libqmcwfs.so | 0.07 | 0.06 |
○Loop 733 - inner_product.hpp:82-83 - libqmcwfs.so | 0.02 | 0.02 |