Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGL ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:152-181 [...] | Coverage: 0.48% |
---|
Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGL ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:152-181 [...] | Coverage: 0.48% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 156 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
149: inline TinyVector<T, D> dot(const T* a, const TinyVector<T, D>* b, int n) |
[...] |
155: for (int i = 0; i < n; i++) |
156: res += a[i] * b[i]; |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 62 - 62 |
-------------------------------------------------------------------------------- |
62: X[d] = T(0); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 129 - 131 |
-------------------------------------------------------------------------------- |
129: Type_t res = lhs[0] * rhs[0]; |
130: for (unsigned d = 1; d < D; ++d) |
131: res += lhs[d] * rhs[d]; |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 217 - 217 |
-------------------------------------------------------------------------------- |
217: inline Type_t* operator[](size_type i) { return X.data() + i * D2; } |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/unique_ptr.h: 199 - 199 |
-------------------------------------------------------------------------------- |
199: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 248 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 152 - 181 |
-------------------------------------------------------------------------------- |
152: void DiracDeterminantRef<DU_TYPE>::evaluateGL(ParticleSet& P, |
153: ParticleSet::ParticleGradient& G, |
154: ParticleSet::ParticleLaplacian& L, |
155: bool fromscratch) |
156: { |
157: if (UpdateMode == ORB_PBYP_RATIO) |
158: { //need to compute dpsiM and d2psiM. Do not touch psiM! |
159: SPOVGLTimer->start(); |
160: Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp, dpsiM, d2psiM); |
161: SPOVGLTimer->stop(); |
162: } |
163: |
164: if (NumPtcls == 1) |
[...] |
173: for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat) |
174: { |
175: mValueType dot_temp = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
176: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
177: G[iat] += rv; |
178: L[iat] += dot_temp - dot(rv, rv); |
179: } |
180: } |
181: } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
0x760e0 PUSH %R13 |
0x760e2 LEA 0x10(%RSP),%R13 |
0x760e7 AND $-0x20,%RSP |
0x760eb PUSHQ -0x8(%R13) |
0x760ef PUSH %RBP |
0x760f0 MOV %RSP,%RBP |
0x760f3 PUSH %R15 |
0x760f5 PUSH %R14 |
0x760f7 MOV %RDX,%R14 |
0x760fa PUSH %R13 |
0x760fc PUSH %R12 |
0x760fe MOV %RCX,%R12 |
0x76101 PUSH %RBX |
0x76102 SUB $0x68,%RSP |
0x76106 MOV %RDI,-0x50(%RBP) |
0x7610a MOV 0xc(%RDI),%ECX |
0x7610d TEST %ECX,%ECX |
0x7610f JE 76901 |
0x76115 MOV -0x50(%RBP),%RDI |
0x76119 MOV 0x484(%RDI),%EDX |
0x7611f CMP $0x1,%EDX |
0x76122 JE 768e2 |
0x76128 MOVSXD %EDX,%RSI |
0x7612b MOVSXD 0x478(%RDI),%R10 |
0x76132 MOV %RSI,-0x60(%RBP) |
0x76136 TEST %EDX,%EDX |
0x76138 JE 7685e |
0x7613e MOV 0x480(%RDI),%EBX |
0x76144 MOV 0xd8(%RDI),%R15 |
0x7614b LEA (%R10,%R10,2),%R13 |
0x7614f MOV 0x100(%RDI),%R11 |
0x76156 MOV 0x158(%RDI),%R9 |
0x7615d MOV 0x180(%RDI),%R8 |
0x76164 MOV 0x18(%R14),%R14 |
0x76168 MOV %EBX,%EDI |
0x7616a LEA (,%R15,8),%RCX |
0x76172 MOV 0x18(%R12),%R12 |
0x76177 LEA -0x1(%RBX),%EAX |
0x7617a AND $-0x4,%EDI |
0x7617d MOV %R9,-0x70(%RBP) |
0x76181 MOV %R15,-0x68(%RBP) |
0x76185 LEA (%R14,%R13,8),%R9 |
0x76189 MOV %EBX,%R13D |
0x7618c VMOVDQA 0x1222c(%RIP),%YMM9 |
0x76194 MOV %RCX,-0x58(%RBP) |
0x76198 LEA (%R12,%R10,8),%R10 |
0x7619c SHR $0x2,%R13D |
0x761a0 VMOVDQA 0x12238(%RIP),%YMM8 |
0x761a8 MOV %EAX,-0x40(%RBP) |
0x761ab VMOVDQA 0x1224d(%RIP),%YMM7 |
0x761b3 MOV %R11,%RDX |
0x761b6 SAL $0x5,%R13 |
0x761ba VMOVDQA 0x1225e(%RIP),%YMM6 |
0x761c2 VMOVDQA 0x11fd6(%RIP),%YMM5 |
0x761ca MOV %EDI,-0x44(%RBP) |
0x761cd XOR %R12D,%R12D |
0x761d0 VMOVDQA 0x11fe8(%RIP),%YMM4 |
0x761d8 MOV %R11,-0x38(%RBP) |
0x761dc XOR %R11D,%R11D |
0x761df MOV %R10,-0x78(%RBP) |
0x761e3 XOR %R10D,%R10D |
0x761e6 NOPW %CS:(%RAX,%RAX,1) |
(741) 0x761f0 LEA (,%R12,8),%RCX |
(741) 0x761f8 TEST %EBX,%EBX |
(741) 0x761fa JLE 7688e |
(741) 0x76200 CMPL $0x2,-0x40(%RBP) |
(741) 0x76204 JBE 768af |
(741) 0x7620a LEA -0x20(%R13),%R14 |
(741) 0x7620e LEA (%R8,%RCX,1),%R15 |
(741) 0x76212 VXORPD %XMM0,%XMM0,%XMM0 |
(741) 0x76216 XOR %EDI,%EDI |
(741) 0x76218 SHR $0x5,%R14 |
(741) 0x7621c INC %R14 |
(741) 0x7621f AND $0x7,%R14D |
(741) 0x76223 JE 762de |
(741) 0x76229 CMP $0x1,%R14 |
(741) 0x7622d JE 762c1 |
(741) 0x76233 CMP $0x2,%R14 |
(741) 0x76237 JE 762ae |
(741) 0x76239 CMP $0x3,%R14 |
(741) 0x7623d JE 7629a |
(741) 0x7623f CMP $0x4,%R14 |
(741) 0x76243 JE 76286 |
(741) 0x76245 CMP $0x5,%R14 |
(741) 0x76249 JE 76272 |
(741) 0x7624b CMP $0x6,%R14 |
(741) 0x7624f JE 7625f |
(741) 0x76251 VMOVUPD (%RDX),%YMM1 |
(741) 0x76255 MOV $0x20,%EDI |
(741) 0x7625a VMULPD (%R15),%YMM1,%YMM0 |
(741) 0x7625f VMOVUPD (%RDX,%RDI,1),%YMM2 |
(741) 0x76264 VMULPD (%R15,%RDI,1),%YMM2,%YMM3 |
(741) 0x7626a ADD $0x20,%RDI |
(741) 0x7626e VADDPD %YMM3,%YMM0,%YMM0 |
(741) 0x76272 VMOVUPD (%RDX,%RDI,1),%YMM10 |
(741) 0x76277 VMULPD (%R15,%RDI,1),%YMM10,%YMM11 |
(741) 0x7627d ADD $0x20,%RDI |
(741) 0x76281 VADDPD %YMM11,%YMM0,%YMM0 |
(741) 0x76286 VMOVUPD (%RDX,%RDI,1),%YMM12 |
(741) 0x7628b VMULPD (%R15,%RDI,1),%YMM12,%YMM13 |
(741) 0x76291 ADD $0x20,%RDI |
(741) 0x76295 VADDPD %YMM13,%YMM0,%YMM0 |
(741) 0x7629a VMOVUPD (%RDX,%RDI,1),%YMM14 |
(741) 0x7629f VMULPD (%R15,%RDI,1),%YMM14,%YMM15 |
(741) 0x762a5 ADD $0x20,%RDI |
(741) 0x762a9 VADDPD %YMM15,%YMM0,%YMM0 |
(741) 0x762ae VMOVUPD (%RDX,%RDI,1),%YMM1 |
(741) 0x762b3 VMULPD (%R15,%RDI,1),%YMM1,%YMM2 |
(741) 0x762b9 ADD $0x20,%RDI |
(741) 0x762bd VADDPD %YMM2,%YMM0,%YMM0 |
(741) 0x762c1 VMOVUPD (%RDX,%RDI,1),%YMM3 |
(741) 0x762c6 VMULPD (%R15,%RDI,1),%YMM3,%YMM10 |
(741) 0x762cc ADD $0x20,%RDI |
(741) 0x762d0 VADDPD %YMM10,%YMM0,%YMM0 |
(741) 0x762d5 CMP %R13,%RDI |
(741) 0x762d8 JE 76391 |
(743) 0x762de VMOVUPD (%RDX,%RDI,1),%YMM11 |
(743) 0x762e3 VMOVUPD 0x20(%RDX,%RDI,1),%YMM14 |
(743) 0x762e9 VMOVUPD 0x40(%RDX,%RDI,1),%YMM1 |
(743) 0x762ef VMULPD (%R15,%RDI,1),%YMM11,%YMM12 |
(743) 0x762f5 VMULPD 0x20(%R15,%RDI,1),%YMM14,%YMM15 |
(743) 0x762fc VMULPD 0x40(%R15,%RDI,1),%YMM1,%YMM3 |
(743) 0x76303 VADDPD %YMM12,%YMM0,%YMM13 |
(743) 0x76308 VMOVUPD 0x60(%RDX,%RDI,1),%YMM0 |
(743) 0x7630e VMULPD 0x60(%R15,%RDI,1),%YMM0,%YMM11 |
(743) 0x76315 VADDPD %YMM15,%YMM13,%YMM2 |
(743) 0x7631a VMOVUPD 0x80(%RDX,%RDI,1),%YMM13 |
(743) 0x76323 VMULPD 0x80(%R15,%RDI,1),%YMM13,%YMM14 |
(743) 0x7632d VADDPD %YMM3,%YMM2,%YMM10 |
(743) 0x76331 VMOVUPD 0xa0(%RDX,%RDI,1),%YMM2 |
(743) 0x7633a VMULPD 0xa0(%R15,%RDI,1),%YMM2,%YMM1 |
(743) 0x76344 VADDPD %YMM11,%YMM10,%YMM12 |
(743) 0x76349 VMOVUPD 0xc0(%RDX,%RDI,1),%YMM10 |
(743) 0x76352 VMULPD 0xc0(%R15,%RDI,1),%YMM10,%YMM0 |
(743) 0x7635c VADDPD %YMM14,%YMM12,%YMM15 |
(743) 0x76361 VMOVUPD 0xe0(%RDX,%RDI,1),%YMM12 |
(743) 0x7636a VMULPD 0xe0(%R15,%RDI,1),%YMM12,%YMM13 |
(743) 0x76374 ADD $0x100,%RDI |
(743) 0x7637b VADDPD %YMM1,%YMM15,%YMM3 |
(743) 0x7637f VADDPD %YMM0,%YMM3,%YMM11 |
(743) 0x76383 VADDPD %YMM13,%YMM11,%YMM0 |
(743) 0x76388 CMP %R13,%RDI |
(743) 0x7638b JNE 762de |
(741) 0x76391 VEXTRACTF64X2 $0x1,%YMM0,%XMM14 |
(741) 0x76398 MOV -0x44(%RBP),%EAX |
(741) 0x7639b VADDPD %XMM0,%XMM14,%XMM15 |
(741) 0x7639f VUNPCKHPD %XMM15,%XMM15,%XMM2 |
(741) 0x763a4 VADDPD %XMM15,%XMM2,%XMM1 |
(741) 0x763a9 CMP %EAX,%EBX |
(741) 0x763ab JE 76873 |
(741) 0x763b1 VADDPD %XMM14,%XMM0,%XMM0 |
(741) 0x763b6 MOV %EAX,%ESI |
(741) 0x763b8 MOV %EBX,%EDI |
(741) 0x763ba SUB %ESI,%EDI |
(741) 0x763bc CMP $0x1,%EDI |
(741) 0x763bf JE 763eb |
(741) 0x763c1 MOV -0x38(%RBP),%R15 |
(741) 0x763c5 LEA (%R11,%RSI,1),%R14 |
(741) 0x763c9 ADD %R12,%RSI |
(741) 0x763cc VMOVUPD (%R15,%R14,8),%XMM1 |
(741) 0x763d2 VFMADD231PD (%R8,%RSI,8),%XMM1,%XMM0 |
(741) 0x763d8 VUNPCKHPD %XMM0,%XMM0,%XMM3 |
(741) 0x763dc VADDPD %XMM0,%XMM3,%XMM1 |
(741) 0x763e0 TEST $0x1,%DIL |
(741) 0x763e4 JE 763fb |
(741) 0x763e6 AND $-0x2,%EDI |
(741) 0x763e9 ADD %EDI,%EAX |
(741) 0x763eb CLTQ |
(741) 0x763ed ADD %R8,%RCX |
(741) 0x763f0 VMOVSD (%RDX,%RAX,8),%XMM10 |
(741) 0x763f5 VFMADD231SD (%RCX,%RAX,8),%XMM10,%XMM1 |
(741) 0x763fb MOV -0x50(%RBP),%RSI |
(741) 0x763ff MOV 0x118(%RSI),%RCX |
(741) 0x76406 MOV 0x140(%RSI),%RDI |
(741) 0x7640d IMUL %R10,%RCX |
(741) 0x76411 CMPL $0x2,-0x40(%RBP) |
(741) 0x76415 JBE 768c0 |
(741) 0x7641b LEA (%RCX,%RCX,2),%R14 |
(741) 0x7641f VXORPD %XMM10,%XMM10,%XMM10 |
(741) 0x76424 LEA (%RDX,%R13,1),%R15 |
(741) 0x76428 MOV %RDX,%RSI |
(741) 0x7642b LEA (%RDI,%R14,8),%RAX |
(741) 0x7642f LEA -0x20(%R13),%R14 |
(741) 0x76433 VMOVAPD %YMM10,%YMM12 |
(741) 0x76438 SHR $0x5,%R14 |
(741) 0x7643c VMOVAPD %YMM10,%YMM0 |
(741) 0x76440 INC %R14 |
(741) 0x76443 AND $0x3,%R14D |
(741) 0x76447 JE 76567 |
(741) 0x7644d CMP $0x1,%R14 |
(741) 0x76451 JE 76508 |
(741) 0x76457 CMP $0x2,%R14 |
(741) 0x7645b JE 764b2 |
(741) 0x7645d VMOVUPD (%RAX),%YMM0 |
(741) 0x76461 VMOVUPD 0x20(%RAX),%YMM13 |
(741) 0x76466 LEA 0x20(%RDX),%RSI |
(741) 0x7646a ADD $0x60,%RAX |
(741) 0x7646e VMOVUPD -0x20(%RAX),%YMM15 |
(741) 0x76473 VMOVUPD (%RDX),%YMM14 |
(741) 0x76477 VMOVAPD %YMM0,%YMM12 |
(741) 0x7647b VMOVAPD %YMM0,%YMM2 |
(741) 0x7647f VPERMT2PD %YMM13,%YMM5,%YMM0 |
(741) 0x76485 VPERMT2PD %YMM13,%YMM9,%YMM12 |
(741) 0x7648b VPERMT2PD %YMM13,%YMM7,%YMM2 |
(741) 0x76491 VPERMT2PD %YMM15,%YMM4,%YMM0 |
(741) 0x76497 VPERMT2PD %YMM15,%YMM8,%YMM12 |
(741) 0x7649d VFMADD132PD %YMM14,%YMM10,%YMM0 |
(741) 0x764a2 VPERMT2PD %YMM15,%YMM6,%YMM2 |
(741) 0x764a8 VFMADD132PD %YMM14,%YMM10,%YMM12 |
(741) 0x764ad VFMADD231PD %YMM14,%YMM2,%YMM10 |
(741) 0x764b2 VMOVUPD (%RAX),%YMM2 |
(741) 0x764b6 VMOVUPD 0x20(%RAX),%YMM15 |
(741) 0x764bb ADD $0x20,%RSI |
(741) 0x764bf ADD $0x60,%RAX |
(741) 0x764c3 VMOVUPD -0x20(%RAX),%YMM14 |
(741) 0x764c8 VMOVUPD -0x20(%RSI),%YMM3 |
(741) 0x764cd VMOVAPD %YMM2,%YMM13 |
(741) 0x764d1 VMOVAPD %YMM2,%YMM11 |
(741) 0x764d5 VPERMT2PD %YMM15,%YMM5,%YMM2 |
(741) 0x764db VPERMT2PD %YMM15,%YMM9,%YMM13 |
(741) 0x764e1 VPERMT2PD %YMM15,%YMM7,%YMM11 |
(741) 0x764e7 VPERMT2PD %YMM14,%YMM4,%YMM2 |
(741) 0x764ed VPERMT2PD %YMM14,%YMM8,%YMM13 |
(741) 0x764f3 VFMADD231PD %YMM3,%YMM2,%YMM0 |
(741) 0x764f8 VPERMT2PD %YMM14,%YMM6,%YMM11 |
(741) 0x764fe VFMADD231PD %YMM3,%YMM13,%YMM12 |
(741) 0x76503 VFMADD231PD %YMM3,%YMM11,%YMM10 |
(741) 0x76508 VMOVUPD (%RAX),%YMM2 |
(741) 0x7650c VMOVUPD 0x20(%RAX),%YMM15 |
(741) 0x76511 ADD $0x20,%RSI |
(741) 0x76515 ADD $0x60,%RAX |
(741) 0x76519 VMOVUPD -0x20(%RAX),%YMM14 |
(741) 0x7651e VMOVUPD -0x20(%RSI),%YMM3 |
(741) 0x76523 VMOVAPD %YMM2,%YMM13 |
(741) 0x76527 VMOVAPD %YMM2,%YMM11 |
(741) 0x7652b VPERMT2PD %YMM15,%YMM5,%YMM2 |
(741) 0x76531 VPERMT2PD %YMM15,%YMM9,%YMM13 |
(741) 0x76537 VPERMT2PD %YMM15,%YMM7,%YMM11 |
(741) 0x7653d VPERMT2PD %YMM14,%YMM4,%YMM2 |
(741) 0x76543 VPERMT2PD %YMM14,%YMM8,%YMM13 |
(741) 0x76549 VFMADD231PD %YMM3,%YMM2,%YMM0 |
(741) 0x7654e VPERMT2PD %YMM14,%YMM6,%YMM11 |
(741) 0x76554 VFMADD231PD %YMM3,%YMM13,%YMM12 |
(741) 0x76559 VFMADD231PD %YMM3,%YMM11,%YMM10 |
(741) 0x7655e CMP %RSI,%R15 |
(741) 0x76561 JE 766c9 |
(742) 0x76567 VMOVUPD 0x20(%RAX),%YMM15 |
(742) 0x7656c VMOVUPD (%RAX),%YMM2 |
(742) 0x76570 SUB $-0x80,%RSI |
(742) 0x76574 ADD $0x180,%RAX |
(742) 0x7657a VMOVUPD -0x140(%RAX),%YMM14 |
(742) 0x76582 VMOVUPD -0x80(%RSI),%YMM13 |
(742) 0x76587 VMOVAPD %YMM2,%YMM11 |
(742) 0x7658b VMOVAPD %YMM2,%YMM3 |
(742) 0x7658f VPERMT2PD %YMM15,%YMM5,%YMM2 |
(742) 0x76595 VMOVUPD -0x100(%RAX),%YMM17 |
(742) 0x7659c VPERMT2PD %YMM14,%YMM4,%YMM2 |
(742) 0x765a2 VPERMT2PD %YMM15,%YMM9,%YMM11 |
(742) 0x765a8 VPERMT2PD %YMM15,%YMM7,%YMM3 |
(742) 0x765ae VMOVUPD -0xa0(%RAX),%YMM18 |
(742) 0x765b5 VFMADD231PD %YMM13,%YMM2,%YMM0 |
(742) 0x765ba VPERMT2PD %YMM14,%YMM6,%YMM3 |
(742) 0x765c0 VPERMT2PD %YMM14,%YMM8,%YMM11 |
(742) 0x765c6 VMOVUPD -0xe0(%RAX),%YMM2 |
(742) 0x765ce VFMADD132PD %YMM13,%YMM12,%YMM11 |
(742) 0x765d3 VFMADD231PD %YMM13,%YMM3,%YMM10 |
(742) 0x765d8 VMOVUPD -0x120(%RAX),%YMM3 |
(742) 0x765e0 VMOVUPD -0x60(%RSI),%YMM14 |
(742) 0x765e5 VMOVUPD -0x40(%RAX),%YMM15 |
(742) 0x765ea VMOVAPD %YMM3,%YMM13 |
(742) 0x765ee VMOVAPD %YMM3,%YMM12 |
(742) 0x765f2 VPERMT2PD %YMM17,%YMM5,%YMM3 |
(742) 0x765f8 VPERMT2PD %YMM2,%YMM4,%YMM3 |
(742) 0x765fe VPERMT2PD %YMM17,%YMM9,%YMM13 |
(742) 0x76604 VPERMT2PD %YMM17,%YMM7,%YMM12 |
(742) 0x7660a VFMADD132PD %YMM14,%YMM0,%YMM3 |
(742) 0x7660f VPERMT2PD %YMM2,%YMM8,%YMM13 |
(742) 0x76615 VPERMT2PD %YMM2,%YMM6,%YMM12 |
(742) 0x7661b VMOVUPD -0xc0(%RAX),%YMM2 |
(742) 0x76623 VFMADD132PD %YMM14,%YMM11,%YMM13 |
(742) 0x76628 VFMADD132PD %YMM14,%YMM10,%YMM12 |
(742) 0x7662d VMOVUPD -0x80(%RAX),%YMM0 |
(742) 0x76632 VMOVUPD -0x40(%RSI),%YMM14 |
(742) 0x76637 VMOVAPD %YMM2,%YMM10 |
(742) 0x7663b VMOVAPD %YMM2,%YMM11 |
(742) 0x7663f VPERMT2PD %YMM18,%YMM5,%YMM2 |
(742) 0x76645 VPERMT2PD %YMM18,%YMM7,%YMM10 |
(742) 0x7664b VPERMT2PD %YMM0,%YMM4,%YMM2 |
(742) 0x76651 VPERMT2PD %YMM18,%YMM9,%YMM11 |
(742) 0x76657 VPERMT2PD %YMM0,%YMM6,%YMM10 |
(742) 0x7665d VPERMT2PD %YMM0,%YMM8,%YMM11 |
(742) 0x76663 VMOVUPD -0x60(%RAX),%YMM0 |
(742) 0x76668 VFMADD132PD %YMM14,%YMM3,%YMM2 |
(742) 0x7666d VMOVAPD %YMM10,%YMM3 |
(742) 0x76671 VFMADD132PD %YMM14,%YMM13,%YMM11 |
(742) 0x76676 VFMADD132PD %YMM14,%YMM12,%YMM3 |
(742) 0x7667b VMOVAPD %YMM0,%YMM10 |
(742) 0x7667f VMOVUPD -0x20(%RAX),%YMM14 |
(742) 0x76684 VMOVAPD %YMM0,%YMM12 |
(742) 0x76688 VMOVUPD -0x20(%RSI),%YMM13 |
(742) 0x7668d VPERMT2PD %YMM15,%YMM7,%YMM10 |
(742) 0x76693 VPERMT2PD %YMM15,%YMM5,%YMM0 |
(742) 0x76699 VPERMT2PD %YMM15,%YMM9,%YMM12 |
(742) 0x7669f VPERMT2PD %YMM14,%YMM6,%YMM10 |
(742) 0x766a5 VPERMT2PD %YMM14,%YMM4,%YMM0 |
(742) 0x766ab VPERMT2PD %YMM14,%YMM8,%YMM12 |
(742) 0x766b1 VFMADD132PD %YMM13,%YMM2,%YMM0 |
(742) 0x766b6 VFMADD132PD %YMM13,%YMM11,%YMM12 |
(742) 0x766bb VFMADD132PD %YMM13,%YMM3,%YMM10 |
(742) 0x766c0 CMP %RSI,%R15 |
(742) 0x766c3 JNE 76567 |
(741) 0x766c9 VEXTRACTF64X2 $0x1,%YMM10,%XMM14 |
(741) 0x766d0 VEXTRACTF64X2 $0x1,%YMM12,%XMM15 |
(741) 0x766d7 VEXTRACTF64X2 $0x1,%YMM0,%XMM19 |
(741) 0x766de MOV -0x44(%RBP),%EAX |
(741) 0x766e1 VADDPD %XMM10,%XMM14,%XMM11 |
(741) 0x766e6 VADDPD %XMM12,%XMM15,%XMM3 |
(741) 0x766eb VUNPCKHPD %XMM11,%XMM11,%XMM2 |
(741) 0x766f0 VUNPCKHPD %XMM3,%XMM3,%XMM13 |
(741) 0x766f4 VADDPD %XMM11,%XMM2,%XMM2 |
(741) 0x766f9 VADDPD %XMM3,%XMM13,%XMM11 |
(741) 0x766fd VADDPD %XMM0,%XMM19,%XMM13 |
(741) 0x76703 VUNPCKHPD %XMM13,%XMM13,%XMM3 |
(741) 0x76708 VADDPD %XMM13,%XMM3,%XMM13 |
(741) 0x7670d VUNPCKLPD %XMM11,%XMM13,%XMM13 |
(741) 0x76712 CMP %EAX,%EBX |
(741) 0x76714 JE 767e9 |
(741) 0x7671a VADDPD %XMM15,%XMM12,%XMM11 |
(741) 0x7671f VADDPD %XMM19,%XMM0,%XMM16 |
(741) 0x76725 MOV %EAX,%ESI |
(741) 0x76727 VADDPD %XMM14,%XMM10,%XMM12 |
(741) 0x7672c MOV %EBX,%R15D |
(741) 0x7672f SUB %ESI,%R15D |
(741) 0x76732 MOV %R15D,-0x3c(%RBP) |
(741) 0x76736 CMP $0x1,%R15D |
(741) 0x7673a JE 767c7 |
(741) 0x76740 LEA (%RCX,%RSI,1),%R14 |
(741) 0x76744 ADD %R11,%RSI |
(741) 0x76747 LEA (%R14,%R14,2),%R15 |
(741) 0x7674b MOV -0x38(%RBP),%R14 |
(741) 0x7674f LEA (%RDI,%R15,8),%R15 |
(741) 0x76753 VMOVUPD (%R15),%XMM0 |
(741) 0x76758 VMOVUPD 0x10(%R15),%XMM15 |
(741) 0x7675e VMOVUPD 0x20(%R15),%XMM14 |
(741) 0x76764 VMOVUPD (%R14,%RSI,8),%XMM10 |
(741) 0x7676a VUNPCKLPD %XMM0,%XMM15,%XMM13 |
(741) 0x7676e VPERMILPD $0x1,%XMM0,%XMM2 |
(741) 0x76774 VBLENDPD $0x2,%XMM15,%XMM0,%XMM0 |
(741) 0x7677a MOV -0x3c(%RBP),%R15D |
(741) 0x7677e VUNPCKLPD %XMM14,%XMM2,%XMM3 |
(741) 0x76783 VFMADD132PD %XMM10,%XMM16,%XMM0 |
(741) 0x76789 VBLENDPD $0x2,%XMM14,%XMM13,%XMM14 |
(741) 0x7678f VFMADD132PD %XMM10,%XMM11,%XMM3 |
(741) 0x76794 VFMADD132PD %XMM14,%XMM12,%XMM10 |
(741) 0x76799 MOV %R15D,%ESI |
(741) 0x7679c AND $0x1,%ESI |
(741) 0x7679f VUNPCKHPD %XMM0,%XMM0,%XMM15 |
(741) 0x767a3 VUNPCKHPD %XMM10,%XMM10,%XMM11 |
(741) 0x767a8 VUNPCKHPD %XMM3,%XMM3,%XMM12 |
(741) 0x767ac VADDPD %XMM10,%XMM11,%XMM2 |
(741) 0x767b1 VADDPD %XMM3,%XMM12,%XMM10 |
(741) 0x767b5 VADDPD %XMM0,%XMM15,%XMM3 |
(741) 0x767b9 VUNPCKLPD %XMM10,%XMM3,%XMM13 |
(741) 0x767be JE 767e9 |
(741) 0x767c0 AND $-0x2,%R15D |
(741) 0x767c4 ADD %R15D,%EAX |
(741) 0x767c7 CLTQ |
(741) 0x767c9 ADD %RAX,%RCX |
(741) 0x767cc VMOVDDUP (%RDX,%RAX,8),%XMM14 |
(741) 0x767d1 LEA (%RCX,%RCX,2),%RCX |
(741) 0x767d5 LEA (%RDI,%RCX,8),%RDI |
(741) 0x767d9 VMOVSD 0x10(%RDI),%XMM0 |
(741) 0x767de VFMADD231PD (%RDI),%XMM14,%XMM13 |
(741) 0x767e3 VFMADD231SD (%RDX,%RAX,8),%XMM0,%XMM2 |
(741) 0x767e9 VMULSD %XMM13,%XMM13,%XMM15 |
(741) 0x767ee VUNPCKHPD %XMM13,%XMM13,%XMM11 |
(741) 0x767f3 VMULSD %XMM11,%XMM11,%XMM10 |
(741) 0x767f8 VMULSD %XMM2,%XMM2,%XMM14 |
(741) 0x767fc VADDPD (%R9),%XMM13,%XMM3 |
(741) 0x76801 VADDSD 0x10(%R9),%XMM2,%XMM2 |
(741) 0x76807 ADD $0x18,%R9 |
(741) 0x7680b MOV -0x78(%RBP),%RAX |
(741) 0x7680f VADDSD %XMM10,%XMM15,%XMM13 |
(741) 0x76814 MOV -0x58(%RBP),%R14 |
(741) 0x76818 MOV -0x68(%RBP),%R15 |
(741) 0x7681c MOV -0x70(%RBP),%RSI |
(741) 0x76820 VMOVUPD %XMM3,-0x18(%R9) |
(741) 0x76826 MOV -0x60(%RBP),%RCX |
(741) 0x7682a ADD %R14,%RDX |
(741) 0x7682d VMOVSD %XMM2,-0x8(%R9) |
(741) 0x76833 ADD %R15,%R11 |
(741) 0x76836 ADD %RSI,%R12 |
(741) 0x76839 VADDSD (%RAX,%R10,8),%XMM1,%XMM1 |
(741) 0x7683f VSUBSD %XMM13,%XMM1,%XMM0 |
(741) 0x76844 VSUBSD %XMM14,%XMM0,%XMM11 |
(741) 0x76849 VMOVSD %XMM11,(%RAX,%R10,8) |
(741) 0x7684f INC %R10 |
(741) 0x76852 CMP %RCX,%R10 |
(741) 0x76855 JNE 761f0 |
0x7685b VZEROUPPER |
0x7685e LEA -0x28(%RBP),%RSP |
0x76862 POP %RBX |
0x76863 POP %R12 |
0x76865 POP %R8 |
0x76867 POP %R14 |
0x76869 POP %R15 |
0x7686b POP %RBP |
0x7686c LEA -0x10(%R8),%RSP |
0x76870 POP %R13 |
0x76872 RET |
(741) 0x76873 MOV -0x50(%RBP),%RCX |
(741) 0x76877 MOV 0x140(%RCX),%RDI |
(741) 0x7687e MOV 0x118(%RCX),%RCX |
(741) 0x76885 IMUL %R10,%RCX |
(741) 0x76889 JMP 7641b |
(741) 0x7688e VXORPD %XMM14,%XMM14,%XMM14 |
(741) 0x76893 VXORPD %XMM13,%XMM13,%XMM13 |
(741) 0x76898 VMOVSD %XMM14,%XMM14,%XMM10 |
(741) 0x7689d VMOVSD %XMM14,%XMM14,%XMM15 |
(741) 0x768a2 VMOVSD %XMM14,%XMM14,%XMM1 |
(741) 0x768a6 VMOVSD %XMM14,%XMM14,%XMM2 |
(741) 0x768aa JMP 767fc |
(741) 0x768af VXORPD %XMM0,%XMM0,%XMM0 |
(741) 0x768b3 XOR %ESI,%ESI |
(741) 0x768b5 VXORPD %XMM1,%XMM1,%XMM1 |
(741) 0x768b9 XOR %EAX,%EAX |
(741) 0x768bb JMP 763b8 |
(741) 0x768c0 VXORPD %XMM13,%XMM13,%XMM13 |
(741) 0x768c5 XOR %ESI,%ESI |
(741) 0x768c7 VXORPD %XMM2,%XMM2,%XMM2 |
(741) 0x768cb XOR %EAX,%EAX |
(741) 0x768cd VMOVAPD %XMM13,%XMM12 |
(741) 0x768d2 VMOVAPD %XMM13,%XMM11 |
(741) 0x768d7 VMOVAPD %XMM13,%XMM16 |
(741) 0x768dd JMP 7672c |
0x768e2 LEA -0x28(%RBP),%RSP |
0x768e6 MOV %R12,%RDX |
0x768e9 MOV %R14,%RSI |
0x768ec POP %RBX |
0x768ed POP %R12 |
0x768ef POP %R9 |
0x768f1 POP %R14 |
0x768f3 POP %R15 |
0x768f5 POP %RBP |
0x768f6 LEA -0x10(%R9),%RSP |
0x768fa POP %R13 |
0x768fc JMP 6ad20 |
0x76901 MOV %RDI,%RBX |
0x76904 MOV 0x468(%RDI),%RDI |
0x7690b MOV %RSI,-0x38(%RBP) |
0x7690f CALL 8560 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> |
0x76914 MOV 0x470(%RBX),%RDI |
0x7691b SUB $0x8,%RSP |
0x7691f LEA 0x150(%RBX),%R8 |
0x76926 MOV 0x478(%RBX),%EDX |
0x7692c MOV 0x47c(%RBX),%ECX |
0x76932 LEA 0x110(%RBX),%R9 |
0x76939 MOV (%RDI),%RAX |
0x7693c PUSH %R8 |
0x7693e LEA 0x90(%RBX),%R8 |
0x76945 MOV -0x38(%RBP),%RSI |
0x76949 CALLQ 0x28(%RAX) |
0x7694c POP %RAX |
0x7694d MOV 0x468(%RBX),%RDI |
0x76954 POP %RDX |
0x76955 CALL 8460 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> |
0x7695a JMP 76115 |
0x7695f NOP |
Path / |
Source file and lines | DiracDeterminantRef.cpp:152-181 |
Module | libqmcwfs.so |
nb instructions | 106 |
nb uops | 112 |
loop length | 422 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 6 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 18.67 cycles |
front end | 18.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 2.73 | 14.33 | 14.33 | 11.00 | 2.73 | 3.50 | 11.00 | 11.00 | 11.00 | 2.53 | 14.33 |
cycles | 3.50 | 2.73 | 14.33 | 14.33 | 11.00 | 2.73 | 3.50 | 11.00 | 11.00 | 11.00 | 2.53 | 14.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 18.37-18.38 |
Stall cycles | 0.00 |
Front-end | 18.67 |
Dispatch | 14.33 |
Overall L1 | 18.67 |
all | 22% |
load | 50% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 18% |
load | 29% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
LEA 0x10(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
PUSHQ -0x8(%R13) | 2 | 0 | 0 | 0.33 | 0.33 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0.33 | 5-12 | 0.62 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 76901 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x821> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x484(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 768e2 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x802> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD 0x478(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 7685e <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x77e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x480(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R10,%R10,2),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x100(%RDI),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x158(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x180(%RDI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R14),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (,%R15,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
AND $-0x4,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R13,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQA 0x1222c(%RIP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R12,%R10,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R13D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0x12238(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %EAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0x1224d(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x5,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0x1225e(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x11fd6(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %EDI,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA 0x11fe8(%RIP),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
LEA -0x10(%R8),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
LEA -0x10(%R9),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 6ad20 <_ZN11qmcplusplus16DiracDeterminantINS_13DelayedUpdateIddEEE10evaluateGLERNS_11ParticleSetERNS_14ParticleAttribINS_10TinyVectorIdLj3EEESaIS8_EEERNS6_IdSaIdEEEb.part.0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x468(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 8560 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x470(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x150(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x478(%RBX),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x47c(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x110(%RBX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
PUSH %R8 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
LEA 0x90(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALLQ 0x28(%RAX) | 3 | 0.70 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.70 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 0 | 2.27 |
POP %RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
MOV 0x468(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
POP %RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
CALL 8460 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
JMP 76115 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x35> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | DiracDeterminantRef.cpp:152-181 |
Module | libqmcwfs.so |
nb instructions | 106 |
nb uops | 112 |
loop length | 422 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 6 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 18.67 cycles |
front end | 18.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 2.73 | 14.33 | 14.33 | 11.00 | 2.73 | 3.50 | 11.00 | 11.00 | 11.00 | 2.53 | 14.33 |
cycles | 3.50 | 2.73 | 14.33 | 14.33 | 11.00 | 2.73 | 3.50 | 11.00 | 11.00 | 11.00 | 2.53 | 14.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 18.37-18.38 |
Stall cycles | 0.00 |
Front-end | 18.67 |
Dispatch | 14.33 |
Overall L1 | 18.67 |
all | 22% |
load | 50% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 18% |
load | 29% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
LEA 0x10(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
PUSHQ -0x8(%R13) | 2 | 0 | 0 | 0.33 | 0.33 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0.33 | 5-12 | 0.62 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 76901 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x821> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x484(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 768e2 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x802> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EDX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD 0x478(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 7685e <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x77e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x480(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R10,%R10,2),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x100(%RDI),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x158(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x180(%RDI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R14),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (,%R15,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
AND $-0x4,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R13,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQA 0x1222c(%RIP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R12,%R10,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R13D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0x12238(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %EAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA 0x1224d(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x5,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0x1225e(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x11fd6(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %EDI,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA 0x11fe8(%RIP),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
LEA -0x10(%R8),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
LEA -0x10(%R9),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 6ad20 <_ZN11qmcplusplus16DiracDeterminantINS_13DelayedUpdateIddEEE10evaluateGLERNS_11ParticleSetERNS_14ParticleAttribINS_10TinyVectorIdLj3EEESaIS8_EEERNS6_IdSaIdEEEb.part.0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x468(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 8560 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x470(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x150(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x478(%RBX),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x47c(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x110(%RBX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
PUSH %R8 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
LEA 0x90(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALLQ 0x28(%RAX) | 3 | 0.70 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.70 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 0 | 2.27 |
POP %RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
MOV 0x468(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
POP %RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
CALL 8460 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
JMP 76115 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb+0x35> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE10evaluateGLERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEEb– | 0.48 | 0.47 |
▼Loop 741 - DiracDeterminantRef.cpp:173-178 - libqmcwfs.so– | 0 | 0 |
○Loop 742 - OperatorTags.h:63-94 - libqmcwfs.so | 0.3 | 0.25 |
○Loop 743 - inner_product.hpp:82-83 - libqmcwfs.so | 0.18 | 0.15 |