Function: _ZN11qmcplusplus6SPOSet17evaluateDetRatiosERKNS_18VirtualParticleSetERNS_6VectorIdSaIdEEER ... | Module: exec | Source: SPOSet.h:81-88 [...] | Coverage: 0.39% |
---|
Function: _ZN11qmcplusplus6SPOSet17evaluateDetRatiosERKNS_18VirtualParticleSetERNS_6VectorIdSaIdEEER ... | Module: exec | Source: SPOSet.h:81-88 [...] | Coverage: 0.39% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 83 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1046 - 1046 |
-------------------------------------------------------------------------------- |
1046: return *(this->_M_impl._M_start + __n); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/SPOSet.h: 81 - 88 |
-------------------------------------------------------------------------------- |
81: { |
82: assert(psi.size() == psiinv.size()); |
83: for (int iat = 0; iat < VP.getTotalNum(); ++iat) |
84: { |
85: evaluate(VP, iat, psi); |
86: ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); |
87: } |
88: } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 178 - 249 |
-------------------------------------------------------------------------------- |
178: inline size_t size() const { return nLocal; } |
[...] |
248: inline pointer data() { return X; } |
249: inline const_pointer data() const { return X; } |
0x43da10 PUSH %RBP |
0x43da11 MOV %RSP,%RBP |
0x43da14 PUSH %R15 |
0x43da16 PUSH %R14 |
0x43da18 PUSH %R13 |
0x43da1a PUSH %R12 |
0x43da1c PUSH %RBX |
0x43da1d PUSH %RAX |
0x43da1e MOV %RCX,-0x30(%RBP) |
0x43da22 CMPQ $0,0x260(%RSI) |
0x43da2a JE 43db09 |
0x43da30 MOV %R8,%RBX |
0x43da33 MOV %RDX,%R15 |
0x43da36 MOV %RSI,%R12 |
0x43da39 MOV %RDI,%R13 |
0x43da3c XOR %R14D,%R14D |
0x43da3f JMP 43da6e |
0x43da41 NOPW %CS:(%RAX,%RAX,1) |
(895) 0x43da50 VXORPD %XMM0,%XMM0,%XMM0 |
(895) 0x43da54 MOV (%RBX),%RAX |
(895) 0x43da57 VMOVSD %XMM0,(%RAX,%R14,8) |
(895) 0x43da5d INC %R14 |
(895) 0x43da60 CMP %R14,0x260(%R12) |
(895) 0x43da68 JBE 43db09 |
(895) 0x43da6e MOV (%R13),%RAX |
(895) 0x43da72 MOV %R13,%RDI |
(895) 0x43da75 MOV %R12,%RSI |
(895) 0x43da78 MOV %R14D,%EDX |
(895) 0x43da7b MOV %R15,%RCX |
(895) 0x43da7e VZEROUPPER |
(895) 0x43da81 CALLQ 0x10(%RAX) |
(895) 0x43da84 MOV 0x8(%R15),%RDX |
(895) 0x43da88 TEST %EDX,%EDX |
(895) 0x43da8a JLE 43da50 |
(895) 0x43da8c MOV 0x18(%R15),%RAX |
(895) 0x43da90 MOV -0x30(%RBP),%RCX |
(895) 0x43da94 MOV 0x18(%RCX),%RCX |
(895) 0x43da98 MOVSXD %EDX,%RDX |
(895) 0x43da9b MOV %RDX,%RSI |
(895) 0x43da9e AND $-0x4,%RSI |
(895) 0x43daa2 JE 43dae0 |
(895) 0x43daa4 VXORPD %XMM0,%XMM0,%XMM0 |
(895) 0x43daa8 XOR %EDI,%EDI |
(895) 0x43daaa NOPW (%RAX,%RAX,1) |
(897) 0x43dab0 VMOVUPD (%RCX,%RDI,8),%YMM1 |
(897) 0x43dab5 VFMADD231PD (%RAX,%RDI,8),%YMM1,%YMM0 |
(897) 0x43dabb ADD $0x4,%RDI |
(897) 0x43dabf CMP %RSI,%RDI |
(897) 0x43dac2 JL 43dab0 |
(895) 0x43dac4 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
(895) 0x43daca VADDPD %XMM1,%XMM0,%XMM0 |
(895) 0x43dace VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
(895) 0x43dad3 VADDSD %XMM1,%XMM0,%XMM0 |
(895) 0x43dad7 JMP 43daf0 |
0x43dad9 NOPL (%RAX) |
(895) 0x43dae0 VXORPD %XMM0,%XMM0,%XMM0 |
(895) 0x43dae4 XOR %ESI,%ESI |
(895) 0x43dae6 JMP 43daf9 |
0x43dae8 NOPL (%RAX,%RAX,1) |
(896) 0x43daf0 CMP %RSI,%RDX |
(896) 0x43daf3 JE 43da54 |
(896) 0x43daf9 VMOVSD (%RCX,%RSI,8),%XMM1 |
(896) 0x43dafe VFMADD231SD (%RAX,%RSI,8),%XMM1,%XMM0 |
(896) 0x43db04 INC %RSI |
(896) 0x43db07 JMP 43daf0 |
0x43db09 ADD $0x8,%RSP |
0x43db0d POP %RBX |
0x43db0e POP %R12 |
0x43db10 POP %R13 |
0x43db12 POP %R14 |
0x43db14 POP %R15 |
0x43db16 POP %RBP |
0x43db17 VZEROUPPER |
0x43db1a RET |
0x43db1b NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | SPOSet.h:81-88 |
Module | exec |
nb instructions | 30 |
nb uops | 31 |
loop length | 102 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 5.17 cycles |
front end | 5.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.00 | 0.40 | 1.00 | 4.00 | 4.00 | 4.00 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.00 | 0.40 | 1.00 | 4.00 | 4.00 | 4.00 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.10-5.11 |
Stall cycles | 0.00 |
Front-end | 5.17 |
Dispatch | 4.00 |
Overall L1 | 5.17 |
all | 12% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMPQ $0,0x260(%RSI) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 43db09 <_ZN11qmcplusplus6SPOSet17evaluateDetRatiosERKNS_18VirtualParticleSetERNS_6VectorIdSaIdEEERKS6_RSt6vectorIdS5_E+0xf9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43da6e <_ZN11qmcplusplus6SPOSet17evaluateDetRatiosERKNS_18VirtualParticleSetERNS_6VectorIdSaIdEEERKS6_RSt6vectorIdS5_E+0x5e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | SPOSet.h:81-88 |
Module | exec |
nb instructions | 30 |
nb uops | 31 |
loop length | 102 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 5.17 cycles |
front end | 5.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.00 | 0.40 | 1.00 | 4.00 | 4.00 | 4.00 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.00 | 0.40 | 1.00 | 4.00 | 4.00 | 4.00 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.10-5.11 |
Stall cycles | 0.00 |
Front-end | 5.17 |
Dispatch | 4.00 |
Overall L1 | 5.17 |
all | 12% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMPQ $0,0x260(%RSI) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 43db09 <_ZN11qmcplusplus6SPOSet17evaluateDetRatiosERKNS_18VirtualParticleSetERNS_6VectorIdSaIdEEERKS6_RSt6vectorIdS5_E+0xf9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43da6e <_ZN11qmcplusplus6SPOSet17evaluateDetRatiosERKNS_18VirtualParticleSetERNS_6VectorIdSaIdEEERKS6_RSt6vectorIdS5_E+0x5e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN11qmcplusplus6SPOSet17evaluateDetRatiosERKNS_18VirtualParticleSetERNS_6VectorIdSaIdEEERKS6_RSt6vectorIdS5_E– | 0.39 | 0.28 |
▼Loop 895 - inner_product.hpp:82-83 - exec– | 0.01 | 0 |
○Loop 897 - inner_product.hpp:82-83 - exec | 0.39 | 0.27 |
○Loop 896 - inner_product.hpp:82-83 - exec | 0 | 0 |