Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: libqmcwfs.so | Source: einspline_spo_ref.hpp:203-230 [...] | Coverage: 0.87% |
---|
Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: libqmcwfs.so | Source: einspline_spo_ref.hpp:203-230 [...] | Coverage: 0.87% |
---|
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 203 - 230 |
-------------------------------------------------------------------------------- |
203: ScopedTimer local_timer(timer); |
204: |
205: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
206: for (int i = 0; i < nBlocks; ++i) |
207: MultiBsplineEvalRef::evaluate_vgh(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), |
208: nSplinesPerBlock); |
209: } |
210: |
211: inline void evaluate(const ParticleSet& P, |
[...] |
219: for (int i = 0; i < nBlocks; ++i) |
220: { |
221: // in real simulation, phase needs to be applied. Here just fake computation |
222: const int first = i * nBlocks; |
223: for (int j = first; j < std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize); j++) |
224: { |
225: psi_v[j] = psi[i][j - first]; |
226: dpsi_v[j] = grad[i][j - first]; |
227: d2psi_v[j] = hess[i].data(0)[j - first]; |
228: } |
229: } |
230: } |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 231 - 271 |
-------------------------------------------------------------------------------- |
231: inline const AoSElement_t operator[](size_t i) const { return AoSElement_t(myData + i, nGhosts); } |
[...] |
265: inline T* data() { return myData; } |
[...] |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_vector.h: 1124 - 1256 |
-------------------------------------------------------------------------------- |
1124: return *(this->_M_impl._M_start + __n); |
[...] |
1256: { return _M_data_ptr(this->_M_impl._M_start); } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 145 - 145 |
-------------------------------------------------------------------------------- |
145: X[i] = base[i * offset]; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 229 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
0x64060 PUSH %RBP |
0x64061 MOV %RSP,%RBP |
0x64064 PUSH %R15 |
0x64066 PUSH %R14 |
0x64068 PUSH %R13 |
0x6406a MOVSXD %EDX,%R13 |
0x6406d PUSH %R12 |
0x6406f MOV %RSI,%R12 |
0x64072 PUSH %RBX |
0x64073 MOV %RDI,%RBX |
0x64076 SUB $0x78,%RSP |
0x6407a MOV 0x358(%RDI),%R15 |
0x64081 MOV %RCX,-0x88(%RBP) |
0x64088 MOV %R8,-0x90(%RBP) |
0x6408f MOV %R15,%RDI |
0x64092 MOV %R9,-0x98(%RBP) |
0x64099 CALL 8540 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> |
0x6409e LEA 0x48(%RBX),%RSI |
0x640a2 LEA 0x128(%R12),%RDX |
0x640aa CMP 0x124(%R12),%R13D |
0x640b2 JE 640c2 |
0x640b4 MOV 0x40(%R12),%RAX |
0x640b9 LEA (%R13,%R13,2),%RDX |
0x640be LEA (%RAX,%RDX,8),%RDX |
0x640c2 LEA -0x50(%RBP),%RDI |
0x640c6 CALL 13e40 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> |
0x640cb MOV 0x30(%RBX),%R9D |
0x640cf TEST %R9D,%R9D |
0x640d2 JLE 641d3 |
0x640d8 MOV -0x40(%RBP),%R14 |
0x640dc MOV -0x48(%RBP),%RCX |
0x640e0 XOR %R12D,%R12D |
0x640e3 VMOVSD -0x50(%RBP),%XMM5 |
0x640e8 VMOVQ %R14,%XMM2 |
0x640ed VMOVQ %RCX,%XMM1 |
0x640f2 VMOVSD %XMM5,-0x58(%RBP) |
(683) 0x640f7 MOV 0x310(%RBX),%R11 |
(683) 0x640fe LEA (%R12,%R12,2),%R10 |
(683) 0x64102 LEA (%R12,%R12,4),%RSI |
(683) 0x64106 MOV 0x340(%RBX),%RDI |
(683) 0x6410d MOV 0x328(%RBX),%R8 |
(683) 0x64114 MOV 0x2f8(%RBX),%RAX |
(683) 0x6411b SAL $0x3,%RSI |
(683) 0x6411f VMOVSD %XMM2,-0x68(%RBP) |
(683) 0x64124 LEA (%R11,%R10,8),%R13 |
(683) 0x64128 MOV 0x18(%RDI,%RSI,1),%RCX |
(683) 0x6412d VMOVSD -0x58(%RBP),%XMM0 |
(683) 0x64132 VMOVSD %XMM1,-0x60(%RBP) |
(683) 0x64137 MOV 0x18(%R8,%RSI,1),%RDX |
(683) 0x6413c MOV (%RAX,%R12,8),%RDI |
(683) 0x64140 LEA 0x1(%R12),%R14 |
(683) 0x64145 MOV (%R13),%RSI |
(683) 0x64149 MOVSXD 0x40(%RBX),%R8 |
(683) 0x6414d CALL 637b0 <_ZN16miniqmcreference19MultiBsplineEvalRef12evaluate_vghIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_S9_S9_m> |
(683) 0x64152 CMP %R14D,0x30(%RBX) |
(683) 0x64156 VMOVSD -0x60(%RBP),%XMM1 |
(683) 0x6415b VMOVSD -0x68(%RBP),%XMM2 |
(683) 0x64160 JLE 641d3 |
(683) 0x64162 MOV 0x328(%RBX),%RSI |
(683) 0x64169 LEA (%R14,%R14,4),%RDX |
(683) 0x6416d MOV 0x310(%RBX),%RDI |
(683) 0x64174 LEA (%R14,%R14,2),%R8 |
(683) 0x64178 SAL $0x3,%RDX |
(683) 0x6417c MOV 0x340(%RBX),%R9 |
(683) 0x64183 VMOVSD -0x58(%RBP),%XMM0 |
(683) 0x64188 VMOVSD %XMM2,-0x68(%RBP) |
(683) 0x6418d MOV 0x2f8(%RBX),%R13 |
(683) 0x64194 LEA (%RDI,%R8,8),%R11 |
(683) 0x64198 MOV 0x18(%RSI,%RDX,1),%R10 |
(683) 0x6419d VMOVSD %XMM1,-0x60(%RBP) |
(683) 0x641a2 MOV 0x18(%R9,%RDX,1),%RCX |
(683) 0x641a7 MOV (%R11),%RSI |
(683) 0x641aa ADD $0x2,%R12 |
(683) 0x641ae MOV (%R13,%R14,8),%RDI |
(683) 0x641b3 MOVSXD 0x40(%RBX),%R8 |
(683) 0x641b7 MOV %R10,%RDX |
(683) 0x641ba CALL 637b0 <_ZN16miniqmcreference19MultiBsplineEvalRef12evaluate_vghIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_S9_S9_m> |
(683) 0x641bf CMP %R12D,0x30(%RBX) |
(683) 0x641c3 VMOVSD -0x60(%RBP),%XMM1 |
(683) 0x641c8 VMOVSD -0x68(%RBP),%XMM2 |
(683) 0x641cd JG 640f7 |
0x641d3 MOV %R15,%RDI |
0x641d6 CALL 8450 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> |
0x641db MOV 0x30(%RBX),%R15D |
0x641df MOV %R15D,-0x6c(%RBP) |
0x641e3 TEST %R15D,%R15D |
0x641e6 JLE 644a3 |
0x641ec MOVSXD -0x6c(%RBP),%R12 |
0x641f0 VMOVD 0x40(%RBX),%XMM2 |
0x641f5 XOR %R15D,%R15D |
0x641f8 XOR %R14D,%R14D |
0x641fb MOVL $0,-0x58(%RBP) |
0x64202 VMOVD 0x8(%RBX),%XMM3 |
0x64207 VMOVDQA %XMM2,%XMM1 |
0x6420b LEA (%R12,%R12,2),%RAX |
0x6420f LEA (,%R12,8),%RCX |
0x64217 MOVQ $0,-0x60(%RBP) |
0x6421f SAL $0x3,%RAX |
0x64223 MOV %RCX,-0x78(%RBP) |
0x64227 MOV %RAX,-0x80(%RBP) |
0x6422b MOVQ $0,-0x68(%RBP) |
0x64233 NOPL (%RAX,%RAX,1) |
(681) 0x64238 VPMINSD %XMM3,%XMM1,%XMM0 |
(681) 0x6423d VMOVD %XMM0,%EDI |
(681) 0x64241 CMP %EDI,-0x58(%RBP) |
(681) 0x64244 JGE 64478 |
(681) 0x6424a MOV 0x310(%RBX),%RDX |
(681) 0x64251 MOV -0x90(%RBP),%R8 |
(681) 0x64258 XOR %EAX,%EAX |
(681) 0x6425a MOV -0x58(%RBP),%R11D |
(681) 0x6425e MOV -0x88(%RBP),%R13 |
(681) 0x64265 MOV 0x328(%RBX),%R9 |
(681) 0x6426c MOV (%RDX,%R14,1),%R12 |
(681) 0x64270 SUB %R11D,%EDI |
(681) 0x64273 MOV -0x60(%RBP),%RDX |
(681) 0x64277 MOV 0x18(%R13),%R11 |
(681) 0x6427b ADD 0x18(%R8),%RDX |
(681) 0x6427f MOV -0x68(%RBP),%R8 |
(681) 0x64283 ADD %R15,%R9 |
(681) 0x64286 SAL $0x3,%RDI |
(681) 0x6428a MOV -0x98(%RBP),%R13 |
(681) 0x64291 MOV 0x18(%R9),%RSI |
(681) 0x64295 MOVSXD 0x8(%R9),%RCX |
(681) 0x64299 MOV 0x340(%RBX),%R10 |
(681) 0x642a0 ADD %R8,%R11 |
(681) 0x642a3 ADD 0x18(%R13),%R8 |
(681) 0x642a7 LEA -0x8(%RDI),%R13 |
(681) 0x642ab SHR $0x3,%R13 |
(681) 0x642af MOV 0x18(%R10,%R15,1),%R9 |
(681) 0x642b4 LEA (%RSI,%RCX,8),%R10 |
(681) 0x642b8 SAL $0x4,%RCX |
(681) 0x642bc INC %R13 |
(681) 0x642bf ADD %RSI,%RCX |
(681) 0x642c2 AND $0x3,%R13D |
(681) 0x642c6 JE 6438a |
(681) 0x642cc CMP $0x1,%R13 |
(681) 0x642d0 JE 64347 |
(681) 0x642d2 CMP $0x2,%R13 |
(681) 0x642d6 JE 6430d |
(681) 0x642d8 VMOVSD (%R12),%XMM4 |
(681) 0x642de ADD $0x18,%RDX |
(681) 0x642e2 MOV $0x8,%EAX |
(681) 0x642e7 VMOVSD %XMM4,(%R11) |
(681) 0x642ec VMOVSD (%RSI),%XMM7 |
(681) 0x642f0 VMOVSD (%RCX),%XMM6 |
(681) 0x642f4 VMOVHPD (%R10),%XMM7,%XMM8 |
(681) 0x642f9 VMOVSD %XMM6,-0x8(%RDX) |
(681) 0x642fe VMOVUPD %XMM8,-0x18(%RDX) |
(681) 0x64303 VMOVSD (%R9),%XMM9 |
(681) 0x64308 VMOVSD %XMM9,(%R8) |
(681) 0x6430d VMOVSD (%R12,%RAX,1),%XMM10 |
(681) 0x64313 ADD $0x18,%RDX |
(681) 0x64317 VMOVSD %XMM10,(%R11,%RAX,1) |
(681) 0x6431d VMOVSD (%RSI,%RAX,1),%XMM12 |
(681) 0x64322 VMOVSD (%RCX,%RAX,1),%XMM11 |
(681) 0x64327 VMOVHPD (%R10,%RAX,1),%XMM12,%XMM13 |
(681) 0x6432d VMOVSD %XMM11,-0x8(%RDX) |
(681) 0x64332 VMOVUPD %XMM13,-0x18(%RDX) |
(681) 0x64337 VMOVSD (%R9,%RAX,1),%XMM14 |
(681) 0x6433d VMOVSD %XMM14,(%R8,%RAX,1) |
(681) 0x64343 ADD $0x8,%RAX |
(681) 0x64347 VMOVSD (%R12,%RAX,1),%XMM15 |
(681) 0x6434d ADD $0x18,%RDX |
(681) 0x64351 VMOVSD %XMM15,(%R11,%RAX,1) |
(681) 0x64357 VMOVSD (%RSI,%RAX,1),%XMM0 |
(681) 0x6435c VMOVSD (%RCX,%RAX,1),%XMM5 |
(681) 0x64361 VMOVHPD (%R10,%RAX,1),%XMM0,%XMM4 |
(681) 0x64367 VMOVSD %XMM5,-0x8(%RDX) |
(681) 0x6436c VMOVUPD %XMM4,-0x18(%RDX) |
(681) 0x64371 VMOVSD (%R9,%RAX,1),%XMM6 |
(681) 0x64377 VMOVSD %XMM6,(%R8,%RAX,1) |
(681) 0x6437d ADD $0x8,%RAX |
(681) 0x64381 CMP %RAX,%RDI |
(681) 0x64384 JE 64478 |
(682) 0x6438a VMOVSD (%R12,%RAX,1),%XMM7 |
(682) 0x64390 ADD $0x60,%RDX |
(682) 0x64394 VMOVSD %XMM7,(%R11,%RAX,1) |
(682) 0x6439a VMOVSD (%RSI,%RAX,1),%XMM9 |
(682) 0x6439f VMOVSD (%RCX,%RAX,1),%XMM8 |
(682) 0x643a4 VMOVHPD (%R10,%RAX,1),%XMM9,%XMM10 |
(682) 0x643aa VMOVSD %XMM8,-0x50(%RDX) |
(682) 0x643af VMOVUPD %XMM10,-0x60(%RDX) |
(682) 0x643b4 VMOVSD (%R9,%RAX,1),%XMM11 |
(682) 0x643ba VMOVSD %XMM11,(%R8,%RAX,1) |
(682) 0x643c0 VMOVSD 0x8(%R12,%RAX,1),%XMM12 |
(682) 0x643c7 VMOVSD %XMM12,0x8(%RAX,%R11,1) |
(682) 0x643ce VMOVSD 0x8(%RSI,%RAX,1),%XMM14 |
(682) 0x643d4 VMOVSD 0x8(%RCX,%RAX,1),%XMM13 |
(682) 0x643da VMOVHPD 0x8(%R10,%RAX,1),%XMM14,%XMM15 |
(682) 0x643e1 VMOVSD %XMM13,-0x38(%RDX) |
(682) 0x643e6 VMOVUPD %XMM15,-0x48(%RDX) |
(682) 0x643eb VMOVSD 0x8(%R9,%RAX,1),%XMM5 |
(682) 0x643f2 VMOVSD %XMM5,0x8(%RAX,%R8,1) |
(682) 0x643f9 VMOVSD 0x10(%R12,%RAX,1),%XMM0 |
(682) 0x64400 VMOVSD %XMM0,0x10(%RAX,%R11,1) |
(682) 0x64407 VMOVSD 0x10(%RSI,%RAX,1),%XMM6 |
(682) 0x6440d VMOVSD 0x10(%RCX,%RAX,1),%XMM4 |
(682) 0x64413 VMOVHPD 0x10(%R10,%RAX,1),%XMM6,%XMM7 |
(682) 0x6441a VMOVSD %XMM4,-0x20(%RDX) |
(682) 0x6441f VMOVUPD %XMM7,-0x30(%RDX) |
(682) 0x64424 VMOVSD 0x10(%R9,%RAX,1),%XMM8 |
(682) 0x6442b VMOVSD %XMM8,0x10(%RAX,%R8,1) |
(682) 0x64432 VMOVSD 0x18(%R12,%RAX,1),%XMM9 |
(682) 0x64439 VMOVSD %XMM9,0x18(%RAX,%R11,1) |
(682) 0x64440 VMOVSD 0x18(%RSI,%RAX,1),%XMM11 |
(682) 0x64446 VMOVSD 0x18(%RCX,%RAX,1),%XMM10 |
(682) 0x6444c VMOVHPD 0x18(%R10,%RAX,1),%XMM11,%XMM12 |
(682) 0x64453 VMOVUPD %XMM12,-0x18(%RDX) |
(682) 0x64458 VMOVSD %XMM10,-0x8(%RDX) |
(682) 0x6445d VMOVSD 0x18(%R9,%RAX,1),%XMM13 |
(682) 0x64464 ADD $0x20,%RAX |
(682) 0x64468 VMOVSD %XMM13,-0x8(%RAX,%R8,1) |
(682) 0x6446f CMP %RAX,%RDI |
(682) 0x64472 JNE 6438a |
(681) 0x64478 MOV -0x80(%RBP),%RSI |
(681) 0x6447c ADD $0x18,%R14 |
(681) 0x64480 MOV -0x6c(%RBP),%EDI |
(681) 0x64483 VPADDD %XMM2,%XMM1,%XMM1 |
(681) 0x64487 MOV -0x78(%RBP),%R12 |
(681) 0x6448b ADD %EDI,-0x58(%RBP) |
(681) 0x6448e ADD $0x28,%R15 |
(681) 0x64492 ADD %R12,-0x68(%RBP) |
(681) 0x64496 ADD %RSI,-0x60(%RBP) |
(681) 0x6449a CMP %R14,%RSI |
(681) 0x6449d JNE 64238 |
0x644a3 ADD $0x78,%RSP |
0x644a7 POP %RBX |
0x644a8 POP %R12 |
0x644aa POP %R13 |
0x644ac POP %R14 |
0x644ae POP %R15 |
0x644b0 POP %RBP |
0x644b1 RET |
0x644b2 NOPW %CS:(%RAX,%RAX,1) |
0x644bd NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►79.81+ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:100 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:198 | libqmcwfs.so |
○ | main._omp_fn.1 | refwrap.h:346 | exec |
○ | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
►9.45+ | qmcplusplus::SPOSet::evaluate_[...] | OhmmsVector.h:210 | libqmcwfs.so |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:263 | libqmcwfs.so |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:238 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:171 | libqmcwfs.so |
○ | main._omp_fn.0 | miniqmc.cpp:397 | exec |
○ | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
►8.84+ | qmcplusplus::SPOSet::evaluate_[...] | OhmmsVector.h:210 | libqmcwfs.so |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:263 | libqmcwfs.so |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:238 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:171 | libqmcwfs.so |
○ | main._omp_fn.0 | miniqmc.cpp:397 | exec |
○ | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
►1.53+ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:100 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:198 | libqmcwfs.so |
○ | main._omp_fn.1 | refwrap.h:346 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | einspline_spo_ref.hpp:203-230 |
Module | libqmcwfs.so |
nb instructions | 67 |
nb uops | 70 |
loop length | 281 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 11.67 cycles |
front end | 11.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.07 | 6.00 | 6.00 | 9.50 | 2.00 | 2.50 | 9.50 | 9.50 | 9.50 | 1.93 | 6.00 |
cycles | 2.50 | 2.07 | 6.00 | 6.00 | 9.50 | 2.00 | 2.50 | 9.50 | 9.50 | 9.50 | 1.93 | 6.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 11.27 |
Stall cycles | 0.00 |
Front-end | 11.67 |
Dispatch | 9.50 |
Overall L1 | 11.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 9% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 8540 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x48(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x128(%R12),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R12),%R13D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 640c2 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x62> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R13,%R13,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 13e40 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R9D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 641d3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x173> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x50(%RBP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R14,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVQ %RCX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM5,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 8450 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R15D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15D,-0x6c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R15D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 644a3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x443> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD -0x6c(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVD 0x40(%RBX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVL $0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVD 0x8(%RBX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
LEA (%R12,%R12,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R12,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:203-230 |
Module | libqmcwfs.so |
nb instructions | 67 |
nb uops | 70 |
loop length | 281 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 11.67 cycles |
front end | 11.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.07 | 6.00 | 6.00 | 9.50 | 2.00 | 2.50 | 9.50 | 9.50 | 9.50 | 1.93 | 6.00 |
cycles | 2.50 | 2.07 | 6.00 | 6.00 | 9.50 | 2.00 | 2.50 | 9.50 | 9.50 | 9.50 | 1.93 | 6.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 11.27 |
Stall cycles | 0.00 |
Front-end | 11.67 |
Dispatch | 9.50 |
Overall L1 | 11.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 9% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 8540 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x48(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x128(%R12),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R12),%R13D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 640c2 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x62> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R13,%R13,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 13e40 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R9D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 641d3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x173> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x50(%RBP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R14,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVQ %RCX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM5,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 8450 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R15D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15D,-0x6c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R15D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 644a3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x443> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD -0x6c(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVD 0x40(%RBX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVL $0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVD 0x8(%RBX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
LEA (%R12,%R12,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R12,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::einspline_spo_ref | 0.87 | 0.75 |
▼Loop 681 - einspline_spo_ref.hpp:219-227 - libqmcwfs.so– | 0.01 | 0.01 |
○Loop 682 - einspline_spo_ref.hpp:223-227 - libqmcwfs.so | 0.86 | 0.72 |
○Loop 683 - einspline_spo_ref.hpp:206-207 - libqmcwfs.so | 0 | 0 |