Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 28.47% |
---|
Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 28.47% |
---|
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 170 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 45 - 47 |
-------------------------------------------------------------------------------- |
45: T sf = std::floor(x); |
46: T dx2 = x - sf; |
47: int ind2 = std::min(std::max(0, static_cast<int>(sf)), nmax); |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 61 - 61 |
-------------------------------------------------------------------------------- |
61: for (size_t d = 0; d < D; ++d) |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_algobase.h: 200 - 696 |
-------------------------------------------------------------------------------- |
200: if (__b < __a) |
[...] |
366: const ptrdiff_t _Num = __last - __first; |
367: if (_Num) |
368: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
695: for (; __first != __last; ++__first) |
696: *__first = __tmp; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_vector.h: 933 - 1056 |
-------------------------------------------------------------------------------- |
933: return *(this->_M_impl._M_start + __n); |
[...] |
1056: { return _M_data_ptr(this->_M_impl._M_start); } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/Tensor.h: 213 - 213 |
-------------------------------------------------------------------------------- |
213: inline Type_t operator[](unsigned int i) const { return X[i]; } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_algo.h: 782 - 782 |
-------------------------------------------------------------------------------- |
782: { return std::copy(__first, __first + __n, __result); } |
0x43b460 PUSH %RBP |
0x43b461 MOV %RSP,%RBP |
0x43b464 PUSH %R15 |
0x43b466 PUSH %R14 |
0x43b468 PUSH %R13 |
0x43b46a PUSH %R12 |
0x43b46c PUSH %RBX |
0x43b46d SUB $0x158,%RSP |
0x43b474 MOV %RCX,-0xa8(%RBP) |
0x43b47b MOV %EDX,%R12D |
0x43b47e MOV %RSI,%R13 |
0x43b481 MOV %RDI,%RBX |
0x43b484 MOV 0x358(%RDI),%RDI |
0x43b48b MOV %RDI,-0xb0(%RBP) |
0x43b492 CALL 48e0f0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x43b497 MOVSXD %R12D,%RCX |
0x43b49a LEA (%RCX,%RCX,2),%RDX |
0x43b49e SAL $0x3,%RDX |
0x43b4a2 ADD 0x40(%R13),%RDX |
0x43b4a6 LEA 0x128(%R13),%RAX |
0x43b4ad CMP %ECX,0x124(%R13) |
0x43b4b4 CMOVNE %RDX,%RAX |
0x43b4b8 MOV %RBX,-0x68(%RBP) |
0x43b4bc MOV 0x30(%RBX),%ECX |
0x43b4bf MOV %RCX,-0x70(%RBP) |
0x43b4c3 TEST %ECX,%ECX |
0x43b4c5 JLE 43bb1b |
0x43b4cb MOV -0x68(%RBP),%RCX |
0x43b4cf VMOVSD 0xf0(%RCX),%XMM0 |
0x43b4d7 VMOVUPD 0xe0(%RCX),%XMM1 |
0x43b4df VMOVUPD (%RAX),%XMM2 |
0x43b4e3 VMULSD 0xd8(%RCX),%XMM2,%XMM3 |
0x43b4eb VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 |
0x43b4f1 VMOVDDUP 0x10(%RAX),%XMM0 |
0x43b4f6 VFMADD231SD 0x108(%RCX),%XMM0,%XMM3 |
0x43b4ff VMOVSD 0xf8(%RCX),%XMM4 |
0x43b507 VSHUFPD $0x1,%XMM2,%XMM2,%XMM5 |
0x43b50c VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 |
0x43b514 VMULPD %XMM5,%XMM4,%XMM4 |
0x43b518 VMOVHPD 0x100(%RCX),%XMM1,%XMM1 |
0x43b520 VFMADD213PD %XMM4,%XMM2,%XMM1 |
0x43b525 VFMADD231PD 0x110(%RCX),%XMM0,%XMM1 |
0x43b52e VMOVSD 0xc2a7a(%RIP),%XMM0 |
0x43b536 VCMPPD $0x1,%XMM3,%XMM0,%K1 |
0x43b53d VXORPD %XMM2,%XMM2,%XMM2 |
0x43b541 VCMPPD $0x1,%XMM2,%XMM3,%K1{%K1} |
0x43b548 VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 |
0x43b54e VSUBSD %XMM4,%XMM3,%XMM3 |
0x43b552 VMOVSD %XMM2,%XMM3,%XMM3{%K1} |
0x43b558 VMOVUPD %XMM3,-0x140(%RBP) |
0x43b560 VXORPD %XMM3,%XMM3,%XMM3 |
0x43b564 VCMPPD $0x1,%XMM3,%XMM1,%K1 |
0x43b56b VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 |
0x43b571 VSUBSD %XMM3,%XMM1,%XMM4 |
0x43b575 VMOVAPD %XMM4,%XMM3 |
0x43b579 VMOVSD %XMM2,%XMM3,%XMM3{%K1} |
0x43b57f KMOVD %K1,%EAX |
0x43b583 AND $0x2,%AL |
0x43b585 SHR $0x1,%AL |
0x43b587 VCMPSD $0x1,%XMM1,%XMM0,%K1 |
0x43b58e VMOVSD %XMM3,%XMM4,%XMM4{%K1} |
0x43b594 VMOVUPD %XMM4,-0x130(%RBP) |
0x43b59c VSHUFPD $0x1,%XMM1,%XMM1,%XMM1 |
0x43b5a1 VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 |
0x43b5a7 VSUBSD %XMM3,%XMM1,%XMM4 |
0x43b5ab KMOVD %EAX,%K1 |
0x43b5af VMOVAPD %XMM4,%XMM3 |
0x43b5b3 VMOVSD %XMM2,%XMM3,%XMM3{%K1} |
0x43b5b9 VCMPSD $0x1,%XMM1,%XMM0,%K1 |
0x43b5c0 VMOVSD %XMM3,%XMM4,%XMM4{%K1} |
0x43b5c6 VMOVUPD %XMM4,-0x120(%RBP) |
0x43b5ce MOV 0x2f8(%RCX),%RAX |
0x43b5d5 MOV %RAX,-0xc8(%RBP) |
0x43b5dc MOV 0x310(%RCX),%RAX |
0x43b5e3 MOV %RAX,-0xc0(%RBP) |
0x43b5ea MOVSXD 0x40(%RCX),%RBX |
0x43b5ee LEA (,%RBX,8),%RAX |
0x43b5f6 MOV %RAX,-0xb8(%RBP) |
0x43b5fd CMP $0x1,%RBX |
0x43b601 MOV %RBX,%R12 |
0x43b604 ADC $0,%R12 |
0x43b608 DECQ -0x70(%RBP) |
0x43b60c MOV %R12,%RAX |
0x43b60f SHR $0x1,%RAX |
0x43b612 MOV %RAX,-0xf0(%RBP) |
0x43b619 MOV %R12,-0xf8(%RBP) |
0x43b620 AND $-0x2,%R12 |
0x43b624 XOR %ECX,%ECX |
0x43b626 VMOVSD 0xc298a(%RIP),%XMM10 |
0x43b62e VMOVSD 0xbfbe2(%RIP),%XMM11 |
0x43b636 VMOVDDUP 0xbfbda(%RIP),%XMM0 |
0x43b63e VMOVUPD %XMM0,-0x110(%RBP) |
0x43b646 JMP 43b668 |
0x43b648 NOPL (%RAX,%RAX,1) |
(870) 0x43b650 MOV -0xe8(%RBP),%RCX |
(870) 0x43b657 LEA 0x1(%RCX),%RAX |
(870) 0x43b65b CMP -0x70(%RBP),%RCX |
(870) 0x43b65f MOV %RAX,%RCX |
(870) 0x43b662 JE 43bb1b |
(870) 0x43b668 MOV -0xc8(%RBP),%RAX |
(870) 0x43b66f MOV (%RAX,%RCX,8),%R14 |
(870) 0x43b673 MOV %RCX,-0xe8(%RBP) |
(870) 0x43b67a LEA (%RCX,%RCX,2),%RAX |
(870) 0x43b67e VMOVUPD -0x140(%RBP),%XMM0 |
(870) 0x43b686 VSUBSD 0x28(%R14),%XMM0,%XMM0 |
(870) 0x43b68c VMOVUPD -0x130(%RBP),%XMM1 |
(870) 0x43b694 VSUBSD 0x50(%R14),%XMM1,%XMM1 |
(870) 0x43b69a MOV -0xc0(%RBP),%RCX |
(870) 0x43b6a1 MOV (%RCX,%RAX,8),%R13 |
(870) 0x43b6a5 VMOVUPD -0x120(%RBP),%XMM2 |
(870) 0x43b6ad VSUBSD 0x78(%R14),%XMM2,%XMM2 |
(870) 0x43b6b3 VMULSD 0x48(%R14),%XMM0,%XMM0 |
(870) 0x43b6b9 MOVSXD 0x38(%R14),%R15 |
(870) 0x43b6bd VROUNDSD $0x9,%XMM0,%XMM0,%XMM14 |
(870) 0x43b6c3 VSUBSD %XMM14,%XMM0,%XMM0 |
(870) 0x43b6c8 VMULSD 0x70(%R14),%XMM1,%XMM1 |
(870) 0x43b6ce MOVSXD 0x60(%R14),%R8 |
(870) 0x43b6d2 VROUNDSD $0x9,%XMM1,%XMM1,%XMM15 |
(870) 0x43b6d8 VMULSD 0x98(%R14),%XMM2,%XMM2 |
(870) 0x43b6e1 VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM16 |
(870) 0x43b6e8 MOVSXD 0x88(%R14),%R11 |
(870) 0x43b6ef VMULSD %XMM0,%XMM10,%XMM3 |
(870) 0x43b6f3 VSUBSD %XMM3,%XMM11,%XMM4 |
(870) 0x43b6f7 VMULSD %XMM0,%XMM0,%XMM5 |
(870) 0x43b6fb VMOVDDUP %XMM0,%XMM6 |
(870) 0x43b6ff VMOVUPD -0x110(%RBP),%XMM8 |
(870) 0x43b707 VBLENDPD $0x1,%XMM4,%XMM8,%XMM4 |
(870) 0x43b70d VMULPD %XMM4,%XMM6,%XMM4 |
(870) 0x43b711 VMOVUPD 0xc28f7(%RIP),%XMM9 |
(870) 0x43b719 VADDPD %XMM4,%XMM9,%XMM7 |
(870) 0x43b71d VPUNPCKLQDQ %XMM5,%XMM6,%XMM6 |
(870) 0x43b721 VMOVUPD 0xc28f7(%RIP),%XMM12 |
(870) 0x43b729 VFMADD213PD %XMM12,%XMM7,%XMM6 |
(870) 0x43b72e VMOVUPD %XMM6,-0x180(%RBP) |
(870) 0x43b736 VSHUFPD $0x1,%XMM4,%XMM4,%XMM4 |
(870) 0x43b73b VSUBSD %XMM4,%XMM11,%XMM4 |
(870) 0x43b73f VFMADD213SD %XMM11,%XMM0,%XMM4 |
(870) 0x43b744 VFMADD213SD %XMM10,%XMM0,%XMM4 |
(870) 0x43b749 VMOVSD %XMM4,-0x170(%RBP) |
(870) 0x43b751 VSUBSD %XMM15,%XMM1,%XMM0 |
(870) 0x43b756 VMULSD %XMM5,%XMM3,%XMM1 |
(870) 0x43b75a VMOVSD %XMM1,-0x168(%RBP) |
(870) 0x43b762 VMULSD %XMM0,%XMM10,%XMM1 |
(870) 0x43b766 VSUBSD %XMM1,%XMM11,%XMM3 |
(870) 0x43b76a VMULSD %XMM0,%XMM0,%XMM4 |
(870) 0x43b76e VMOVDDUP %XMM0,%XMM5 |
(870) 0x43b772 VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 |
(870) 0x43b778 VMULPD %XMM3,%XMM5,%XMM3 |
(870) 0x43b77c VADDPD %XMM3,%XMM9,%XMM6 |
(870) 0x43b780 VPUNPCKLQDQ %XMM4,%XMM5,%XMM5 |
(870) 0x43b784 VFMADD213PD %XMM12,%XMM6,%XMM5 |
(870) 0x43b789 VMOVUPD %XMM5,-0x160(%RBP) |
(870) 0x43b791 VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 |
(870) 0x43b796 VSUBSD %XMM3,%XMM11,%XMM3 |
(870) 0x43b79a VFMADD213SD %XMM11,%XMM0,%XMM3 |
(870) 0x43b79f VFMADD213SD %XMM10,%XMM0,%XMM3 |
(870) 0x43b7a4 VMOVSD %XMM3,-0x150(%RBP) |
(870) 0x43b7ac VMULSD %XMM4,%XMM1,%XMM0 |
(870) 0x43b7b0 VMOVSD %XMM0,-0x148(%RBP) |
(870) 0x43b7b8 VSUBSD %XMM16,%XMM2,%XMM2 |
(870) 0x43b7be VMULSD %XMM2,%XMM10,%XMM1 |
(870) 0x43b7c2 VSUBSD %XMM1,%XMM11,%XMM12 |
(870) 0x43b7c6 VFMADD213SD 0xbfa61(%RIP),%XMM2,%XMM12 |
(870) 0x43b7cf VMULSD %XMM2,%XMM11,%XMM0 |
(870) 0x43b7d3 VADDSD 0xc27e5(%RIP),%XMM0,%XMM13 |
(870) 0x43b7db VMULSD %XMM2,%XMM2,%XMM3 |
(870) 0x43b7df VFMADD213SD 0xc27e0(%RIP),%XMM3,%XMM13 |
(870) 0x43b7e8 MOV 0x10(%R14),%RAX |
(870) 0x43b7ec MOV %RAX,-0x78(%RBP) |
(870) 0x43b7f0 MOV 0x18(%R14),%RAX |
(870) 0x43b7f4 MOV %RAX,-0x80(%RBP) |
(870) 0x43b7f8 MOV 0x20(%R14),%R10 |
(870) 0x43b7fc MOV $0x1fffffffffffffff,%RAX |
(870) 0x43b806 TEST %RAX,%RBX |
(870) 0x43b809 JE 43b8c4 |
(870) 0x43b80f MOV %R13,%RDI |
(870) 0x43b812 XOR %ESI,%ESI |
(870) 0x43b814 MOV -0xb8(%RBP),%RDX |
(870) 0x43b81b VMOVUPD %XMM12,-0xa0(%RBP) |
(870) 0x43b823 VMOVUPD %XMM13,-0x90(%RBP) |
(870) 0x43b82b MOV %R11,-0x60(%RBP) |
(870) 0x43b82f MOV %R8,-0x58(%RBP) |
(870) 0x43b833 MOV %R10,-0x50(%RBP) |
(870) 0x43b837 VMOVSD %XMM14,-0x48(%RBP) |
(870) 0x43b83c VMOVSD %XMM15,-0x40(%RBP) |
(870) 0x43b841 VMOVSD %XMM16,-0x38(%RBP) |
(870) 0x43b848 VMOVSD %XMM1,-0x30(%RBP) |
(870) 0x43b84d VMOVSD %XMM2,-0xe0(%RBP) |
(870) 0x43b855 VMOVSD %XMM3,-0xd8(%RBP) |
(870) 0x43b85d VMOVSD %XMM0,-0xd0(%RBP) |
(870) 0x43b865 CALL 4ec910 <_intel_fast_memset> |
(870) 0x43b86a VMOVSD -0xd0(%RBP),%XMM0 |
(870) 0x43b872 VMOVSD -0xd8(%RBP),%XMM3 |
(870) 0x43b87a VMOVSD -0xe0(%RBP),%XMM2 |
(870) 0x43b882 VMOVSD -0x30(%RBP),%XMM1 |
(870) 0x43b887 VMOVSD -0x38(%RBP),%XMM16 |
(870) 0x43b88e VMOVSD -0x40(%RBP),%XMM15 |
(870) 0x43b893 VMOVSD -0x48(%RBP),%XMM14 |
(870) 0x43b898 MOV -0x50(%RBP),%R10 |
(870) 0x43b89c MOV -0x58(%RBP),%R8 |
(870) 0x43b8a0 MOV -0x60(%RBP),%R11 |
(870) 0x43b8a4 VMOVUPD -0x90(%RBP),%XMM13 |
(870) 0x43b8ac VMOVUPD -0xa0(%RBP),%XMM12 |
(870) 0x43b8b4 VMOVSD 0xbf95c(%RIP),%XMM11 |
(870) 0x43b8bc VMOVSD 0xc26f4(%RIP),%XMM10 |
(870) 0x43b8c4 VCVTTSD2SI %XMM14,%EDX |
(870) 0x43b8c9 VCVTTSD2SI %XMM15,%ESI |
(870) 0x43b8ce VCVTTSD2SI %XMM16,%ECX |
(870) 0x43b8d4 VFMADD213SD %XMM10,%XMM2,%XMM12 |
(870) 0x43b8d9 VSUBSD %XMM0,%XMM11,%XMM0 |
(870) 0x43b8dd VFMADD213SD %XMM11,%XMM2,%XMM0 |
(870) 0x43b8e2 VFMADD213SD %XMM10,%XMM2,%XMM0 |
(870) 0x43b8e7 MOV 0x8(%R14),%RAX |
(870) 0x43b8eb VMULSD %XMM3,%XMM1,%XMM1 |
(870) 0x43b8ef DEC %R11 |
(870) 0x43b8f2 MOV %ECX,%EDI |
(870) 0x43b8f4 SAR $0x1f,%EDI |
(870) 0x43b8f7 ANDN %ECX,%EDI,%ECX |
(870) 0x43b8fc CMP %RCX,%R11 |
(870) 0x43b8ff CMOVGE %RCX,%R11 |
(870) 0x43b903 MOV %R10,%RCX |
(870) 0x43b906 IMUL %R11,%RCX |
(870) 0x43b90a DEC %R8 |
(870) 0x43b90d MOV %ESI,%EDI |
(870) 0x43b90f SAR $0x1f,%EDI |
(870) 0x43b912 ANDN %ESI,%EDI,%ESI |
(870) 0x43b917 CMP %RSI,%R8 |
(870) 0x43b91a CMOVGE %RSI,%R8 |
(870) 0x43b91e MOV -0x80(%RBP),%RDI |
(870) 0x43b922 IMUL %RDI,%R8 |
(870) 0x43b926 DEC %R15 |
(870) 0x43b929 MOV %EDX,%ESI |
(870) 0x43b92b SAR $0x1f,%ESI |
(870) 0x43b92e ANDN %EDX,%ESI,%EDX |
(870) 0x43b933 CMP %RDX,%R15 |
(870) 0x43b936 CMOVGE %RDX,%R15 |
(870) 0x43b93a MOV -0x78(%RBP),%RSI |
(870) 0x43b93e IMUL %RSI,%R15 |
(870) 0x43b942 ADD %R8,%R15 |
(870) 0x43b945 MOV %R15,%R8 |
(870) 0x43b948 LEA (%R10,%R10,2),%R15 |
(870) 0x43b94c VMOVDDUP %XMM13,%XMM2 |
(870) 0x43b951 VMOVDDUP %XMM12,%XMM3 |
(870) 0x43b956 VMOVDDUP %XMM0,%XMM4 |
(870) 0x43b95a VMOVDDUP %XMM1,%XMM5 |
(870) 0x43b95e LEA (%R8,%RCX,1),%RDX |
(870) 0x43b962 LEA (%RDX,%R12,1),%R9 |
(870) 0x43b966 MOV %R9,-0xa0(%RBP) |
(870) 0x43b96d LEA (%R10,%RCX,1),%R9 |
(870) 0x43b971 ADD %R8,%R9 |
(870) 0x43b974 ADD %R12,%R9 |
(870) 0x43b977 MOV %R9,-0x90(%RBP) |
(870) 0x43b97e LEA (%RCX,%R10,2),%R9 |
(870) 0x43b982 ADD %R8,%R9 |
(870) 0x43b985 ADD %R12,%R9 |
(870) 0x43b988 MOV %R9,-0x60(%RBP) |
(870) 0x43b98c ADD %RCX,%R15 |
(870) 0x43b98f ADD %R8,%R15 |
(870) 0x43b992 ADD %R12,%R15 |
(870) 0x43b995 LEA 0x3(%R11),%RCX |
(870) 0x43b999 IMUL %R10,%RCX |
(870) 0x43b99d ADD %R8,%RCX |
(870) 0x43b9a0 LEA (%RAX,%RCX,8),%R9 |
(870) 0x43b9a4 LEA (,%RSI,8),%RCX |
(870) 0x43b9ac MOV %RCX,-0x30(%RBP) |
(870) 0x43b9b0 LEA 0x2(%R11),%RCX |
(870) 0x43b9b4 IMUL %R10,%RCX |
(870) 0x43b9b8 INC %R11 |
(870) 0x43b9bb IMUL %R10,%R11 |
(870) 0x43b9bf LEA (,%RDI,8),%R10 |
(870) 0x43b9c7 ADD %R8,%RCX |
(870) 0x43b9ca LEA (%RAX,%RCX,8),%R14 |
(870) 0x43b9ce ADD %R8,%R11 |
(870) 0x43b9d1 LEA (%RAX,%R11,8),%RCX |
(870) 0x43b9d5 LEA (%RAX,%RDX,8),%R8 |
(870) 0x43b9d9 MOV %R9,%RDX |
(870) 0x43b9dc MOV %RCX,%R9 |
(870) 0x43b9df XOR %ECX,%ECX |
(870) 0x43b9e1 JMP 43ba22 |
0x43b9e3 NOPW %CS:(%RAX,%RAX,1) |
(871) 0x43b9f0 MOV -0x38(%RBP),%RDI |
(871) 0x43b9f4 LEA 0x1(%RDI),%RCX |
(871) 0x43b9f8 MOV -0x58(%RBP),%RDX |
(871) 0x43b9fc MOV -0x30(%RBP),%RSI |
(871) 0x43ba00 ADD %RSI,%RDX |
(871) 0x43ba03 MOV -0x50(%RBP),%R14 |
(871) 0x43ba07 ADD %RSI,%R14 |
(871) 0x43ba0a MOV -0x48(%RBP),%R9 |
(871) 0x43ba0e ADD %RSI,%R9 |
(871) 0x43ba11 MOV -0x40(%RBP),%R8 |
(871) 0x43ba15 ADD %RSI,%R8 |
(871) 0x43ba18 CMP $0x3,%RDI |
(871) 0x43ba1c JE 43b650 |
(871) 0x43ba22 VMOVSD -0x180(%RBP,%RCX,8),%XMM6 |
(871) 0x43ba2b MOV -0x78(%RBP),%R11 |
(871) 0x43ba2f MOV %RCX,-0x38(%RBP) |
(871) 0x43ba33 IMUL %RCX,%R11 |
(871) 0x43ba37 MOV %R8,-0x40(%RBP) |
(871) 0x43ba3b MOV %R9,-0x48(%RBP) |
(871) 0x43ba3f MOV %R14,-0x50(%RBP) |
(871) 0x43ba43 MOV %RDX,-0x58(%RBP) |
(871) 0x43ba47 MOV %RDX,%RCX |
(871) 0x43ba4a XOR %EDX,%EDX |
(871) 0x43ba4c JMP 43ba69 |
0x43ba4e XCHG %AX,%AX |
(872) 0x43ba50 LEA 0x1(%RDX),%RSI |
(872) 0x43ba54 ADD %R10,%RCX |
(872) 0x43ba57 ADD %R10,%R14 |
(872) 0x43ba5a ADD %R10,%R9 |
(872) 0x43ba5d ADD %R10,%R8 |
(872) 0x43ba60 CMP $0x3,%RDX |
(872) 0x43ba64 MOV %RSI,%RDX |
(872) 0x43ba67 JE 43b9f0 |
(872) 0x43ba69 TEST %EBX,%EBX |
(872) 0x43ba6b JE 43ba50 |
(872) 0x43ba6d VMULSD -0x160(%RBP,%RDX,8),%XMM6,%XMM7 |
(872) 0x43ba76 CMP $0x1,%EBX |
(872) 0x43ba79 JE 43babf |
(872) 0x43ba7b VMOVDDUP %XMM7,%XMM8 |
(872) 0x43ba7f MOV -0xf0(%RBP),%RSI |
(872) 0x43ba86 XOR %EDI,%EDI |
(872) 0x43ba88 NOPL (%RAX,%RAX,1) |
(873) 0x43ba90 VMULPD (%R8,%RDI,1),%XMM3,%XMM9 |
(873) 0x43ba96 VFMADD231PD (%R9,%RDI,1),%XMM2,%XMM9 |
(873) 0x43ba9c VFMADD231PD (%R14,%RDI,1),%XMM4,%XMM9 |
(873) 0x43baa2 VFMADD231PD (%RCX,%RDI,1),%XMM5,%XMM9 |
(873) 0x43baa8 VFMADD213PD (%R13,%RDI,1),%XMM8,%XMM9 |
(873) 0x43baaf VMOVUPD %XMM9,(%R13,%RDI,1) |
(873) 0x43bab6 ADD $0x10,%RDI |
(873) 0x43baba DEC %RSI |
(873) 0x43babd JNE 43ba90 |
(872) 0x43babf CMP -0xf8(%RBP),%R12 |
(872) 0x43bac6 JE 43ba50 |
(872) 0x43bac8 MOV -0x80(%RBP),%RSI |
(872) 0x43bacc IMUL %RDX,%RSI |
(872) 0x43bad0 ADD %R11,%RSI |
(872) 0x43bad3 MOV -0xa0(%RBP),%RDI |
(872) 0x43bada ADD %RSI,%RDI |
(872) 0x43badd VMULSD (%RAX,%RDI,8),%XMM12,%XMM8 |
(872) 0x43bae2 MOV -0x90(%RBP),%RDI |
(872) 0x43bae9 ADD %RSI,%RDI |
(872) 0x43baec VFMADD231SD (%RAX,%RDI,8),%XMM13,%XMM8 |
(872) 0x43baf2 MOV -0x60(%RBP),%RDI |
(872) 0x43baf6 ADD %RSI,%RDI |
(872) 0x43baf9 VFMADD231SD (%RAX,%RDI,8),%XMM0,%XMM8 |
(872) 0x43baff ADD %R15,%RSI |
(872) 0x43bb02 VFMADD231SD (%RAX,%RSI,8),%XMM1,%XMM8 |
(872) 0x43bb08 VFMADD213SD (%R13,%R12,8),%XMM7,%XMM8 |
(872) 0x43bb0f VMOVSD %XMM8,(%R13,%R12,8) |
(872) 0x43bb16 JMP 43ba50 |
0x43bb1b MOV -0xb0(%RBP),%RDI |
0x43bb22 CALL 48e2a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x43bb27 MOV -0x68(%RBP),%R12 |
0x43bb2b MOV 0x30(%R12),%EAX |
0x43bb30 TEST %EAX,%EAX |
0x43bb32 MOV -0xa8(%RBP),%R15 |
0x43bb39 JLE 43bbb3 |
0x43bb3b XOR %EBX,%EBX |
0x43bb3d XOR %R14D,%R14D |
0x43bb40 JMP 43bb5c |
0x43bb42 NOPW %CS:(%RAX,%RAX,1) |
(869) 0x43bb50 MOVSXD %EAX,%RCX |
(869) 0x43bb53 ADD $0x18,%RBX |
(869) 0x43bb57 CMP %RCX,%R14 |
(869) 0x43bb5a JGE 43bbb3 |
(869) 0x43bb5c MOV %R14D,%ECX |
(869) 0x43bb5f IMUL %EAX,%ECX |
(869) 0x43bb62 INC %R14 |
(869) 0x43bb65 MOV 0x40(%R12),%EDX |
(869) 0x43bb6a IMUL %R14D,%EDX |
(869) 0x43bb6e MOV 0x8(%R12),%ESI |
(869) 0x43bb73 CMP %EDX,%ESI |
(869) 0x43bb75 CMOVL %ESI,%EDX |
(869) 0x43bb78 SUB %ECX,%EDX |
(869) 0x43bb7a MOVSXD %EDX,%RDX |
(869) 0x43bb7d MOV $0x1fffffffffffffff,%RSI |
(869) 0x43bb87 TEST %RSI,%RDX |
(869) 0x43bb8a JE 43bb50 |
(869) 0x43bb8c MOV 0x310(%R12),%RAX |
(869) 0x43bb94 MOV (%RAX,%RBX,1),%RSI |
(869) 0x43bb98 SAL $0x3,%RDX |
(869) 0x43bb9c MOVSXD %ECX,%RDI |
(869) 0x43bb9f SAL $0x3,%RDI |
(869) 0x43bba3 ADD 0x18(%R15),%RDI |
(869) 0x43bba7 CALL 4036f0 <memmove@plt> |
(869) 0x43bbac MOV 0x30(%R12),%EAX |
(869) 0x43bbb1 JMP 43bb50 |
0x43bbb3 ADD $0x158,%RSP |
0x43bbba POP %RBX |
0x43bbbb POP %R12 |
0x43bbbd POP %R13 |
0x43bbbf POP %R14 |
0x43bbc1 POP %R15 |
0x43bbc3 POP %RBP |
0x43bbc4 RET |
0x43bbc5 MOV %RAX,%RDI |
0x43bbc8 CALL 40d030 <__clang_call_terminate> |
0x43bbcd NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►99.99+ | qmcplusplus::SPOSet::evaluateD[...] | OhmmsVector.h:178 | exec |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:217 | exec |
○ | qmcplusplus::WaveFunction::eva[...] | stl_vector.h:806 | exec |
○ | qmcplusplus::NonLocalPP<double[...] | NonLocalPP.hpp:135 | exec |
○ | main.extracted.110 | NewTimer.h:249 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 118 |
nb uops | 126 |
loop length | 593 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.67 | 10.67 | 11.67 | 11.67 | 11.50 | 10.67 | 9.00 | 11.50 | 11.50 | 11.50 | 9.00 | 11.67 |
cycles | 10.67 | 10.67 | 11.67 | 11.67 | 11.50 | 10.67 | 9.00 | 11.50 | 11.50 | 11.50 | 9.00 | 11.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.98-21.05 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 11.67 |
Overall L1 | 21.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 42% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 47% |
all | 29% |
load | 15% |
store | 28% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 34% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 17% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 15% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x158,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 48e0f0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RCX,%RCX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %ECX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMOVNE %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43bb1b <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6bb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xf0(%RCX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0xe0(%RCX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD (%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%RCX),%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%RCX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf8(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM2,%XMM2,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.50 |
VMULPD %XMM5,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHPD 0x100(%RCX),%XMM1,%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VFMADD213PD %XMM4,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%RCX),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xc2a7a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM2,%XMM3,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM3,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM3,%XMM1,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %XMM4,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KMOVD %K1,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.25 |
SHR $0x1,%AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM4,%XMM4{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM4,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KMOVD %EAX,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM4,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM4,%XMM4{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM4,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x2f8(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RBX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x1,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RBX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x70(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x2,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xc298a(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xbfbe2(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0xbfbda(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JMP 43b668 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x208> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 48e2a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xa8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 43bbb3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x753> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43bb5c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6fc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x158,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 40d030 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 118 |
nb uops | 126 |
loop length | 593 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.67 | 10.67 | 11.67 | 11.67 | 11.50 | 10.67 | 9.00 | 11.50 | 11.50 | 11.50 | 9.00 | 11.67 |
cycles | 10.67 | 10.67 | 11.67 | 11.67 | 11.50 | 10.67 | 9.00 | 11.50 | 11.50 | 11.50 | 9.00 | 11.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.98-21.05 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 11.67 |
Overall L1 | 21.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 42% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 47% |
all | 29% |
load | 15% |
store | 28% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 34% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 17% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 15% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x158,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 48e0f0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RCX,%RCX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %ECX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMOVNE %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43bb1b <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6bb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xf0(%RCX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD 0xe0(%RCX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD (%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%RCX),%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%RCX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf8(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM2,%XMM2,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.50 |
VMULPD %XMM5,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHPD 0x100(%RCX),%XMM1,%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VFMADD213PD %XMM4,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%RCX),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xc2a7a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM2,%XMM3,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM3,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM3,%XMM1,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %XMM4,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KMOVD %K1,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.25 |
SHR $0x1,%AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM4,%XMM4{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM4,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KMOVD %EAX,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM4,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM4,%XMM4{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM4,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x2f8(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RBX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x1,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RBX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x70(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x2,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xc298a(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xbfbe2(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0xbfbda(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JMP 43b668 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x208> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 48e2a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xa8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 43bbb3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x753> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43bb5c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6fc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x158,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 40d030 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::einspline_spo_ref | 28.47 | 22.94 |
▼Loop 870 - MultiBsplineRef.hpp:42-71 - exec– | 0.02 | 0.02 |
▼Loop 871 - MultiBsplineRef.hpp:63-71 - exec– | 0.01 | 0 |
▼Loop 872 - MultiBsplineRef.hpp:64-71 - exec– | 0.01 | 0.01 |
○Loop 873 - MultiBsplineRef.hpp:68-70 - exec | 28.41 | 22.28 |
○Loop 869 - einspline_spo_ref.hpp:183-187 - exec | 0 | 0.01 |