Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 23.23% |
---|
Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 23.23% |
---|
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_algobase.h: 238 - 931 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
[...] |
398: { *__to = *__from; } |
[...] |
436: if (__builtin_expect(_Num > 1, true)) |
437: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
930: for (; __first != __last; ++__first) |
931: *__first = __tmp; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_vector.h: 1256 - 1256 |
-------------------------------------------------------------------------------- |
1256: { return _M_data_ptr(this->_M_impl._M_start); } |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_algo.h: 731 - 757 |
-------------------------------------------------------------------------------- |
731: { return std::copy(__first, __first + __n, __result); } |
[...] |
757: if (__n2 <= 0) |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 173 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
171: val_dot[i] = T1(0.0); |
172: else |
173: val_dot[i] -= std::floor(val_dot[i]); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 229 - 229 |
-------------------------------------------------------------------------------- |
229: return X[i]; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 53 - 63 |
-------------------------------------------------------------------------------- |
53: if (x < 0) |
[...] |
60: ind = static_cast<int>(x); |
61: dx = x - ind; |
62: // upper bound |
63: if (ind > nmax) |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
0x474a70 PUSH %RBP |
0x474a71 MOV %RSP,%RBP |
0x474a74 PUSH %R15 |
0x474a76 MOVSXD %EDX,%R15 |
0x474a79 PUSH %R14 |
0x474a7b MOV %RSI,%R14 |
0x474a7e PUSH %R13 |
0x474a80 PUSH %R12 |
0x474a82 MOV %RCX,%R12 |
0x474a85 PUSH %RBX |
0x474a86 MOV %RDI,%RBX |
0x474a89 AND $-0x20,%RSP |
0x474a8d SUB $0x120,%RSP |
0x474a94 MOV 0x358(%RDI),%R13 |
0x474a9b MOV %R13,%RDI |
0x474a9e CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x474aa3 LEA 0x128(%R14),%RCX |
0x474aaa CMP 0x124(%R14),%R15D |
0x474ab1 JE 474abf |
0x474ab3 MOV 0x40(%R14),%RAX |
0x474ab7 LEA (%R15,%R15,2),%RDX |
0x474abb LEA (%RAX,%RDX,8),%RCX |
0x474abf VMOVUPD (%RCX),%XMM2 |
0x474ac3 VMOVSD 0x10(%RCX),%XMM4 |
0x474ac8 VXORPD %XMM5,%XMM5,%XMM5 |
0x474acc VUNPCKHPD %XMM2,%XMM2,%XMM1 |
0x474ad0 VMOVSD %XMM2,%XMM2,%XMM0 |
0x474ad4 VMULSD 0x100(%RBX),%XMM1,%XMM3 |
0x474adc VFMADD132SD 0xe8(%RBX),%XMM3,%XMM0 |
0x474ae5 VFMADD231SD 0x118(%RBX),%XMM4,%XMM0 |
0x474aee VCOMISD %XMM0,%XMM5 |
0x474af2 JBE 474b02 |
0x474af4 VCOMISD 0x118ce4(%RIP),%XMM0 |
0x474afc JA 475465 |
0x474b02 VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM6 |
0x474b09 VSUBSD %XMM6,%XMM0,%XMM7 |
0x474b0d MOVSXD 0x30(%RBX),%RSI |
0x474b11 TEST %ESI,%ESI |
0x474b13 JLE 47531f |
0x474b19 VPERMILPD $0,%XMM2,%XMM10 |
0x474b1f VPERMILPD $0x3,%XMM2,%XMM9 |
0x474b25 VMOVDDUP %XMM4,%XMM8 |
0x474b29 VMOVDDUP 0x118caf(%RIP),%XMM13 |
0x474b31 MOVSXD 0x40(%RBX),%RDI |
0x474b35 VMULPD 0xd8(%RBX),%XMM10,%XMM11 |
0x474b3d VXORPD %XMM14,%XMM14,%XMM14 |
0x474b42 MOV 0x2f8(%RBX),%R10 |
0x474b49 MOV %R13,0x18(%RSP) |
0x474b4e MOV %RDI,%R15 |
0x474b51 MOV 0x310(%RBX),%R9 |
0x474b58 LEA (,%RDI,8),%R8 |
0x474b60 LEA -0x1(%RDI),%R14 |
0x474b64 SHR $0x2,%R15 |
0x474b68 MOV %EDI,0x88(%RSP) |
0x474b6f LEA (%R10,%RSI,8),%R11 |
0x474b73 MOV %RDI,0x68(%RSP) |
0x474b78 SAL $0x5,%R15 |
0x474b7c AND $-0x4,%RDI |
0x474b80 MOV %R8,0x48(%RSP) |
0x474b85 VFMADD231PD 0xf0(%RBX),%XMM9,%XMM11 |
0x474b8e MOV %R9,0x40(%RSP) |
0x474b93 MOV %R10,0x50(%RSP) |
0x474b98 MOV %R11,0x38(%RSP) |
0x474b9d MOV %R14,0x90(%RSP) |
0x474ba5 MOV %R15,0xd0(%RSP) |
0x474bad VFMADD132PD 0x108(%RBX),%XMM11,%XMM8 |
0x474bb6 MOV %RDI,0x60(%RSP) |
0x474bbb MOV %RBX,0x10(%RSP) |
0x474bc0 MOV %R12,0x8(%RSP) |
0x474bc5 VMOVSD %XMM7,0x20(%RSP) |
0x474bcb VRNDSCALEPD $0x9,%XMM8,%XMM12 |
0x474bd2 VCMPPD $0xe,%XMM13,%XMM8,%K1 |
0x474bd9 VCMPPD $0x1,%XMM14,%XMM8,%K0{%K1} |
0x474be0 KNOTB %K0,%K2 |
0x474be4 VSUBPD %XMM12,%XMM8,%XMM15{%K2}{z} |
0x474bea VMOVHPD %XMM15,0x28(%RSP) |
0x474bf0 VMOVLPD %XMM15,0x30(%RSP) |
(817) 0x474bf6 MOV 0x50(%RSP),%R12 |
(817) 0x474bfb VMOVSD 0x30(%RSP),%XMM3 |
(817) 0x474c01 VXORPD %XMM6,%XMM6,%XMM6 |
(817) 0x474c05 MOV 0x40(%RSP),%RBX |
(817) 0x474c0a VMOVSD 0x28(%RSP),%XMM2 |
(817) 0x474c10 MOV (%R12),%RCX |
(817) 0x474c14 VMOVSD 0x20(%RSP),%XMM0 |
(817) 0x474c1a MOV (%RBX),%R15 |
(817) 0x474c1d VSUBSD 0x28(%RCX),%XMM3,%XMM4 |
(817) 0x474c22 VSUBSD 0x50(%RCX),%XMM2,%XMM1 |
(817) 0x474c27 VSUBSD 0x78(%RCX),%XMM0,%XMM10 |
(817) 0x474c2c MOV 0x38(%RCX),%EAX |
(817) 0x474c2f VMULSD 0x48(%RCX),%XMM4,%XMM5 |
(817) 0x474c34 VCOMISD %XMM5,%XMM6 |
(817) 0x474c38 JA 4753d9 |
(817) 0x474c3e VCVTTSD2SI %XMM5,%R13D |
(817) 0x474c42 DEC %EAX |
(817) 0x474c44 CMP %R13D,%EAX |
(817) 0x474c47 JL 475441 |
(817) 0x474c4d VRNDSCALESD $0xb,%XMM5,%XMM5,%XMM7 |
(817) 0x474c54 VSUBSD %XMM7,%XMM5,%XMM11 |
(817) 0x474c58 VMOVSD 0x1189e0(%RIP),%XMM8 |
(817) 0x474c60 VMOVSD 0x118a48(%RIP),%XMM13 |
(817) 0x474c68 MOVSXD %R13D,%RDX |
(817) 0x474c6b MOV %RDX,0xd8(%RSP) |
(817) 0x474c73 VMULSD %XMM8,%XMM11,%XMM12 |
(817) 0x474c78 VMOVSD %XMM11,%XMM11,%XMM2 |
(817) 0x474c7c VMOVSD %XMM11,%XMM11,%XMM15 |
(817) 0x474c81 VFNMADD132SD %XMM13,%XMM13,%XMM2 |
(817) 0x474c86 VFMADD213SD 0x118b59(%RIP),%XMM13,%XMM15 |
(817) 0x474c8f VMULSD %XMM11,%XMM11,%XMM9 |
(817) 0x474c94 VSUBSD %XMM12,%XMM13,%XMM14 |
(817) 0x474c99 VFMADD231SD %XMM2,%XMM11,%XMM13 |
(817) 0x474c9e VMULSD %XMM9,%XMM12,%XMM0 |
(817) 0x474ca3 VFMADD213SD 0x11899c(%RIP),%XMM9,%XMM15 |
(817) 0x474cac VFMADD213SD 0x1189eb(%RIP),%XMM11,%XMM14 |
(817) 0x474cb5 VFMADD132SD %XMM11,%XMM8,%XMM14 |
(817) 0x474cba VFMADD132SD %XMM13,%XMM8,%XMM11 |
(817) 0x474cbf VUNPCKLPD %XMM15,%XMM14,%XMM4 |
(817) 0x474cc4 VUNPCKLPD %XMM0,%XMM11,%XMM3 |
(817) 0x474cc8 VINSERTF128 $0x1,%XMM3,%YMM4,%YMM7 |
(817) 0x474cce VMULSD 0x70(%RCX),%XMM1,%XMM1 |
(817) 0x474cd3 VXORPD %XMM5,%XMM5,%XMM5 |
(817) 0x474cd7 MOV 0x60(%RCX),%ESI |
(817) 0x474cda VCOMISD %XMM1,%XMM5 |
(817) 0x474cde JA 4753f2 |
(817) 0x474ce4 VCVTTSD2SI %XMM1,%EDI |
(817) 0x474ce8 DEC %ESI |
(817) 0x474cea CMP %EDI,%ESI |
(817) 0x474cec JL 475402 |
(817) 0x474cf2 VRNDSCALESD $0xb,%XMM1,%XMM1,%XMM6 |
(817) 0x474cf9 VSUBSD %XMM6,%XMM1,%XMM11 |
(817) 0x474cfd VMOVSD 0x11893b(%RIP),%XMM8 |
(817) 0x474d05 VMOVSD 0x1189a3(%RIP),%XMM13 |
(817) 0x474d0d MOVSXD %EDI,%R13 |
(817) 0x474d10 VMULSD %XMM8,%XMM11,%XMM12 |
(817) 0x474d15 VMOVSD %XMM11,%XMM11,%XMM2 |
(817) 0x474d19 VMOVSD %XMM11,%XMM11,%XMM15 |
(817) 0x474d1e VFNMADD132SD %XMM13,%XMM13,%XMM2 |
(817) 0x474d23 VFMADD213SD 0x118abc(%RIP),%XMM13,%XMM15 |
(817) 0x474d2c VMULSD %XMM11,%XMM11,%XMM9 |
(817) 0x474d31 VSUBSD %XMM12,%XMM13,%XMM14 |
(817) 0x474d36 VFMADD231SD %XMM2,%XMM11,%XMM13 |
(817) 0x474d3b VMULSD %XMM9,%XMM12,%XMM0 |
(817) 0x474d40 VFMADD213SD 0x1188ff(%RIP),%XMM9,%XMM15 |
(817) 0x474d49 VFMADD213SD 0x11894e(%RIP),%XMM11,%XMM14 |
(817) 0x474d52 VFMADD132SD %XMM11,%XMM8,%XMM14 |
(817) 0x474d57 VFMADD231SD %XMM13,%XMM11,%XMM8 |
(817) 0x474d5c VUNPCKLPD %XMM15,%XMM14,%XMM4 |
(817) 0x474d61 VUNPCKLPD %XMM0,%XMM8,%XMM3 |
(817) 0x474d65 VINSERTF128 $0x1,%XMM3,%YMM4,%YMM1 |
(817) 0x474d6b VMULSD 0x98(%RCX),%XMM10,%XMM10 |
(817) 0x474d73 VXORPD %XMM5,%XMM5,%XMM5 |
(817) 0x474d77 MOV 0x88(%RCX),%EAX |
(817) 0x474d7d VCOMISD %XMM10,%XMM5 |
(817) 0x474d82 JA 4753ac |
(817) 0x474d88 VCVTTSD2SI %XMM10,%R8D |
(817) 0x474d8d DEC %EAX |
(817) 0x474d8f CMP %R8D,%EAX |
(817) 0x474d92 JL 475412 |
(817) 0x474d98 VRNDSCALESD $0xb,%XMM10,%XMM10,%XMM6 |
(817) 0x474d9f VSUBSD %XMM6,%XMM10,%XMM3 |
(817) 0x474da3 VMOVSD 0x118905(%RIP),%XMM9 |
(817) 0x474dab VMOVSD 0x11888d(%RIP),%XMM12 |
(817) 0x474db3 MOVSXD %R8D,%R9 |
(817) 0x474db6 MOV %R9,0xc8(%RSP) |
(817) 0x474dbe VMULSD 0x118a2a(%RIP),%XMM3,%XMM11 |
(817) 0x474dc6 VMOVSD %XMM3,%XMM3,%XMM8 |
(817) 0x474dca VMOVSD %XMM3,%XMM3,%XMM6 |
(817) 0x474dce VFNMADD132SD %XMM9,%XMM9,%XMM8 |
(817) 0x474dd3 VFMADD213SD 0x118a0c(%RIP),%XMM9,%XMM6 |
(817) 0x474ddc VMULSD %XMM3,%XMM3,%XMM13 |
(817) 0x474de0 VADDSD %XMM9,%XMM11,%XMM5 |
(817) 0x474de5 VXORPD 0x116ed3(%RIP),%XMM11,%XMM14 |
(817) 0x474ded VFMADD231SD %XMM8,%XMM3,%XMM9 |
(817) 0x474df2 VMULSD %XMM13,%XMM14,%XMM4 |
(817) 0x474df7 VFMADD213SD 0x118848(%RIP),%XMM13,%XMM6 |
(817) 0x474e00 VFMADD213SD 0x118897(%RIP),%XMM3,%XMM5 |
(817) 0x474e09 VFMADD132SD %XMM3,%XMM12,%XMM5 |
(817) 0x474e0e VFMADD132SD %XMM9,%XMM12,%XMM3 |
(817) 0x474e13 CMPQ $0,0x48(%RSP) |
(817) 0x474e19 MOV 0x10(%RCX),%RBX |
(817) 0x474e1d VMOVAPD %YMM7,0xe0(%RSP) |
(817) 0x474e26 MOV 0x18(%RCX),%R12 |
(817) 0x474e2a MOV 0x20(%RCX),%R14 |
(817) 0x474e2e VMOVAPD %YMM1,0x100(%RSP) |
(817) 0x474e37 JE 474ea3 |
(817) 0x474e39 MOV %RCX,0xc0(%RSP) |
(817) 0x474e41 MOV 0x48(%RSP),%RDX |
(817) 0x474e46 XOR %ESI,%ESI |
(817) 0x474e48 MOV %R15,%RDI |
(817) 0x474e4b VMOVSD %XMM4,0xa0(%RSP) |
(817) 0x474e54 VMOVSD %XMM3,0xa8(%RSP) |
(817) 0x474e5d VMOVSD %XMM6,0xb0(%RSP) |
(817) 0x474e66 VMOVSD %XMM5,0xb8(%RSP) |
(817) 0x474e6f VZEROUPPER |
(817) 0x474e72 CALL 404110 <memset@plt> |
(817) 0x474e77 MOV 0xc0(%RSP),%RCX |
(817) 0x474e7f VMOVSD 0xb8(%RSP),%XMM5 |
(817) 0x474e88 VMOVSD 0xb0(%RSP),%XMM6 |
(817) 0x474e91 VMOVSD 0xa8(%RSP),%XMM3 |
(817) 0x474e9a VMOVSD 0xa0(%RSP),%XMM4 |
(817) 0x474ea3 MOV 0xc8(%RSP),%RAX |
(817) 0x474eab MOV 0x8(%RCX),%RSI |
(817) 0x474eaf IMUL %R14,%RAX |
(817) 0x474eb3 CMPQ $0,0x68(%RSP) |
(817) 0x474eb9 JE 4752f1 |
(817) 0x474ebf MOV 0xd8(%RSP),%RDX |
(817) 0x474ec7 MOV %R13,%R8 |
(817) 0x474eca LEA 0x1(%R13),%R13 |
(817) 0x474ece MOV %R12,%R10 |
(817) 0x474ed1 IMUL %R12,%R13 |
(817) 0x474ed5 LEA (%R12,%R12,1),%R11 |
(817) 0x474ed9 SAL $0x4,%R10 |
(817) 0x474edd MOV %RBX,0x58(%RSP) |
(817) 0x474ee2 IMUL %RBX,%RDX |
(817) 0x474ee6 MOV %R10,0x98(%RSP) |
(817) 0x474eee LEA (,%R14,8),%RCX |
(817) 0x474ef6 VBROADCASTSD %XMM5,%YMM10 |
(817) 0x474efb IMUL %R12,%R8 |
(817) 0x474eff MOV 0x88(%RSP),%R12D |
(817) 0x474f07 MOV %R11,0xa0(%RSP) |
(817) 0x474f0f LEA 0xe0(%RSP),%RDI |
(817) 0x474f17 VBROADCASTSD %XMM6,%YMM9 |
(817) 0x474f1c VBROADCASTSD %XMM3,%YMM8 |
(817) 0x474f21 VBROADCASTSD %XMM4,%YMM7 |
(817) 0x474f26 MOV %RSI,%RBX |
(817) 0x474f29 ADD %RDX,%R13 |
(817) 0x474f2c AND $0x3,%R12D |
(817) 0x474f30 ADD %RDX,%R8 |
(817) 0x474f33 LEA (%R13,%RAX,1),%R9 |
(817) 0x474f38 MOV %R12D,0x8c(%RSP) |
(817) 0x474f40 MOV %R15,%R12 |
(817) 0x474f43 LEA (%R8,%RAX,1),%R8 |
(817) 0x474f47 MOV %R9,%R13 |
(817) 0x474f4a MOV %RCX,%R15 |
(819) 0x474f4d MOV %R8,0xd8(%RSP) |
(819) 0x474f55 LEA 0x100(%RSP),%RAX |
(819) 0x474f5d VMOVSD (%RDI),%XMM11 |
(819) 0x474f61 LEA (%RBX,%R13,8),%RDX |
(819) 0x474f65 MOV %RDI,0x80(%RSP) |
(819) 0x474f6d LEA (%RBX,%R8,8),%RCX |
(819) 0x474f71 VMOVDDUP %XMM5,%XMM15 |
(819) 0x474f75 VMOVDDUP %XMM6,%XMM14 |
(819) 0x474f79 MOV %R8,0x78(%RSP) |
(819) 0x474f7e VMOVDDUP %XMM3,%XMM13 |
(819) 0x474f82 VMOVDDUP %XMM4,%XMM12 |
(819) 0x474f86 MOV %R13,0xc8(%RSP) |
(819) 0x474f8e MOV %R13,0x70(%RSP) |
(819) 0x474f93 MOV %RAX,%R13 |
(818) 0x474f96 VMULSD (%R13),%XMM11,%XMM16 |
(818) 0x474f9d CMPQ $0x2,0x90(%RSP) |
(818) 0x474fa6 JBE 4753a5 |
(818) 0x474fac VMULSD 0x8(%R13),%XMM11,%XMM17 |
(818) 0x474fb3 LEA (%R15,%RCX,1),%R9 |
(818) 0x474fb7 LEA (%R15,%RDX,1),%RDI |
(818) 0x474fbb XOR %EAX,%EAX |
(818) 0x474fbd LEA (%R9,%R15,1),%R8 |
(818) 0x474fc1 LEA (%R15,%RDI,1),%RSI |
(818) 0x474fc5 VBROADCASTSD %XMM16,%YMM18 |
(818) 0x474fcb LEA (%R15,%R8,1),%R11 |
(818) 0x474fcf LEA (%RSI,%R15,1),%R10 |
(818) 0x474fd3 VBROADCASTSD %XMM17,%YMM20 |
(818) 0x474fd9 TESTB $0x20,0xd0(%RSP) |
(818) 0x474fe1 JE 475040 |
(818) 0x474fe3 VMULPD (%R9),%YMM9,%YMM2 |
(818) 0x474fe8 CMPQ $0x20,0xd0(%RSP) |
(818) 0x474ff1 MOV $0x20,%EAX |
(818) 0x474ff6 VMULPD (%R11),%YMM7,%YMM0 |
(818) 0x474ffb VMULPD (%RDI),%YMM9,%YMM1 |
(818) 0x474fff VMULPD (%R10),%YMM7,%YMM19 |
(818) 0x475005 VFMADD231PD (%RCX),%YMM10,%YMM2 |
(818) 0x47500a VFMADD231PD (%R8),%YMM8,%YMM0 |
(818) 0x47500f VFMADD231PD (%RDX),%YMM10,%YMM1 |
(818) 0x475014 VFMADD231PD (%RSI),%YMM8,%YMM19 |
(818) 0x47501a VADDPD %YMM0,%YMM2,%YMM2 |
(818) 0x47501e VADDPD %YMM19,%YMM1,%YMM0 |
(818) 0x475024 VFMADD213PD (%R12),%YMM18,%YMM2 |
(818) 0x47502b VFMADD132PD %YMM20,%YMM2,%YMM0 |
(818) 0x475031 VMOVUPD %YMM0,(%R12) |
(818) 0x475037 JE 4750f4 |
(818) 0x47503d NOPL (%RAX) |
(820) 0x475040 VMULPD (%R9,%RAX,1),%YMM9,%YMM2 |
(820) 0x475046 VMULPD (%R11,%RAX,1),%YMM7,%YMM1 |
(820) 0x47504c VMULPD (%R10,%RAX,1),%YMM7,%YMM21 |
(820) 0x475053 VFMADD231PD (%RCX,%RAX,1),%YMM10,%YMM2 |
(820) 0x475059 VFMADD231PD (%R8,%RAX,1),%YMM8,%YMM1 |
(820) 0x47505f VFMADD231PD (%RSI,%RAX,1),%YMM8,%YMM21 |
(820) 0x475066 VADDPD %YMM1,%YMM2,%YMM0 |
(820) 0x47506a VMULPD (%RDI,%RAX,1),%YMM9,%YMM2 |
(820) 0x47506f VFMADD213PD (%R12,%RAX,1),%YMM18,%YMM0 |
(820) 0x475076 VFMADD231PD (%RDX,%RAX,1),%YMM10,%YMM2 |
(820) 0x47507c VADDPD %YMM21,%YMM2,%YMM1 |
(820) 0x475082 VMULPD 0x20(%R11,%RAX,1),%YMM7,%YMM2 |
(820) 0x475089 VFMADD132PD %YMM20,%YMM0,%YMM1 |
(820) 0x47508f VMULPD 0x20(%R9,%RAX,1),%YMM9,%YMM0 |
(820) 0x475096 VFMADD231PD 0x20(%RAX,%R8,1),%YMM8,%YMM2 |
(820) 0x47509d VMOVUPD %YMM1,(%R12,%RAX,1) |
(820) 0x4750a3 VFMADD231PD 0x20(%RAX,%RCX,1),%YMM10,%YMM0 |
(820) 0x4750aa VADDPD %YMM2,%YMM0,%YMM1 |
(820) 0x4750ae VMULPD 0x20(%RAX,%RDI,1),%YMM9,%YMM0 |
(820) 0x4750b4 VMULPD 0x20(%R10,%RAX,1),%YMM7,%YMM2 |
(820) 0x4750bb VFMADD213PD 0x20(%R12,%RAX,1),%YMM18,%YMM1 |
(820) 0x4750c3 VFMADD231PD 0x20(%RAX,%RDX,1),%YMM10,%YMM0 |
(820) 0x4750ca VFMADD231PD 0x20(%RSI,%RAX,1),%YMM8,%YMM2 |
(820) 0x4750d1 VADDPD %YMM2,%YMM0,%YMM0 |
(820) 0x4750d5 VFMADD132PD %YMM20,%YMM1,%YMM0 |
(820) 0x4750db VMOVUPD %YMM0,0x20(%R12,%RAX,1) |
(820) 0x4750e2 ADD $0x40,%RAX |
(820) 0x4750e6 CMP %RAX,0xd0(%RSP) |
(820) 0x4750ee JNE 475040 |
(818) 0x4750f4 MOV 0x8c(%RSP),%R9D |
(818) 0x4750fc TEST %R9D,%R9D |
(818) 0x4750ff JE 475284 |
(818) 0x475105 MOV 0x60(%RSP),%RAX |
(818) 0x47510a MOV 0x68(%RSP),%R8 |
(818) 0x47510f SUB %RAX,%R8 |
(818) 0x475112 MOV %R8,0xa8(%RSP) |
(818) 0x47511a CMP $0x1,%R8 |
(818) 0x47511e JE 475221 |
(818) 0x475124 MOV 0xd8(%RSP),%R11 |
(818) 0x47512c VMULSD 0x8(%R13),%XMM11,%XMM23 |
(818) 0x475133 VMOVDDUP %XMM16,%XMM22 |
(818) 0x475139 LEA (%R12,%RAX,8),%R9 |
(818) 0x47513d LEA (%R14,%R11,1),%R8 |
(818) 0x475141 LEA (%R14,%R8,1),%RDI |
(818) 0x475145 ADD %RAX,%R8 |
(818) 0x475148 LEA (%RDI,%RAX,1),%RSI |
(818) 0x47514c VMULPD (%RBX,%R8,8),%XMM14,%XMM1 |
(818) 0x475152 ADD %R14,%RDI |
(818) 0x475155 LEA (%RDI,%RAX,1),%R10 |
(818) 0x475159 MOV 0xc8(%RSP),%RDI |
(818) 0x475161 MOV %RSI,0xc0(%RSP) |
(818) 0x475169 VMOVDDUP %XMM23,%XMM24 |
(818) 0x47516f MOV %R10,0xb8(%RSP) |
(818) 0x475177 LEA (%R14,%RDI,1),%RSI |
(818) 0x47517b MOV %RDI,%R11 |
(818) 0x47517e LEA (%RSI,%RAX,1),%RDI |
(818) 0x475182 ADD %R14,%RSI |
(818) 0x475185 ADD %RAX,%R11 |
(818) 0x475188 LEA (%RAX,%RSI,1),%R10 |
(818) 0x47518c MOV 0xb8(%RSP),%R8 |
(818) 0x475194 VMULPD (%RBX,%RDI,8),%XMM14,%XMM25 |
(818) 0x47519b ADD %R14,%RSI |
(818) 0x47519e ADD %RAX,%RSI |
(818) 0x4751a1 MOV %RSI,0xb0(%RSP) |
(818) 0x4751a9 VMULPD (%RBX,%R8,8),%XMM12,%XMM2 |
(818) 0x4751af MOV 0xd8(%RSP),%RSI |
(818) 0x4751b7 ADD %RAX,%RSI |
(818) 0x4751ba VFMADD231PD (%RBX,%R11,8),%XMM15,%XMM25 |
(818) 0x4751c1 MOV 0xb0(%RSP),%R11 |
(818) 0x4751c9 VFMADD231PD (%RBX,%RSI,8),%XMM15,%XMM1 |
(818) 0x4751cf MOV 0xc0(%RSP),%RSI |
(818) 0x4751d7 VFMADD231PD (%RBX,%RSI,8),%XMM13,%XMM2 |
(818) 0x4751dd VADDPD %XMM2,%XMM1,%XMM0 |
(818) 0x4751e1 VMULPD (%RBX,%R11,8),%XMM12,%XMM1 |
(818) 0x4751e7 VFMADD231PD (%RBX,%R10,8),%XMM13,%XMM1 |
(818) 0x4751ed VADDPD %XMM25,%XMM1,%XMM2 |
(818) 0x4751f3 VMULPD %XMM24,%XMM2,%XMM1 |
(818) 0x4751f9 VFMADD132PD %XMM22,%XMM1,%XMM0 |
(818) 0x4751ff VADDPD (%R9),%XMM0,%XMM0 |
(818) 0x475204 VMOVUPD %XMM0,(%R9) |
(818) 0x475209 MOV 0xa8(%RSP),%R9 |
(818) 0x475211 TEST $0x1,%R9B |
(818) 0x475215 JE 475284 |
(818) 0x475217 MOV %R9,%RDI |
(818) 0x47521a AND $-0x2,%RDI |
(818) 0x47521e ADD %RDI,%RAX |
(818) 0x475221 LEA (%R14,%RAX,1),%R8 |
(818) 0x475225 VMULSD 0x8(%R13),%XMM11,%XMM26 |
(818) 0x47522c LEA (%R12,%RAX,8),%R10 |
(818) 0x475230 LEA (%R14,%R8,1),%RSI |
(818) 0x475234 LEA (%R14,%RSI,1),%R11 |
(818) 0x475238 VMULSD (%RCX,%RSI,8),%XMM3,%XMM2 |
(818) 0x47523d VMULSD (%RCX,%R11,8),%XMM4,%XMM1 |
(818) 0x475243 VFMADD231SD (%RCX,%R8,8),%XMM6,%XMM2 |
(818) 0x475249 VFMADD231SD (%RCX,%RAX,8),%XMM5,%XMM1 |
(818) 0x47524f VADDSD %XMM1,%XMM2,%XMM0 |
(818) 0x475253 VMULSD (%RDX,%RSI,8),%XMM3,%XMM1 |
(818) 0x475258 VMULSD (%RDX,%R11,8),%XMM4,%XMM2 |
(818) 0x47525e VFMADD231SD (%RDX,%R8,8),%XMM6,%XMM1 |
(818) 0x475264 VFMADD231SD (%RDX,%RAX,8),%XMM5,%XMM2 |
(818) 0x47526a VADDSD %XMM2,%XMM1,%XMM1 |
(818) 0x47526e VMULSD %XMM26,%XMM1,%XMM2 |
(818) 0x475274 VFMADD132SD %XMM16,%XMM2,%XMM0 |
(818) 0x47527a VADDSD (%R10),%XMM0,%XMM0 |
(818) 0x47527f VMOVSD %XMM0,(%R10) |
(818) 0x475284 MOV 0x98(%RSP),%RAX |
(818) 0x47528c MOV 0xa0(%RSP),%R9 |
(818) 0x475294 ADD $0x10,%R13 |
(818) 0x475298 LEA 0x120(%RSP),%RDI |
(818) 0x4752a0 ADD %R9,0xd8(%RSP) |
(818) 0x4752a8 ADD %RAX,%RCX |
(818) 0x4752ab ADD %RAX,%RDX |
(818) 0x4752ae ADD %R9,0xc8(%RSP) |
(818) 0x4752b6 CMP %RDI,%R13 |
(818) 0x4752b9 JNE 474f96 |
(819) 0x4752bf MOV 0x80(%RSP),%RDI |
(819) 0x4752c7 MOV 0x58(%RSP),%RCX |
(819) 0x4752cc LEA 0x100(%RSP),%RDX |
(819) 0x4752d4 MOV 0x78(%RSP),%R8 |
(819) 0x4752d9 MOV 0x70(%RSP),%R13 |
(819) 0x4752de ADD $0x8,%RDI |
(819) 0x4752e2 ADD %RCX,%R8 |
(819) 0x4752e5 ADD %RCX,%R13 |
(819) 0x4752e8 CMP %RDX,%RDI |
(819) 0x4752eb JNE 474f4d |
(817) 0x4752f1 ADDQ $0x8,0x50(%RSP) |
(817) 0x4752f7 ADDQ $0x18,0x40(%RSP) |
(817) 0x4752fd MOV 0x50(%RSP),%R12 |
(817) 0x475302 CMP %R12,0x38(%RSP) |
(817) 0x475307 JNE 474bf6 |
0x47530d MOV 0x18(%RSP),%R13 |
0x475312 MOV 0x10(%RSP),%RBX |
0x475317 MOV 0x8(%RSP),%R12 |
0x47531c VZEROUPPER |
0x47531f MOV %R13,%RDI |
0x475322 XOR %R14D,%R14D |
0x475325 XOR %R15D,%R15D |
0x475328 CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x47532d MOV 0x30(%RBX),%ESI |
0x475330 TEST %ESI,%ESI |
0x475332 JLE 475396 |
(816) 0x475334 MOV 0x40(%RBX),%EAX |
(816) 0x475337 MOV %R15D,%R10D |
(816) 0x47533a INC %R15D |
(816) 0x47533d MOV 0x8(%RBX),%R11D |
(816) 0x475341 IMUL %ESI,%R10D |
(816) 0x475345 IMUL %R15D,%EAX |
(816) 0x475349 CMP %R11D,%EAX |
(816) 0x47534c CMOVG %R11D,%EAX |
(816) 0x475350 SUB %R10D,%EAX |
(816) 0x475353 TEST %EAX,%EAX |
(816) 0x475355 JLE 47538d |
(816) 0x475357 MOV 0x18(%R12),%RDI |
(816) 0x47535c MOV 0x310(%RBX),%R8 |
(816) 0x475363 CLTQ |
(816) 0x475365 MOVSXD %R10D,%R9 |
(816) 0x475368 LEA (,%RAX,8),%RDX |
(816) 0x475370 MOV (%R8,%R14,1),%RCX |
(816) 0x475374 LEA (%RDI,%R9,8),%RDI |
(816) 0x475378 CMP $0x8,%RDX |
(816) 0x47537c JE 475458 |
(816) 0x475382 MOV %RCX,%RSI |
(816) 0x475385 CALL 4040a0 <memmove@plt> |
(816) 0x47538a MOV 0x30(%RBX),%ESI |
(816) 0x47538d ADD $0x18,%R14 |
(816) 0x475391 CMP %ESI,%R15D |
(816) 0x475394 JL 475334 |
0x475396 LEA -0x28(%RBP),%RSP |
0x47539a POP %RBX |
0x47539b POP %R12 |
0x47539d POP %R13 |
0x47539f POP %R14 |
0x4753a1 POP %R15 |
0x4753a3 POP %RBP |
0x4753a4 RET |
(818) 0x4753a5 XOR %EAX,%EAX |
(818) 0x4753a7 JMP 47510a |
(817) 0x4753ac VMOVSD 0x11828c(%RIP),%XMM3 |
(817) 0x4753b4 MOV 0x11828d(%RIP),%R11 |
(817) 0x4753bb MOVQ $0,0xc8(%RSP) |
(817) 0x4753c7 VXORPD %XMM4,%XMM4,%XMM4 |
(817) 0x4753cb VMOVQ %R11,%XMM6 |
(817) 0x4753d0 VMOVSD %XMM3,%XMM3,%XMM5 |
(817) 0x4753d4 JMP 474e13 |
(817) 0x4753d9 MOVQ $0,0xd8(%RSP) |
(817) 0x4753e5 VMOVAPD 0x118253(%RIP),%YMM7 |
(817) 0x4753ed JMP 474cce |
(817) 0x4753f2 VMOVAPD 0x118246(%RIP),%YMM1 |
(817) 0x4753fa XOR %R13D,%R13D |
(817) 0x4753fd JMP 474d6b |
(817) 0x475402 VMOVAPD 0x118256(%RIP),%YMM1 |
(817) 0x47540a MOVSXD %ESI,%R13 |
(817) 0x47540d JMP 474d6b |
(817) 0x475412 MOV 0x11822f(%RIP),%R10 |
(817) 0x475419 CLTQ |
(817) 0x47541b VMOVSD 0x118255(%RIP),%XMM4 |
(817) 0x475423 VXORPD %XMM5,%XMM5,%XMM5 |
(817) 0x475427 MOV %RAX,0xc8(%RSP) |
(817) 0x47542f VMOVSD 0x118231(%RIP),%XMM6 |
(817) 0x475437 VMOVQ %R10,%XMM3 |
(817) 0x47543c JMP 474e13 |
(817) 0x475441 CLTQ |
(817) 0x475443 VMOVAPD 0x118215(%RIP),%YMM7 |
(817) 0x47544b MOV %RAX,0xd8(%RSP) |
(817) 0x475453 JMP 474cce |
(816) 0x475458 VMOVSD (%RCX),%XMM11 |
(816) 0x47545c VMOVSD %XMM11,(%RDI) |
(816) 0x475460 JMP 47538d |
0x475465 VMOVSD %XMM5,%XMM5,%XMM7 |
0x475469 JMP 474b0d |
0x47546e XCHG %AX,%AX |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 99 |
nb uops | 105 |
loop length | 455 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 16 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 17.50 cycles |
front end | 17.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.83 | 8.00 | 9.33 | 9.33 | 11.50 | 7.67 | 7.50 | 11.50 | 11.50 | 11.50 | 7.00 | 9.33 |
cycles | 7.83 | 8.00 | 9.33 | 9.33 | 11.50 | 7.67 | 7.50 | 11.50 | 11.50 | 11.50 | 7.00 | 9.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 17.37 |
Stall cycles | 0.00 |
Front-end | 17.50 |
Dispatch | 11.50 |
Overall L1 | 17.50 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 42% |
load | 40% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 27% |
load | 26% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 12% |
load | 11% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 17% |
load | 17% |
store | 12% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 15% |
load | 15% |
store | 12% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x358(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x128(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R14),%R15D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 474abf <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x4f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R15,%R15,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RCX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VUNPCKHPD %XMM2,%XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM2,%XMM2,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD 0x100(%RBX),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD132SD 0xe8(%RBX),%XMM3,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x118(%RBX),%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VCOMISD %XMM0,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 474b02 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x92> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCOMISD 0x118ce4(%RIP),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JA 475465 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x9f5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM0,%XMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVSXD 0x30(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ESI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 47531f <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x8af> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPERMILPD $0,%XMM2,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPERMILPD $0x3,%XMM2,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM4,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP 0x118caf(%RIP),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x40(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULPD 0xd8(%RBX),%XMM10,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x2f8(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x310(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EDI,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%RSI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x5,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
AND $-0x4,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VFMADD231PD 0xf0(%RBX),%XMM9,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VFMADD132PD 0x108(%RBX),%XMM11,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM7,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VRNDSCALEPD $0x9,%XMM8,%XMM12 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCMPPD $0xe,%XMM13,%XMM8,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VCMPPD $0x1,%XMM14,%XMM8,%K0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KNOTB %K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %XMM12,%XMM8,%XMM15{%K2}{z} | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVHPD %XMM15,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 4-12 | 0.50 |
VMOVLPD %XMM15,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 4-12 | 0.50 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ESI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 475396 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x926> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
VMOVSD %XMM5,%XMM5,%XMM7 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 474b0d <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x9d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 99 |
nb uops | 105 |
loop length | 455 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 16 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 17.50 cycles |
front end | 17.50 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.83 | 8.00 | 9.33 | 9.33 | 11.50 | 7.67 | 7.50 | 11.50 | 11.50 | 11.50 | 7.00 | 9.33 |
cycles | 7.83 | 8.00 | 9.33 | 9.33 | 11.50 | 7.67 | 7.50 | 11.50 | 11.50 | 11.50 | 7.00 | 9.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 17.37 |
Stall cycles | 0.00 |
Front-end | 17.50 |
Dispatch | 11.50 |
Overall L1 | 17.50 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 42% |
load | 40% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 27% |
load | 26% |
store | 0% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 12% |
load | 11% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 17% |
load | 17% |
store | 12% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 15% |
load | 15% |
store | 12% |
mul | 18% |
add-sub | 18% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x358(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x128(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R14),%R15D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 474abf <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x4f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R15,%R15,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RCX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0x10(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VUNPCKHPD %XMM2,%XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM2,%XMM2,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD 0x100(%RBX),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD132SD 0xe8(%RBX),%XMM3,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x118(%RBX),%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VCOMISD %XMM0,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 474b02 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x92> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCOMISD 0x118ce4(%RIP),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JA 475465 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x9f5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM0,%XMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVSXD 0x30(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ESI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 47531f <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x8af> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPERMILPD $0,%XMM2,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPERMILPD $0x3,%XMM2,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM4,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP 0x118caf(%RIP),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x40(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULPD 0xd8(%RBX),%XMM10,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x2f8(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x310(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EDI,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%RSI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x5,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
AND $-0x4,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VFMADD231PD 0xf0(%RBX),%XMM9,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VFMADD132PD 0x108(%RBX),%XMM11,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM7,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VRNDSCALEPD $0x9,%XMM8,%XMM12 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCMPPD $0xe,%XMM13,%XMM8,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VCMPPD $0x1,%XMM14,%XMM8,%K0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KNOTB %K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %XMM12,%XMM8,%XMM15{%K2}{z} | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVHPD %XMM15,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 4-12 | 0.50 |
VMOVLPD %XMM15,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 4-12 | 0.50 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ESI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 475396 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x926> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
VMOVSD %XMM5,%XMM5,%XMM7 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 474b0d <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x9d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::einspline_spo_ref | 23.23 | 23.05 |
▼Loop 817 - einspline_spo_ref.hpp:175-176 - exec– | 0.02 | 0.02 |
▼Loop 819 - MultiBsplineRef.hpp:63-71 - exec– | 0 | 0 |
▼Loop 818 - MultiBsplineRef.hpp:64-71 - exec– | 0.01 | 0.01 |
○Loop 820 - MultiBsplineRef.hpp:68-71 - exec | 23.17 | 19.99 |
○Loop 816 - einspline_spo_ref.hpp:183-187 - exec | 0 | 0 |