Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 27.4% |
---|
Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 27.4% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/Tensor.h: 213 - 213 |
-------------------------------------------------------------------------------- |
213: inline Type_t operator[](unsigned int i) const { return X[i]; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 61 - 61 |
-------------------------------------------------------------------------------- |
61: for (size_t d = 0; d < D; ++d) |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1046 - 1169 |
-------------------------------------------------------------------------------- |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algobase.h: 235 - 924 |
-------------------------------------------------------------------------------- |
235: if (__b < __a) |
[...] |
429: const ptrdiff_t _Num = __last - __first; |
430: if (_Num) |
431: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
923: for (; __first != __last; ++__first) |
924: *__first = __tmp; |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 45 - 47 |
-------------------------------------------------------------------------------- |
45: T sf = std::floor(x); |
46: T dx2 = x - sf; |
47: int ind2 = std::min(std::max(0, static_cast<int>(sf)), nmax); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 170 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algo.h: 725 - 751 |
-------------------------------------------------------------------------------- |
725: { return std::copy(__first, __first + __n, __result); } |
[...] |
751: if (__n2 <= 0) |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
0x43bd70 PUSH %RBP |
0x43bd71 MOV %RSP,%RBP |
0x43bd74 PUSH %R15 |
0x43bd76 PUSH %R14 |
0x43bd78 PUSH %R13 |
0x43bd7a PUSH %R12 |
0x43bd7c PUSH %RBX |
0x43bd7d SUB $0x158,%RSP |
0x43bd84 MOV %RCX,-0xb8(%RBP) |
0x43bd8b MOV %EDX,%R12D |
0x43bd8e MOV %RSI,%R13 |
0x43bd91 MOV %RDI,%RBX |
0x43bd94 MOV 0x358(%RDI),%RDI |
0x43bd9b MOV %RDI,-0xc0(%RBP) |
0x43bda2 CALL 48a790 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x43bda7 MOVSXD %R12D,%RCX |
0x43bdaa LEA (%RCX,%RCX,2),%RDX |
0x43bdae SAL $0x3,%RDX |
0x43bdb2 ADD 0x40(%R13),%RDX |
0x43bdb6 LEA 0x128(%R13),%RAX |
0x43bdbd CMP %ECX,0x124(%R13) |
0x43bdc4 CMOVNE %RDX,%RAX |
0x43bdc8 MOV %RBX,-0x78(%RBP) |
0x43bdcc MOV 0x30(%RBX),%ECX |
0x43bdcf MOV %RCX,-0x80(%RBP) |
0x43bdd3 TEST %ECX,%ECX |
0x43bdd5 JLE 43c448 |
0x43bddb VMOVUPD (%RAX),%XMM0 |
0x43bddf MOV -0x78(%RBP),%RCX |
0x43bde3 VMULSD 0xd8(%RCX),%XMM0,%XMM1 |
0x43bdeb VMOVSD 0xf0(%RCX),%XMM2 |
0x43bdf3 VFMADD231SD 0x8(%RAX),%XMM2,%XMM1 |
0x43bdf9 VMOVDDUP 0x10(%RAX),%XMM2 |
0x43bdfe VFMADD231SD 0x108(%RCX),%XMM2,%XMM1 |
0x43be07 VMOVUPD 0xe0(%RCX),%XMM3 |
0x43be0f VMOVSD 0xf8(%RCX),%XMM4 |
0x43be17 VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 |
0x43be1c VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 |
0x43be24 VMULPD %XMM5,%XMM4,%XMM4 |
0x43be28 VMOVHPD 0x100(%RCX),%XMM3,%XMM3 |
0x43be30 VFMADD213PD %XMM4,%XMM0,%XMM3 |
0x43be35 VFMADD231PD 0x110(%RCX),%XMM2,%XMM3 |
0x43be3e VMOVSD 0xbc04a(%RIP),%XMM0 |
0x43be46 VCMPPD $0x1,%XMM1,%XMM0,%K1 |
0x43be4d VXORPD %XMM2,%XMM2,%XMM2 |
0x43be51 VCMPPD $0x1,%XMM2,%XMM1,%K1{%K1} |
0x43be58 VROUNDSD $0x9,%XMM1,%XMM1,%XMM4 |
0x43be5e VSUBSD %XMM4,%XMM1,%XMM1 |
0x43be62 VMOVSD %XMM2,%XMM1,%XMM1{%K1} |
0x43be68 VMOVUPD %XMM1,-0x140(%RBP) |
0x43be70 VXORPD %XMM1,%XMM1,%XMM1 |
0x43be74 VCMPPD $0x1,%XMM1,%XMM3,%K1 |
0x43be7b VROUNDSD $0x9,%XMM3,%XMM3,%XMM1 |
0x43be81 VSUBSD %XMM1,%XMM3,%XMM4 |
0x43be85 VMOVAPD %XMM4,%XMM1 |
0x43be89 VMOVSD %XMM2,%XMM1,%XMM1{%K1} |
0x43be8f KMOVD %K1,%EAX |
0x43be93 AND $0x2,%AL |
0x43be95 SHR $0x1,%AL |
0x43be97 VCMPSD $0x1,%XMM3,%XMM0,%K1 |
0x43be9e VMOVSD %XMM1,%XMM4,%XMM4{%K1} |
0x43bea4 VMOVUPD %XMM4,-0x130(%RBP) |
0x43beac VSHUFPD $0x1,%XMM3,%XMM3,%XMM1 |
0x43beb1 VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 |
0x43beb7 VSUBSD %XMM3,%XMM1,%XMM4 |
0x43bebb KMOVD %EAX,%K1 |
0x43bebf VMOVAPD %XMM4,%XMM3 |
0x43bec3 VMOVSD %XMM2,%XMM3,%XMM3{%K1} |
0x43bec9 VCMPSD $0x1,%XMM1,%XMM0,%K1 |
0x43bed0 VMOVSD %XMM3,%XMM4,%XMM4{%K1} |
0x43bed6 VMOVUPD %XMM4,-0x120(%RBP) |
0x43bede MOV 0x2f8(%RCX),%RAX |
0x43bee5 MOV %RAX,-0xd0(%RBP) |
0x43beec MOV 0x310(%RCX),%RAX |
0x43bef3 MOV %RAX,-0xc8(%RBP) |
0x43befa MOVSXD 0x40(%RCX),%R14 |
0x43befe LEA (,%R14,8),%RDX |
0x43bf06 CMP $0x1,%R14 |
0x43bf0a MOV %R14,%R15 |
0x43bf0d ADC $0,%R15 |
0x43bf11 DECQ -0x80(%RBP) |
0x43bf15 MOV %R15,%RAX |
0x43bf18 SHR $0x1,%RAX |
0x43bf1b MOV %RAX,-0xf8(%RBP) |
0x43bf22 MOV %R15,%RBX |
0x43bf25 AND $-0x2,%RBX |
0x43bf29 XOR %ECX,%ECX |
0x43bf2b VMOVSD 0xbbf65(%RIP),%XMM10 |
0x43bf33 VMOVSD 0xb90dd(%RIP),%XMM11 |
0x43bf3b VMOVDDUP 0xb90d5(%RIP),%XMM0 |
0x43bf43 VMOVUPD %XMM0,-0x110(%RBP) |
0x43bf4b MOV %RDX,-0x88(%RBP) |
0x43bf52 JMP 43bf7f |
0x43bf54 NOPW %CS:(%RAX,%RAX,1) |
(884) 0x43bf60 MOV -0xf0(%RBP),%RCX |
(884) 0x43bf67 LEA 0x1(%RCX),%RAX |
(884) 0x43bf6b CMP -0x80(%RBP),%RCX |
(884) 0x43bf6f MOV %RAX,%RCX |
(884) 0x43bf72 MOV -0x88(%RBP),%RDX |
(884) 0x43bf79 JE 43c448 |
(884) 0x43bf7f MOV -0xd0(%RBP),%RAX |
(884) 0x43bf86 MOV (%RAX,%RCX,8),%R12 |
(884) 0x43bf8a MOV %RCX,-0xf0(%RBP) |
(884) 0x43bf91 LEA (%RCX,%RCX,2),%RAX |
(884) 0x43bf95 VMOVUPD -0x140(%RBP),%XMM0 |
(884) 0x43bf9d VSUBSD 0x28(%R12),%XMM0,%XMM0 |
(884) 0x43bfa4 VMOVUPD -0x130(%RBP),%XMM1 |
(884) 0x43bfac VSUBSD 0x50(%R12),%XMM1,%XMM1 |
(884) 0x43bfb3 MOV -0xc8(%RBP),%RCX |
(884) 0x43bfba MOV (%RCX,%RAX,8),%R13 |
(884) 0x43bfbe VMOVUPD -0x120(%RBP),%XMM2 |
(884) 0x43bfc6 VSUBSD 0x78(%R12),%XMM2,%XMM2 |
(884) 0x43bfcd VMULSD 0x48(%R12),%XMM0,%XMM0 |
(884) 0x43bfd4 MOVSXD 0x38(%R12),%R9 |
(884) 0x43bfd9 VROUNDSD $0x9,%XMM0,%XMM0,%XMM14 |
(884) 0x43bfdf VSUBSD %XMM14,%XMM0,%XMM0 |
(884) 0x43bfe4 VMULSD 0x70(%R12),%XMM1,%XMM1 |
(884) 0x43bfeb MOVSXD 0x60(%R12),%RAX |
(884) 0x43bff0 MOV %RAX,-0x68(%RBP) |
(884) 0x43bff4 VROUNDSD $0x9,%XMM1,%XMM1,%XMM15 |
(884) 0x43bffa VMULSD 0x98(%R12),%XMM2,%XMM2 |
(884) 0x43c004 VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM16 |
(884) 0x43c00b MOVSXD 0x88(%R12),%R11 |
(884) 0x43c013 VMULSD %XMM0,%XMM10,%XMM3 |
(884) 0x43c017 VSUBSD %XMM3,%XMM11,%XMM4 |
(884) 0x43c01b VMULSD %XMM0,%XMM0,%XMM5 |
(884) 0x43c01f VMOVDDUP %XMM0,%XMM6 |
(884) 0x43c023 VMOVUPD -0x110(%RBP),%XMM8 |
(884) 0x43c02b VBLENDPD $0x1,%XMM4,%XMM8,%XMM4 |
(884) 0x43c031 VMULPD %XMM4,%XMM6,%XMM4 |
(884) 0x43c035 VMOVUPD 0xbbea3(%RIP),%XMM9 |
(884) 0x43c03d VADDPD %XMM4,%XMM9,%XMM7 |
(884) 0x43c041 VPUNPCKLQDQ %XMM5,%XMM6,%XMM6 |
(884) 0x43c045 VMOVUPD 0xbbea3(%RIP),%XMM12 |
(884) 0x43c04d VFMADD213PD %XMM12,%XMM7,%XMM6 |
(884) 0x43c052 VMOVUPD %XMM6,-0x180(%RBP) |
(884) 0x43c05a VSHUFPD $0x1,%XMM4,%XMM4,%XMM4 |
(884) 0x43c05f VSUBSD %XMM4,%XMM11,%XMM4 |
(884) 0x43c063 VFMADD213SD %XMM11,%XMM0,%XMM4 |
(884) 0x43c068 VFMADD213SD %XMM10,%XMM0,%XMM4 |
(884) 0x43c06d VMOVSD %XMM4,-0x170(%RBP) |
(884) 0x43c075 VSUBSD %XMM15,%XMM1,%XMM0 |
(884) 0x43c07a VMULSD %XMM5,%XMM3,%XMM1 |
(884) 0x43c07e VMOVSD %XMM1,-0x168(%RBP) |
(884) 0x43c086 VMULSD %XMM0,%XMM10,%XMM1 |
(884) 0x43c08a VSUBSD %XMM1,%XMM11,%XMM3 |
(884) 0x43c08e VMULSD %XMM0,%XMM0,%XMM4 |
(884) 0x43c092 VMOVDDUP %XMM0,%XMM5 |
(884) 0x43c096 VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 |
(884) 0x43c09c VMULPD %XMM3,%XMM5,%XMM3 |
(884) 0x43c0a0 VADDPD %XMM3,%XMM9,%XMM6 |
(884) 0x43c0a4 VPUNPCKLQDQ %XMM4,%XMM5,%XMM5 |
(884) 0x43c0a8 VFMADD213PD %XMM12,%XMM6,%XMM5 |
(884) 0x43c0ad VMOVUPD %XMM5,-0x160(%RBP) |
(884) 0x43c0b5 VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 |
(884) 0x43c0ba VSUBSD %XMM3,%XMM11,%XMM3 |
(884) 0x43c0be VFMADD213SD %XMM11,%XMM0,%XMM3 |
(884) 0x43c0c3 VFMADD213SD %XMM10,%XMM0,%XMM3 |
(884) 0x43c0c8 VMOVSD %XMM3,-0x150(%RBP) |
(884) 0x43c0d0 VMULSD %XMM4,%XMM1,%XMM0 |
(884) 0x43c0d4 VMOVSD %XMM0,-0x148(%RBP) |
(884) 0x43c0dc VSUBSD %XMM16,%XMM2,%XMM2 |
(884) 0x43c0e2 VMULSD %XMM2,%XMM10,%XMM1 |
(884) 0x43c0e6 VSUBSD %XMM1,%XMM11,%XMM12 |
(884) 0x43c0ea VFMADD213SD 0xb8f3d(%RIP),%XMM2,%XMM12 |
(884) 0x43c0f3 VMULSD %XMM2,%XMM11,%XMM0 |
(884) 0x43c0f7 VADDSD 0xbbda1(%RIP),%XMM0,%XMM13 |
(884) 0x43c0ff VMULSD %XMM2,%XMM2,%XMM3 |
(884) 0x43c103 VFMADD213SD 0xbbd9c(%RIP),%XMM3,%XMM13 |
(884) 0x43c10c MOV 0x10(%R12),%RAX |
(884) 0x43c111 MOV %RAX,-0x90(%RBP) |
(884) 0x43c118 MOV 0x18(%R12),%RAX |
(884) 0x43c11d MOV %RAX,-0x70(%RBP) |
(884) 0x43c121 MOV 0x20(%R12),%RAX |
(884) 0x43c126 MOV %RAX,-0x58(%RBP) |
(884) 0x43c12a TEST %RDX,%RDX |
(884) 0x43c12d JE 43c1e0 |
(884) 0x43c133 MOV %R13,%RDI |
(884) 0x43c136 XOR %ESI,%ESI |
(884) 0x43c138 MOV -0x88(%RBP),%RDX |
(884) 0x43c13f VMOVUPD %XMM12,-0xb0(%RBP) |
(884) 0x43c147 VMOVUPD %XMM13,-0xa0(%RBP) |
(884) 0x43c14f MOV %R9,-0x60(%RBP) |
(884) 0x43c153 MOV %R11,-0x50(%RBP) |
(884) 0x43c157 VMOVSD %XMM14,-0x48(%RBP) |
(884) 0x43c15c VMOVSD %XMM15,-0x40(%RBP) |
(884) 0x43c161 VMOVSD %XMM16,-0x38(%RBP) |
(884) 0x43c168 VMOVSD %XMM1,-0x30(%RBP) |
(884) 0x43c16d VMOVSD %XMM2,-0xe8(%RBP) |
(884) 0x43c175 VMOVSD %XMM3,-0xe0(%RBP) |
(884) 0x43c17d VMOVSD %XMM0,-0xd8(%RBP) |
(884) 0x43c185 CALL 4e63c0 <_intel_fast_memset> |
(884) 0x43c18a VMOVSD -0xd8(%RBP),%XMM0 |
(884) 0x43c192 VMOVSD -0xe0(%RBP),%XMM3 |
(884) 0x43c19a VMOVSD -0xe8(%RBP),%XMM2 |
(884) 0x43c1a2 VMOVSD -0x30(%RBP),%XMM1 |
(884) 0x43c1a7 VMOVSD -0x38(%RBP),%XMM16 |
(884) 0x43c1ae VMOVSD -0x40(%RBP),%XMM15 |
(884) 0x43c1b3 VMOVSD -0x48(%RBP),%XMM14 |
(884) 0x43c1b8 MOV -0x50(%RBP),%R11 |
(884) 0x43c1bc MOV -0x60(%RBP),%R9 |
(884) 0x43c1c0 VMOVUPD -0xa0(%RBP),%XMM13 |
(884) 0x43c1c8 VMOVUPD -0xb0(%RBP),%XMM12 |
(884) 0x43c1d0 VMOVSD 0xb8e40(%RIP),%XMM11 |
(884) 0x43c1d8 VMOVSD 0xbbcb8(%RIP),%XMM10 |
(884) 0x43c1e0 VCVTTSD2SI %XMM14,%EDX |
(884) 0x43c1e5 VCVTTSD2SI %XMM15,%ESI |
(884) 0x43c1ea VCVTTSD2SI %XMM16,%ECX |
(884) 0x43c1f0 VFMADD213SD %XMM10,%XMM2,%XMM12 |
(884) 0x43c1f5 VSUBSD %XMM0,%XMM11,%XMM0 |
(884) 0x43c1f9 VFMADD213SD %XMM11,%XMM2,%XMM0 |
(884) 0x43c1fe VFMADD213SD %XMM10,%XMM2,%XMM0 |
(884) 0x43c203 MOV 0x8(%R12),%RAX |
(884) 0x43c208 VMULSD %XMM3,%XMM1,%XMM1 |
(884) 0x43c20c VMOVDDUP %XMM13,%XMM2 |
(884) 0x43c211 VMOVDDUP %XMM12,%XMM3 |
(884) 0x43c216 VMOVDDUP %XMM0,%XMM4 |
(884) 0x43c21a VMOVDDUP %XMM1,%XMM5 |
(884) 0x43c21e MOV -0x68(%RBP),%R12 |
(884) 0x43c222 DEC %R12 |
(884) 0x43c225 MOV %ESI,%EDI |
(884) 0x43c227 SAR $0x1f,%EDI |
(884) 0x43c22a ANDN %ESI,%EDI,%ESI |
(884) 0x43c22f CMP %RSI,%R12 |
(884) 0x43c232 CMOVGE %RSI,%R12 |
(884) 0x43c236 IMUL -0x70(%RBP),%R12 |
(884) 0x43c23b DEC %R9 |
(884) 0x43c23e MOV %EDX,%ESI |
(884) 0x43c240 SAR $0x1f,%ESI |
(884) 0x43c243 ANDN %EDX,%ESI,%EDX |
(884) 0x43c248 CMP %RDX,%R9 |
(884) 0x43c24b CMOVGE %RDX,%R9 |
(884) 0x43c24f MOV -0x90(%RBP),%RDI |
(884) 0x43c256 IMUL %RDI,%R9 |
(884) 0x43c25a DEC %R11 |
(884) 0x43c25d MOV %ECX,%EDX |
(884) 0x43c25f SAR $0x1f,%EDX |
(884) 0x43c262 ANDN %ECX,%EDX,%ECX |
(884) 0x43c267 CMP %RCX,%R11 |
(884) 0x43c26a CMOVGE %RCX,%R11 |
(884) 0x43c26e MOV -0x58(%RBP),%RCX |
(884) 0x43c272 MOV %RCX,%RSI |
(884) 0x43c275 IMUL %R11,%RSI |
(884) 0x43c279 LEA (%RBX,%RSI,1),%R8 |
(884) 0x43c27d ADD %R9,%R8 |
(884) 0x43c280 LEA (%R8,%R12,1),%R10 |
(884) 0x43c284 ADD %RCX,%R8 |
(884) 0x43c287 ADD %R12,%R8 |
(884) 0x43c28a MOV %R8,-0x68(%RBP) |
(884) 0x43c28e LEA (%R10,%RCX,2),%RDX |
(884) 0x43c292 MOV %RDX,-0xa0(%RBP) |
(884) 0x43c299 LEA (%RCX,%RCX,2),%RDX |
(884) 0x43c29d MOV %R10,-0xb0(%RBP) |
(884) 0x43c2a4 ADD %R10,%RDX |
(884) 0x43c2a7 MOV %RDX,-0x60(%RBP) |
(884) 0x43c2ab ADD %R12,%R9 |
(884) 0x43c2ae LEA 0x3(%R11),%RDX |
(884) 0x43c2b2 IMUL %RCX,%RDX |
(884) 0x43c2b6 ADD %R9,%RDX |
(884) 0x43c2b9 LEA (%RAX,%RDX,8),%R8 |
(884) 0x43c2bd LEA (,%RDI,8),%RDX |
(884) 0x43c2c5 MOV %RDX,-0x30(%RBP) |
(884) 0x43c2c9 LEA 0x2(%R11),%RDX |
(884) 0x43c2cd IMUL %RCX,%RDX |
(884) 0x43c2d1 INC %R11 |
(884) 0x43c2d4 IMUL %RCX,%R11 |
(884) 0x43c2d8 MOV -0x70(%RBP),%RCX |
(884) 0x43c2dc LEA (,%RCX,8),%R10 |
(884) 0x43c2e4 ADD %R9,%RDX |
(884) 0x43c2e7 LEA (%RAX,%RDX,8),%RCX |
(884) 0x43c2eb MOV %R8,%RDX |
(884) 0x43c2ee MOV %RCX,%R8 |
(884) 0x43c2f1 ADD %R9,%R11 |
(884) 0x43c2f4 LEA (%RAX,%R11,8),%RCX |
(884) 0x43c2f8 ADD %RSI,%R9 |
(884) 0x43c2fb LEA (%RAX,%R9,8),%R12 |
(884) 0x43c2ff MOV %RCX,%R9 |
(884) 0x43c302 XOR %ECX,%ECX |
(884) 0x43c304 JMP 43c342 |
0x43c306 NOPW %CS:(%RAX,%RAX,1) |
(885) 0x43c310 MOV -0x38(%RBP),%RDI |
(885) 0x43c314 LEA 0x1(%RDI),%RCX |
(885) 0x43c318 MOV -0x58(%RBP),%RDX |
(885) 0x43c31c MOV -0x30(%RBP),%RSI |
(885) 0x43c320 ADD %RSI,%RDX |
(885) 0x43c323 MOV -0x50(%RBP),%R8 |
(885) 0x43c327 ADD %RSI,%R8 |
(885) 0x43c32a MOV -0x48(%RBP),%R9 |
(885) 0x43c32e ADD %RSI,%R9 |
(885) 0x43c331 MOV -0x40(%RBP),%R12 |
(885) 0x43c335 ADD %RSI,%R12 |
(885) 0x43c338 CMP $0x3,%RDI |
(885) 0x43c33c JE 43bf60 |
(885) 0x43c342 VMOVSD -0x180(%RBP,%RCX,8),%XMM6 |
(885) 0x43c34b MOV -0x90(%RBP),%R11 |
(885) 0x43c352 MOV %RCX,-0x38(%RBP) |
(885) 0x43c356 IMUL %RCX,%R11 |
(885) 0x43c35a MOV %R12,-0x40(%RBP) |
(885) 0x43c35e MOV %R9,-0x48(%RBP) |
(885) 0x43c362 MOV %R8,-0x50(%RBP) |
(885) 0x43c366 MOV %RDX,-0x58(%RBP) |
(885) 0x43c36a MOV %RDX,%RCX |
(885) 0x43c36d XOR %EDX,%EDX |
(885) 0x43c36f JMP 43c39d |
0x43c371 NOPW %CS:(%RAX,%RAX,1) |
(886) 0x43c380 LEA 0x1(%RDX),%RSI |
(886) 0x43c384 ADD %R10,%RCX |
(886) 0x43c387 ADD %R10,%R8 |
(886) 0x43c38a ADD %R10,%R9 |
(886) 0x43c38d ADD %R10,%R12 |
(886) 0x43c390 CMP $0x3,%RDX |
(886) 0x43c394 MOV %RSI,%RDX |
(886) 0x43c397 JE 43c310 |
(886) 0x43c39d TEST %R14D,%R14D |
(886) 0x43c3a0 JE 43c380 |
(886) 0x43c3a2 VMULSD -0x160(%RBP,%RDX,8),%XMM6,%XMM7 |
(886) 0x43c3ab CMP $0x2,%R15 |
(886) 0x43c3af JB 43c3ef |
(886) 0x43c3b1 VMOVDDUP %XMM7,%XMM8 |
(886) 0x43c3b5 MOV -0xf8(%RBP),%RSI |
(886) 0x43c3bc XOR %EDI,%EDI |
(886) 0x43c3be XCHG %AX,%AX |
(887) 0x43c3c0 VMULPD (%R12,%RDI,1),%XMM3,%XMM9 |
(887) 0x43c3c6 VFMADD231PD (%R9,%RDI,1),%XMM2,%XMM9 |
(887) 0x43c3cc VFMADD231PD (%R8,%RDI,1),%XMM4,%XMM9 |
(887) 0x43c3d2 VFMADD231PD (%RCX,%RDI,1),%XMM5,%XMM9 |
(887) 0x43c3d8 VFMADD213PD (%R13,%RDI,1),%XMM8,%XMM9 |
(887) 0x43c3df VMOVUPD %XMM9,(%R13,%RDI,1) |
(887) 0x43c3e6 ADD $0x10,%RDI |
(887) 0x43c3ea DEC %RSI |
(887) 0x43c3ed JNE 43c3c0 |
(886) 0x43c3ef CMP %R15,%RBX |
(886) 0x43c3f2 JAE 43c380 |
(886) 0x43c3f4 MOV -0x70(%RBP),%RSI |
(886) 0x43c3f8 IMUL %RDX,%RSI |
(886) 0x43c3fc ADD %R11,%RSI |
(886) 0x43c3ff MOV -0xb0(%RBP),%RDI |
(886) 0x43c406 ADD %RSI,%RDI |
(886) 0x43c409 VMULSD (%RAX,%RDI,8),%XMM12,%XMM8 |
(886) 0x43c40e MOV -0x68(%RBP),%RDI |
(886) 0x43c412 ADD %RSI,%RDI |
(886) 0x43c415 VFMADD231SD (%RAX,%RDI,8),%XMM13,%XMM8 |
(886) 0x43c41b MOV -0xa0(%RBP),%RDI |
(886) 0x43c422 ADD %RSI,%RDI |
(886) 0x43c425 VFMADD231SD (%RAX,%RDI,8),%XMM0,%XMM8 |
(886) 0x43c42b ADD -0x60(%RBP),%RSI |
(886) 0x43c42f VFMADD231SD (%RAX,%RSI,8),%XMM1,%XMM8 |
(886) 0x43c435 VFMADD213SD (%R13,%RBX,8),%XMM7,%XMM8 |
(886) 0x43c43c VMOVSD %XMM8,(%R13,%RBX,8) |
(886) 0x43c443 JMP 43c380 |
0x43c448 MOV -0xc0(%RBP),%RDI |
0x43c44f CALL 48a970 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x43c454 MOV -0x78(%RBP),%R12 |
0x43c458 MOV 0x30(%R12),%EAX |
0x43c45d TEST %EAX,%EAX |
0x43c45f MOV -0xb8(%RBP),%R15 |
0x43c466 JLE 43c4c5 |
0x43c468 XOR %EBX,%EBX |
0x43c46a XOR %R14D,%R14D |
0x43c46d JMP 43c47c |
0x43c46f NOP |
(883) 0x43c470 MOVSXD %EAX,%RCX |
(883) 0x43c473 ADD $0x18,%RBX |
(883) 0x43c477 CMP %RCX,%R14 |
(883) 0x43c47a JGE 43c4c5 |
(883) 0x43c47c MOV %R14D,%EDX |
(883) 0x43c47f IMUL %EAX,%EDX |
(883) 0x43c482 INC %R14 |
(883) 0x43c485 MOV 0x40(%R12),%ECX |
(883) 0x43c48a IMUL %R14D,%ECX |
(883) 0x43c48e MOV 0x8(%R12),%ESI |
(883) 0x43c493 CMP %ECX,%ESI |
(883) 0x43c495 CMOVL %ESI,%ECX |
(883) 0x43c498 SUB %EDX,%ECX |
(883) 0x43c49a JLE 43c470 |
(883) 0x43c49c MOV 0x310(%R12),%RAX |
(883) 0x43c4a4 MOVSXD %EDX,%RDI |
(883) 0x43c4a7 SAL $0x3,%RDI |
(883) 0x43c4ab ADD 0x18(%R15),%RDI |
(883) 0x43c4af MOV (%RAX,%RBX,1),%RSI |
(883) 0x43c4b3 MOV %ECX,%EDX |
(883) 0x43c4b5 SAL $0x3,%RDX |
(883) 0x43c4b9 CALL 4040a0 <memmove@plt> |
(883) 0x43c4be MOV 0x30(%R12),%EAX |
(883) 0x43c4c3 JMP 43c470 |
0x43c4c5 ADD $0x158,%RSP |
0x43c4cc POP %RBX |
0x43c4cd POP %R12 |
0x43c4cf POP %R13 |
0x43c4d1 POP %R14 |
0x43c4d3 POP %R15 |
0x43c4d5 POP %RBP |
0x43c4d6 RET |
0x43c4d7 MOV %RAX,%RDI |
0x43c4da CALL 40d190 <__clang_call_terminate> |
0x43c4df NOP |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 118 |
nb uops | 126 |
loop length | 588 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.67 | 10.67 | 11.67 | 11.67 | 11.00 | 10.67 | 9.00 | 11.00 | 11.00 | 11.00 | 9.00 | 11.67 |
cycles | 10.67 | 10.67 | 11.67 | 11.67 | 11.00 | 10.67 | 9.00 | 11.00 | 11.00 | 11.00 | 9.00 | 11.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.98-21.08 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 11.67 |
Overall L1 | 21.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 42% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 47% |
all | 29% |
load | 15% |
store | 30% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 34% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 17% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 15% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x158,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 48a790 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RCX,%RCX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %ECX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMOVNE %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c448 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RAX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x78(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0xd8(%RCX),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf0(%RCX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x8(%RAX),%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%RCX),%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0xe0(%RCX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0xf8(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.50 |
VMULPD %XMM5,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHPD 0x100(%RCX),%XMM3,%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VFMADD213PD %XMM4,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%RCX),%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xbc04a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM2,%XMM1,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM2,%XMM1,%XMM1{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM1,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM1,%XMM3,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM1,%XMM3,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %XMM4,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM1,%XMM1{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KMOVD %K1,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.25 |
SHR $0x1,%AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VCMPSD $0x1,%XMM3,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM1,%XMM4,%XMM4{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM4,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VSHUFPD $0x1,%XMM3,%XMM3,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KMOVD %EAX,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM4,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM4,%XMM4{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM4,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x2f8(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RCX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R14,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x1,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x80(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x2,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xbbf65(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xb90dd(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0xb90d5(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 43bf7f <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x20f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xc0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 48a970 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x78(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 43c4c5 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x755> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43c47c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x70c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x158,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 40d190 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 118 |
nb uops | 126 |
loop length | 588 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.67 | 10.67 | 11.67 | 11.67 | 11.00 | 10.67 | 9.00 | 11.00 | 11.00 | 11.00 | 9.00 | 11.67 |
cycles | 10.67 | 10.67 | 11.67 | 11.67 | 11.00 | 10.67 | 9.00 | 11.00 | 11.00 | 11.00 | 9.00 | 11.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.98-21.08 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 11.67 |
Overall L1 | 21.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 42% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 47% |
all | 29% |
load | 15% |
store | 30% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 34% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 17% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 15% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x158,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 48a790 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RCX,%RCX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %ECX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMOVNE %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c448 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%RAX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x78(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0xd8(%RCX),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf0(%RCX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x8(%RAX),%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%RCX),%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0xe0(%RCX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0xf8(%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPUNPCKLQDQ 0xe8(%RCX),%XMM4,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.50 |
VMULPD %XMM5,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVHPD 0x100(%RCX),%XMM3,%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VFMADD213PD %XMM4,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%RCX),%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xbc04a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM2,%XMM1,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD %XMM2,%XMM1,%XMM1{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM1,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM1,%XMM3,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM1,%XMM3,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %XMM4,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM1,%XMM1{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KMOVD %K1,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.25 |
SHR $0x1,%AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VCMPSD $0x1,%XMM3,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM1,%XMM4,%XMM4{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM4,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VSHUFPD $0x1,%XMM3,%XMM3,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM1,%XMM1,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM1,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KMOVD %EAX,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD %XMM4,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD %XMM2,%XMM3,%XMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VCMPSD $0x1,%XMM1,%XMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,%XMM4,%XMM4{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %XMM4,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x2f8(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RCX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R14,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x1,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x80(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x2,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xbbf65(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xb90dd(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0xb90d5(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 43bf7f <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x20f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xc0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 48a970 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x78(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 43c4c5 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x755> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43c47c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x70c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x158,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 40d190 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE– | 27.4 | 19.76 |
○Loop 883 - einspline_spo_ref.hpp:183-187 - exec | 0.01 | 0.01 |
▼Loop 884 - MultiBsplineRef.hpp:42-71 - exec– | 0.01 | 0.01 |
▼Loop 885 - MultiBsplineRef.hpp:63-71 - exec– | 0 | 0 |
▼Loop 886 - MultiBsplineRef.hpp:64-71 - exec– | 0 | 0 |
○Loop 887 - MultiBsplineRef.hpp:68-70 - exec | 27.34 | 19.2 |