Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: libqmcwfs.so | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 24.26% |
---|
Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: libqmcwfs.so | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 24.26% |
---|
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_algobase.h: 238 - 931 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
[...] |
398: { *__to = *__from; } |
[...] |
436: if (__builtin_expect(_Num > 1, true)) |
437: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
930: for (; __first != __last; ++__first) |
931: *__first = __tmp; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 53 - 63 |
-------------------------------------------------------------------------------- |
53: if (x < 0) |
[...] |
60: ind = static_cast<int>(x); |
61: dx = x - ind; |
62: // upper bound |
63: if (ind > nmax) |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_vector.h: 1256 - 1256 |
-------------------------------------------------------------------------------- |
1256: { return _M_data_ptr(this->_M_impl._M_start); } |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_algo.h: 731 - 757 |
-------------------------------------------------------------------------------- |
731: { return std::copy(__first, __first + __n, __result); } |
[...] |
757: if (__n2 <= 0) |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 229 - 229 |
-------------------------------------------------------------------------------- |
229: return X[i]; |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
0x62de0 PUSH %RBP |
0x62de1 MOV %RSP,%RBP |
0x62de4 PUSH %R15 |
0x62de6 MOVSXD %EDX,%R15 |
0x62de9 PUSH %R14 |
0x62deb MOV %RSI,%R14 |
0x62dee PUSH %R13 |
0x62df0 PUSH %R12 |
0x62df2 MOV %RCX,%R12 |
0x62df5 PUSH %RBX |
0x62df6 MOV %RDI,%RBX |
0x62df9 AND $-0x20,%RSP |
0x62dfd SUB $0x180,%RSP |
0x62e04 MOV 0x358(%RDI),%R13 |
0x62e0b MOV %R13,%RDI |
0x62e0e CALL 8540 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> |
0x62e13 LEA 0x48(%RBX),%RSI |
0x62e17 LEA 0x128(%R14),%RDX |
0x62e1e CMP 0x124(%R14),%R15D |
0x62e25 JE 62e33 |
0x62e27 MOV 0x40(%R14),%RAX |
0x62e2b LEA (%R15,%R15,2),%RDX |
0x62e2f LEA (%RAX,%RDX,8),%RDX |
0x62e33 LEA 0x120(%RSP),%RDI |
0x62e3b CALL 13e40 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> |
0x62e40 MOVSXD 0x30(%RBX),%RSI |
0x62e44 TEST %ESI,%ESI |
0x62e46 JLE 63655 |
0x62e4c MOVSXD 0x40(%RBX),%RDI |
0x62e50 MOV 0x2f8(%RBX),%R10 |
0x62e57 MOV %R13,0x20(%RSP) |
0x62e5c MOV 0x310(%RBX),%R9 |
0x62e63 MOV %RBX,0x18(%RSP) |
0x62e68 MOV %RDI,%R15 |
0x62e6b LEA -0x1(%RDI),%R14 |
0x62e6f MOV %RDI,0x70(%RSP) |
0x62e74 LEA (%R10,%RSI,8),%R11 |
0x62e78 SHR $0x2,%R15 |
0x62e7c MOV %EDI,0x90(%RSP) |
0x62e83 LEA (,%RDI,8),%R8 |
0x62e8b AND $-0x4,%RDI |
0x62e8f SAL $0x5,%R15 |
0x62e93 MOV %R8,0x50(%RSP) |
0x62e98 VMOVSD 0x130(%RSP),%XMM7 |
0x62ea1 VMOVSD 0x128(%RSP),%XMM0 |
0x62eaa MOV %R9,0x48(%RSP) |
0x62eaf VMOVSD 0x120(%RSP),%XMM1 |
0x62eb8 MOV %R10,0x58(%RSP) |
0x62ebd MOV %R11,0x28(%RSP) |
0x62ec2 MOV %R14,0x98(%RSP) |
0x62eca MOV %R15,0x108(%RSP) |
0x62ed2 MOV %RDI,0x68(%RSP) |
0x62ed7 MOV %R12,0x10(%RSP) |
0x62edc VMOVSD %XMM7,0x40(%RSP) |
0x62ee2 VMOVSD %XMM0,0x38(%RSP) |
0x62ee8 VMOVSD %XMM1,0x30(%RSP) |
(673) 0x62eee MOV 0x58(%RSP),%R12 |
(673) 0x62ef3 VMOVSD 0x30(%RSP),%XMM4 |
(673) 0x62ef9 VXORPD %XMM8,%XMM8,%XMM8 |
(673) 0x62efe MOV 0x48(%RSP),%RBX |
(673) 0x62f03 VMOVSD 0x38(%RSP),%XMM2 |
(673) 0x62f09 MOV (%R12),%RCX |
(673) 0x62f0d VMOVSD 0x40(%RSP),%XMM3 |
(673) 0x62f13 MOV (%RBX),%R15 |
(673) 0x62f16 VSUBSD 0x28(%RCX),%XMM4,%XMM5 |
(673) 0x62f1b VMULSD 0x48(%RCX),%XMM5,%XMM6 |
(673) 0x62f20 VSUBSD 0x50(%RCX),%XMM2,%XMM10 |
(673) 0x62f25 VSUBSD 0x78(%RCX),%XMM3,%XMM11 |
(673) 0x62f2a MOV 0x38(%RCX),%EAX |
(673) 0x62f2d VCOMISD %XMM6,%XMM8 |
(673) 0x62f31 JA 6372f |
(673) 0x62f37 VCVTTSD2SI %XMM6,%R13D |
(673) 0x62f3b DEC %EAX |
(673) 0x62f3d CMP %R13D,%EAX |
(673) 0x62f40 JL 63758 |
(673) 0x62f46 VROUNDSD $0xb,%XMM6,%XMM6,%XMM9 |
(673) 0x62f4c VSUBSD %XMM9,%XMM6,%XMM12 |
(673) 0x62f51 VMOVSD 0x1ac77(%RIP),%XMM7 |
(673) 0x62f59 MOVSXD %R13D,%RDX |
(673) 0x62f5c VMOVSD 0x1a6fc(%RIP),%XMM14 |
(673) 0x62f64 MOV %RDX,0x110(%RSP) |
(673) 0x62f6c VMULSD %XMM14,%XMM12,%XMM15 |
(673) 0x62f71 VMOVSD %XMM12,%XMM12,%XMM1 |
(673) 0x62f75 VMOVSD %XMM12,%XMM12,%XMM2 |
(673) 0x62f79 VMULSD %XMM12,%XMM12,%XMM13 |
(673) 0x62f7e VFNMADD132SD %XMM7,%XMM7,%XMM1 |
(673) 0x62f83 VFMADD213SD 0x1a824(%RIP),%XMM7,%XMM2 |
(673) 0x62f8c VFMADD213SD 0x1a6fb(%RIP),%XMM13,%XMM2 |
(673) 0x62f95 VSUBSD %XMM15,%XMM7,%XMM0 |
(673) 0x62f9a VFMADD213SD 0x1b16d(%RIP),%XMM12,%XMM0 |
(673) 0x62fa3 VMULSD %XMM13,%XMM15,%XMM3 |
(673) 0x62fa8 VFMADD231SD %XMM1,%XMM12,%XMM7 |
(673) 0x62fad VFMADD132SD %XMM12,%XMM14,%XMM0 |
(673) 0x62fb2 VFMADD132SD %XMM7,%XMM14,%XMM12 |
(673) 0x62fb7 VUNPCKLPD %XMM2,%XMM0,%XMM5 |
(673) 0x62fbb VUNPCKLPD %XMM3,%XMM12,%XMM4 |
(673) 0x62fbf VINSERTF128 $0x1,%XMM4,%YMM5,%YMM6 |
(673) 0x62fc5 VMULSD 0x70(%RCX),%XMM10,%XMM10 |
(673) 0x62fca VXORPD %XMM8,%XMM8,%XMM8 |
(673) 0x62fcf MOV 0x60(%RCX),%ESI |
(673) 0x62fd2 VCOMISD %XMM10,%XMM8 |
(673) 0x62fd7 JA 6371f |
(673) 0x62fdd VCVTTSD2SI %XMM10,%EDI |
(673) 0x62fe2 DEC %ESI |
(673) 0x62fe4 CMP %EDI,%ESI |
(673) 0x62fe6 JL 63748 |
(673) 0x62fec VROUNDSD $0xb,%XMM10,%XMM10,%XMM9 |
(673) 0x62ff2 VSUBSD %XMM9,%XMM10,%XMM12 |
(673) 0x62ff7 VMOVSD 0x1abd1(%RIP),%XMM14 |
(673) 0x62fff MOVSXD %EDI,%R13 |
(673) 0x63002 VMULSD 0x1a79e(%RIP),%XMM12,%XMM13 |
(673) 0x6300a VMOVSD 0x1a64e(%RIP),%XMM0 |
(673) 0x63012 VXORPD 0x1a806(%RIP),%XMM13,%XMM3 |
(673) 0x6301a VMULSD %XMM12,%XMM12,%XMM1 |
(673) 0x6301f VMOVSD %XMM12,%XMM12,%XMM7 |
(673) 0x63023 VMOVSD %XMM12,%XMM12,%XMM2 |
(673) 0x63027 VFMADD213SD 0x1a780(%RIP),%XMM14,%XMM2 |
(673) 0x63030 VFNMADD132SD %XMM14,%XMM14,%XMM7 |
(673) 0x63035 VFMADD213SD 0x1a652(%RIP),%XMM1,%XMM2 |
(673) 0x6303e VADDSD %XMM14,%XMM13,%XMM15 |
(673) 0x63043 VFMADD213SD 0x1b0c4(%RIP),%XMM12,%XMM15 |
(673) 0x6304c VMULSD %XMM1,%XMM3,%XMM4 |
(673) 0x63050 VFMADD231SD %XMM7,%XMM12,%XMM14 |
(673) 0x63055 VFMADD132SD %XMM12,%XMM0,%XMM15 |
(673) 0x6305a VFMADD132SD %XMM14,%XMM0,%XMM12 |
(673) 0x6305f VUNPCKLPD %XMM2,%XMM15,%XMM10 |
(673) 0x63063 VUNPCKLPD %XMM4,%XMM12,%XMM5 |
(673) 0x63067 VINSERTF128 $0x1,%XMM5,%YMM10,%YMM12 |
(673) 0x6306d VMULSD 0x98(%RCX),%XMM11,%XMM11 |
(673) 0x63075 VXORPD %XMM8,%XMM8,%XMM8 |
(673) 0x6307a MOV 0x88(%RCX),%EAX |
(673) 0x63080 VCOMISD %XMM11,%XMM8 |
(673) 0x63085 JA 636ed |
(673) 0x6308b VCVTTSD2SI %XMM11,%R8D |
(673) 0x63090 DEC %EAX |
(673) 0x63092 CMP %R8D,%EAX |
(673) 0x63095 JL 6376f |
(673) 0x6309b VROUNDSD $0xb,%XMM11,%XMM11,%XMM9 |
(673) 0x630a1 VSUBSD %XMM9,%XMM11,%XMM13 |
(673) 0x630a6 VMOVSD 0x1ab22(%RIP),%XMM15 |
(673) 0x630ae MOVSXD %R8D,%R9 |
(673) 0x630b1 VMULSD 0x1a6ef(%RIP),%XMM13,%XMM14 |
(673) 0x630b9 VMOVSD 0x1a59f(%RIP),%XMM0 |
(673) 0x630c1 MOV %R9,0xf0(%RSP) |
(673) 0x630c9 VXORPD 0x1a74f(%RIP),%XMM14,%XMM7 |
(673) 0x630d1 VMULSD %XMM13,%XMM13,%XMM2 |
(673) 0x630d6 VMOVSD %XMM13,%XMM13,%XMM1 |
(673) 0x630da VMOVSD %XMM13,%XMM13,%XMM5 |
(673) 0x630de VFMADD213SD 0x1a6c9(%RIP),%XMM15,%XMM5 |
(673) 0x630e7 VFNMADD132SD %XMM15,%XMM15,%XMM1 |
(673) 0x630ec VFMADD213SD 0x1a59b(%RIP),%XMM2,%XMM5 |
(673) 0x630f5 VADDSD %XMM15,%XMM14,%XMM4 |
(673) 0x630fa VFMADD213SD 0x1b00d(%RIP),%XMM13,%XMM4 |
(673) 0x63103 VMULSD %XMM2,%XMM7,%XMM3 |
(673) 0x63107 VFMADD231SD %XMM1,%XMM13,%XMM15 |
(673) 0x6310c VFMADD132SD %XMM13,%XMM0,%XMM4 |
(673) 0x63111 VFMADD132SD %XMM15,%XMM0,%XMM13 |
(673) 0x63116 VMOVSD %XMM13,0x118(%RSP) |
(673) 0x6311f CMPQ $0,0x50(%RSP) |
(673) 0x63125 MOV 0x10(%RCX),%RBX |
(673) 0x63129 VMOVAPD %YMM6,0x140(%RSP) |
(673) 0x63132 MOV 0x18(%RCX),%R12 |
(673) 0x63136 MOV 0x20(%RCX),%R14 |
(673) 0x6313a VMOVAPD %YMM12,0x160(%RSP) |
(673) 0x63143 JE 6319d |
(673) 0x63145 MOV %RCX,0x100(%RSP) |
(673) 0x6314d MOV 0x50(%RSP),%RDX |
(673) 0x63152 XOR %ESI,%ESI |
(673) 0x63154 MOV %R15,%RDI |
(673) 0x63157 VMOVSD %XMM3,0xd8(%RSP) |
(673) 0x63160 VMOVSD %XMM5,0xe0(%RSP) |
(673) 0x63169 VMOVSD %XMM4,0xe8(%RSP) |
(673) 0x63172 VZEROUPPER |
(673) 0x63175 CALL 80c0 <memset@plt> |
(673) 0x6317a MOV 0x100(%RSP),%RCX |
(673) 0x63182 VMOVSD 0xe8(%RSP),%XMM4 |
(673) 0x6318b VMOVSD 0xe0(%RSP),%XMM5 |
(673) 0x63194 VMOVSD 0xd8(%RSP),%XMM3 |
(673) 0x6319d MOV 0xf0(%RSP),%RAX |
(673) 0x631a5 MOV 0x8(%RCX),%RSI |
(673) 0x631a9 IMUL %R14,%RAX |
(673) 0x631ad CMPQ $0,0x70(%RSP) |
(673) 0x631b3 JE 63627 |
(673) 0x631b9 MOV %R13,%R8 |
(673) 0x631bc LEA 0x1(%R13),%R13 |
(673) 0x631c0 MOV %R12,%R10 |
(673) 0x631c3 MOV %RBX,0x60(%RSP) |
(673) 0x631c8 MOV 0x110(%RSP),%RDX |
(673) 0x631d0 MOV 0x90(%RSP),%ECX |
(673) 0x631d7 IMUL %R12,%R8 |
(673) 0x631db LEA 0x160(%RSP),%R11 |
(673) 0x631e3 IMUL %R12,%R13 |
(673) 0x631e7 SAL $0x4,%R10 |
(673) 0x631eb MOV %R11,0xa0(%RSP) |
(673) 0x631f3 LEA (%R12,%R12,1),%R12 |
(673) 0x631f7 IMUL %RBX,%RDX |
(673) 0x631fb AND $0x3,%ECX |
(673) 0x631fe MOV %R10,0xa8(%RSP) |
(673) 0x63206 VBROADCASTSD 0x118(%RSP),%YMM12 |
(673) 0x63210 VBROADCASTSD %XMM4,%YMM6 |
(673) 0x63215 VBROADCASTSD %XMM5,%YMM13 |
(673) 0x6321a VBROADCASTSD %XMM3,%YMM11 |
(673) 0x6321f MOV %ECX,0x94(%RSP) |
(673) 0x63226 MOV %R12,0xb0(%RSP) |
(673) 0x6322e LEA 0x140(%RSP),%RDI |
(673) 0x63236 MOV %R15,%R12 |
(673) 0x63239 MOV %RSI,%R15 |
(673) 0x6323c ADD %RDX,%R13 |
(673) 0x6323f ADD %RDX,%R8 |
(673) 0x63242 LEA (%R13,%RAX,1),%R9 |
(673) 0x63247 LEA (%R8,%RAX,1),%R8 |
(673) 0x6324b LEA (,%R14,8),%R13 |
(675) 0x63253 MOV %R9,0x100(%RSP) |
(675) 0x6325b VMOVDDUP %XMM5,%XMM10 |
(675) 0x6325f VMOVSD (%RDI),%XMM7 |
(675) 0x63263 LEA 0x180(%RSP),%RBX |
(675) 0x6326b MOV %R8,0x110(%RSP) |
(675) 0x63273 LEA (%R15,%R8,8),%RCX |
(675) 0x63277 LEA (%R15,%R9,8),%RDX |
(675) 0x6327b MOV 0xa0(%RSP),%RSI |
(675) 0x63283 MOV %R9,0x88(%RSP) |
(675) 0x6328b VMOVDDUP %XMM4,%XMM14 |
(675) 0x6328f VMOVDDUP %XMM3,%XMM15 |
(675) 0x63293 VMOVDDUP 0x118(%RSP),%XMM8 |
(675) 0x6329c MOV %R8,0x80(%RSP) |
(675) 0x632a4 MOV %RDI,0x78(%RSP) |
(675) 0x632a9 MOV %RBX,0xb8(%RSP) |
(675) 0x632b1 MOV %R15,%RBX |
(675) 0x632b4 MOV %RSI,%R15 |
(675) 0x632b7 VMOVAPD %XMM10,0xc0(%RSP) |
(675) 0x632c0 VMOVAPD %XMM8,0xf0(%RSP) |
(674) 0x632c9 CMPQ $0x2,0x98(%RSP) |
(674) 0x632d2 VMULSD (%R15),%XMM7,%XMM8 |
(674) 0x632d7 JBE 636e6 |
(674) 0x632dd LEA (%RCX,%R13,1),%R9 |
(674) 0x632e1 LEA (%R13,%RDX,1),%RDI |
(674) 0x632e6 VBROADCASTSD %XMM8,%YMM10 |
(674) 0x632eb XOR %EAX,%EAX |
(674) 0x632ed VMULSD 0x8(%R15),%XMM7,%XMM9 |
(674) 0x632f3 LEA (%R13,%R9,1),%R8 |
(674) 0x632f8 LEA (%R13,%RDI,1),%RSI |
(674) 0x632fd LEA (%R8,%R13,1),%R11 |
(674) 0x63301 LEA (%R13,%RSI,1),%R10 |
(674) 0x63306 VBROADCASTSD %XMM9,%YMM9 |
(674) 0x6330b TESTB $0x20,0x108(%RSP) |
(674) 0x63313 JE 63370 |
(674) 0x63315 VMULPD (%R9),%YMM13,%YMM2 |
(674) 0x6331a MOV $0x20,%EAX |
(674) 0x6331f CMPQ $0x20,0x108(%RSP) |
(674) 0x63328 VMULPD (%R11),%YMM11,%YMM0 |
(674) 0x6332d VFMADD231PD (%RCX),%YMM6,%YMM2 |
(674) 0x63332 VFMADD231PD (%R8),%YMM12,%YMM0 |
(674) 0x63337 VADDPD %YMM0,%YMM2,%YMM1 |
(674) 0x6333b VMULPD (%RDI),%YMM13,%YMM2 |
(674) 0x6333f VMULPD (%R10),%YMM11,%YMM0 |
(674) 0x63344 VFMADD231PD (%RDX),%YMM6,%YMM2 |
(674) 0x63349 VFMADD231PD (%RSI),%YMM12,%YMM0 |
(674) 0x6334e VFMADD213PD (%R12),%YMM10,%YMM1 |
(674) 0x63354 VADDPD %YMM0,%YMM2,%YMM2 |
(674) 0x63358 VFMADD132PD %YMM9,%YMM1,%YMM2 |
(674) 0x6335d VMOVUPD %YMM2,(%R12) |
(674) 0x63363 JE 6341c |
(674) 0x63369 NOPL (%RAX) |
(676) 0x63370 VMULPD (%R9,%RAX,1),%YMM13,%YMM1 |
(676) 0x63376 VMULPD (%R11,%RAX,1),%YMM11,%YMM0 |
(676) 0x6337c VFMADD231PD (%RCX,%RAX,1),%YMM6,%YMM1 |
(676) 0x63382 VMULPD (%R10,%RAX,1),%YMM11,%YMM2 |
(676) 0x63388 VFMADD231PD (%R8,%RAX,1),%YMM12,%YMM0 |
(676) 0x6338e VFMADD231PD (%RSI,%RAX,1),%YMM12,%YMM2 |
(676) 0x63394 VADDPD %YMM0,%YMM1,%YMM1 |
(676) 0x63398 VMULPD (%RDI,%RAX,1),%YMM13,%YMM0 |
(676) 0x6339d VFMADD231PD (%RDX,%RAX,1),%YMM6,%YMM0 |
(676) 0x633a3 VFMADD213PD (%R12,%RAX,1),%YMM10,%YMM1 |
(676) 0x633a9 VADDPD %YMM2,%YMM0,%YMM0 |
(676) 0x633ad VMULPD 0x20(%R11,%RAX,1),%YMM11,%YMM2 |
(676) 0x633b4 VFMADD231PD 0x20(%R8,%RAX,1),%YMM12,%YMM2 |
(676) 0x633bb VFMADD132PD %YMM9,%YMM1,%YMM0 |
(676) 0x633c0 VMULPD 0x20(%R9,%RAX,1),%YMM13,%YMM1 |
(676) 0x633c7 VFMADD231PD 0x20(%RCX,%RAX,1),%YMM6,%YMM1 |
(676) 0x633ce VMOVUPD %YMM0,(%R12,%RAX,1) |
(676) 0x633d4 VMULPD 0x20(%RAX,%RDI,1),%YMM13,%YMM0 |
(676) 0x633da VFMADD231PD 0x20(%RDX,%RAX,1),%YMM6,%YMM0 |
(676) 0x633e1 VADDPD %YMM2,%YMM1,%YMM1 |
(676) 0x633e5 VMULPD 0x20(%R10,%RAX,1),%YMM11,%YMM2 |
(676) 0x633ec VFMADD231PD 0x20(%RAX,%RSI,1),%YMM12,%YMM2 |
(676) 0x633f3 VFMADD213PD 0x20(%R12,%RAX,1),%YMM10,%YMM1 |
(676) 0x633fa VADDPD %YMM2,%YMM0,%YMM0 |
(676) 0x633fe VFMADD132PD %YMM9,%YMM1,%YMM0 |
(676) 0x63403 VMOVUPD %YMM0,0x20(%R12,%RAX,1) |
(676) 0x6340a ADD $0x40,%RAX |
(676) 0x6340e CMP %RAX,0x108(%RSP) |
(676) 0x63416 JNE 63370 |
(674) 0x6341c MOV 0x94(%RSP),%R9D |
(674) 0x63424 TEST %R9D,%R9D |
(674) 0x63427 JE 635ba |
(674) 0x6342d MOV 0x68(%RSP),%RAX |
(674) 0x63432 MOV 0x70(%RSP),%R8 |
(674) 0x63437 SUB %RAX,%R8 |
(674) 0x6343a MOV %R8,0xd0(%RSP) |
(674) 0x63442 CMP $0x1,%R8 |
(674) 0x63446 JE 63555 |
(674) 0x6344c MOV 0x110(%RSP),%R11 |
(674) 0x63454 LEA (%R12,%RAX,8),%R9 |
(674) 0x63458 VMOVDDUP %XMM8,%XMM1 |
(674) 0x6345d VMOVAPD 0xc0(%RSP),%XMM2 |
(674) 0x63466 VMOVAPD 0xf0(%RSP),%XMM10 |
(674) 0x6346f LEA (%R11,%R14,1),%R8 |
(674) 0x63473 LEA (%R14,%R8,1),%RDI |
(674) 0x63477 ADD %RAX,%R8 |
(674) 0x6347a LEA (%RDI,%RAX,1),%RSI |
(674) 0x6347e VMULPD (%RBX,%R8,8),%XMM2,%XMM9 |
(674) 0x63484 ADD %R14,%RDI |
(674) 0x63487 LEA (%RDI,%RAX,1),%R10 |
(674) 0x6348b MOV 0x100(%RSP),%RDI |
(674) 0x63493 MOV %RSI,0xe8(%RSP) |
(674) 0x6349b MOV %R10,0xe0(%RSP) |
(674) 0x634a3 MOV 0xe0(%RSP),%R8 |
(674) 0x634ab LEA (%RDI,%R14,1),%RSI |
(674) 0x634af MOV %RDI,%R11 |
(674) 0x634b2 LEA (%RAX,%RSI,1),%RDI |
(674) 0x634b6 VMULPD (%RBX,%R8,8),%XMM15,%XMM0 |
(674) 0x634bc ADD %R14,%RSI |
(674) 0x634bf ADD %RAX,%R11 |
(674) 0x634c2 LEA (%RAX,%RSI,1),%R10 |
(674) 0x634c6 ADD %R14,%RSI |
(674) 0x634c9 ADD %RAX,%RSI |
(674) 0x634cc MOV %RSI,0xd8(%RSP) |
(674) 0x634d4 MOV 0x110(%RSP),%RSI |
(674) 0x634dc ADD %RAX,%RSI |
(674) 0x634df VFMADD231PD (%RBX,%RSI,8),%XMM14,%XMM9 |
(674) 0x634e5 MOV 0xe8(%RSP),%RSI |
(674) 0x634ed VFMADD231PD (%RBX,%RSI,8),%XMM10,%XMM0 |
(674) 0x634f3 VMULPD (%RBX,%RDI,8),%XMM2,%XMM10 |
(674) 0x634f8 VMOVAPD 0xf0(%RSP),%XMM2 |
(674) 0x63501 VFMADD231PD (%RBX,%R11,8),%XMM14,%XMM10 |
(674) 0x63507 MOV 0xd8(%RSP),%R11 |
(674) 0x6350f VADDPD %XMM0,%XMM9,%XMM0 |
(674) 0x63513 VMULPD (%RBX,%R11,8),%XMM15,%XMM9 |
(674) 0x63519 VFMADD231PD (%RBX,%R10,8),%XMM2,%XMM9 |
(674) 0x6351f VFMADD213PD (%R9),%XMM1,%XMM0 |
(674) 0x63524 VMULSD 0x8(%R15),%XMM7,%XMM1 |
(674) 0x6352a VADDPD %XMM10,%XMM9,%XMM10 |
(674) 0x6352f VMOVDDUP %XMM1,%XMM1 |
(674) 0x63533 VFMADD132PD %XMM10,%XMM0,%XMM1 |
(674) 0x63538 VMOVUPD %XMM1,(%R9) |
(674) 0x6353d MOV 0xd0(%RSP),%R9 |
(674) 0x63545 TEST $0x1,%R9B |
(674) 0x63549 JE 635ba |
(674) 0x6354b MOV %R9,%RDI |
(674) 0x6354e AND $-0x2,%RDI |
(674) 0x63552 ADD %RDI,%RAX |
(674) 0x63555 LEA (%R14,%RAX,1),%R8 |
(674) 0x63559 LEA (%R12,%RAX,8),%R10 |
(674) 0x6355d VMULSD 0x8(%R15),%XMM7,%XMM9 |
(674) 0x63563 VMOVSD 0x118(%RSP),%XMM2 |
(674) 0x6356c LEA (%R14,%R8,1),%RSI |
(674) 0x63570 LEA (%R14,%RSI,1),%R11 |
(674) 0x63574 VMULSD (%RCX,%RSI,8),%XMM2,%XMM1 |
(674) 0x63579 VMULSD (%RCX,%R11,8),%XMM3,%XMM0 |
(674) 0x6357f VFMADD231SD (%RCX,%R8,8),%XMM5,%XMM1 |
(674) 0x63585 VMULSD (%RDX,%RSI,8),%XMM2,%XMM2 |
(674) 0x6358a VFMADD231SD (%RCX,%RAX,8),%XMM4,%XMM0 |
(674) 0x63590 VFMADD231SD (%RDX,%R8,8),%XMM5,%XMM2 |
(674) 0x63596 VADDSD %XMM0,%XMM1,%XMM10 |
(674) 0x6359a VFMADD213SD (%R10),%XMM8,%XMM10 |
(674) 0x6359f VMULSD (%RDX,%R11,8),%XMM3,%XMM8 |
(674) 0x635a5 VFMADD231SD (%RDX,%RAX,8),%XMM4,%XMM8 |
(674) 0x635ab VADDSD %XMM8,%XMM2,%XMM1 |
(674) 0x635b0 VFMADD132SD %XMM9,%XMM10,%XMM1 |
(674) 0x635b5 VMOVSD %XMM1,(%R10) |
(674) 0x635ba MOV 0xa8(%RSP),%RAX |
(674) 0x635c2 MOV 0xb0(%RSP),%R9 |
(674) 0x635ca ADD $0x10,%R15 |
(674) 0x635ce ADD %R9,0x110(%RSP) |
(674) 0x635d6 ADD %R9,0x100(%RSP) |
(674) 0x635de ADD %RAX,%RCX |
(674) 0x635e1 ADD %RAX,%RDX |
(674) 0x635e4 CMP %R15,0xb8(%RSP) |
(674) 0x635ec JNE 632c9 |
(675) 0x635f2 MOV 0x60(%RSP),%RCX |
(675) 0x635f7 MOV 0x88(%RSP),%R9 |
(675) 0x635ff MOV %RBX,%R15 |
(675) 0x63602 MOV 0x80(%RSP),%R8 |
(675) 0x6360a MOV 0x78(%RSP),%RDI |
(675) 0x6360f ADD %RCX,%R9 |
(675) 0x63612 ADD $0x8,%RDI |
(675) 0x63616 ADD %RCX,%R8 |
(675) 0x63619 CMP %RDI,0xa0(%RSP) |
(675) 0x63621 JNE 63253 |
(673) 0x63627 ADDQ $0x8,0x58(%RSP) |
(673) 0x6362d MOV 0x58(%RSP),%RDX |
(673) 0x63632 ADDQ $0x18,0x48(%RSP) |
(673) 0x63638 CMP %RDX,0x28(%RSP) |
(673) 0x6363d JNE 62eee |
0x63643 MOV 0x20(%RSP),%R13 |
0x63648 MOV 0x18(%RSP),%RBX |
0x6364d MOV 0x10(%RSP),%R12 |
0x63652 VZEROUPPER |
0x63655 MOV %R13,%RDI |
0x63658 XOR %R14D,%R14D |
0x6365b CALL 8450 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> |
0x63660 MOV 0x30(%RBX),%ECX |
0x63663 TEST %ECX,%ECX |
0x63665 JLE 636d7 |
(672) 0x63667 MOV %ECX,%R10D |
(672) 0x6366a LEA 0x1(%R14),%ESI |
(672) 0x6366e VMOVD 0x8(%RBX),%XMM15 |
(672) 0x63673 IMUL 0x40(%RBX),%ESI |
(672) 0x63677 IMUL %R14D,%R10D |
(672) 0x6367b VMOVD %ESI,%XMM14 |
(672) 0x6367f VMOVD %R10D,%XMM7 |
(672) 0x63684 VPMINSD %XMM15,%XMM14,%XMM3 |
(672) 0x63689 VPSUBD %XMM7,%XMM3,%XMM4 |
(672) 0x6368d VMOVD %XMM4,%EAX |
(672) 0x63691 TEST %EAX,%EAX |
(672) 0x63693 JLE 636cf |
(672) 0x63695 MOV 0x310(%RBX),%R8 |
(672) 0x6369c LEA (%R14,%R14,2),%R9 |
(672) 0x636a0 CLTQ |
(672) 0x636a2 MOVSXD %R10D,%R11 |
(672) 0x636a5 MOV 0x18(%R12),%R15 |
(672) 0x636aa LEA (%R8,%R9,8),%RDX |
(672) 0x636ae MOV (%RDX),%RSI |
(672) 0x636b1 LEA (,%RAX,8),%RDX |
(672) 0x636b9 LEA (%R15,%R11,8),%RDI |
(672) 0x636bd CMP $0x8,%RDX |
(672) 0x636c1 JE 637a1 |
(672) 0x636c7 CALL 8070 <memmove@plt> |
(672) 0x636cc MOV 0x30(%RBX),%ECX |
(672) 0x636cf INC %R14 |
(672) 0x636d2 CMP %R14D,%ECX |
(672) 0x636d5 JG 63667 |
0x636d7 LEA -0x28(%RBP),%RSP |
0x636db POP %RBX |
0x636dc POP %R12 |
0x636de POP %R13 |
0x636e0 POP %R14 |
0x636e2 POP %R15 |
0x636e4 POP %RBP |
0x636e5 RET |
(674) 0x636e6 XOR %EAX,%EAX |
(674) 0x636e8 JMP 63432 |
(673) 0x636ed VMOVSD 0x19f6b(%RIP),%XMM4 |
(673) 0x636f5 MOV 0x19f94(%RIP),%R11 |
(673) 0x636fc VXORPD %XMM3,%XMM3,%XMM3 |
(673) 0x63700 MOVQ $0,0xf0(%RSP) |
(673) 0x6370c VMOVSD %XMM4,0x118(%RSP) |
(673) 0x63715 VMOVQ %R11,%XMM5 |
(673) 0x6371a JMP 6311f |
(673) 0x6371f VMOVAPD 0x19f39(%RIP),%YMM12 |
(673) 0x63727 XOR %R13D,%R13D |
(673) 0x6372a JMP 6306d |
(673) 0x6372f MOVQ $0,0x110(%RSP) |
(673) 0x6373b VMOVAPD 0x19f1d(%RIP),%YMM6 |
(673) 0x63743 JMP 62fc5 |
(673) 0x63748 VMOVAPD 0x19f30(%RIP),%YMM12 |
(673) 0x63750 MOVSXD %ESI,%R13 |
(673) 0x63753 JMP 6306d |
(673) 0x63758 CLTQ |
(673) 0x6375a VMOVAPD 0x19f1e(%RIP),%YMM6 |
(673) 0x63762 MOV %RAX,0x110(%RSP) |
(673) 0x6376a JMP 62fc5 |
(673) 0x6376f MOV 0x19f1a(%RIP),%R10 |
(673) 0x63776 CLTQ |
(673) 0x63778 VXORPD %XMM4,%XMM4,%XMM4 |
(673) 0x6377c VMOVSD 0x19f14(%RIP),%XMM3 |
(673) 0x63784 MOV %RAX,0xf0(%RSP) |
(673) 0x6378c VMOVSD 0x19ef4(%RIP),%XMM5 |
(673) 0x63794 MOV %R10,0x118(%RSP) |
(673) 0x6379c JMP 6311f |
(672) 0x637a1 VMOVSD (%RSI),%XMM5 |
(672) 0x637a5 VMOVSD %XMM5,(%RDI) |
(672) 0x637a9 JMP 636cf |
0x637ae XCHG %AX,%AX |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►49.57+ | qmcplusplus::SPOSet::evaluateD[...] | OhmmsVector.h:249 | libqmcwfs.so |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:217 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::eva[...] | stl_vector.h:987 | libqmcwfs.so |
○ | main._omp_fn.1 | NonLocalPP.hpp:126 | exec |
○ | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
►48.67+ | qmcplusplus::SPOSet::evaluateD[...] | OhmmsVector.h:249 | libqmcwfs.so |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:217 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:266 | libqmcwfs.so |
○ | main._omp_fn.1 | NonLocalPP.hpp:126 | exec |
○ | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | libqmcwfs.so |
nb instructions | 75 |
nb uops | 79 |
loop length | 323 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 19 |
micro-operation queue | 13.17 cycles |
front end | 13.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 2.33 | 7.00 | 7.00 | 12.00 | 2.33 | 3.00 | 12.00 | 12.00 | 12.00 | 2.33 | 7.00 |
cycles | 3.00 | 2.33 | 7.00 | 7.00 | 12.00 | 2.33 | 3.00 | 12.00 | 12.00 | 12.00 | 2.33 | 7.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.65 |
Stall cycles | 0.00 |
Front-end | 13.17 |
Dispatch | 12.00 |
Overall L1 | 13.17 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 11% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x180,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x358(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 8540 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x48(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x128(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R14),%R15D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 62e33 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x53> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R15,%R15,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x120(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 13e40 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD 0x30(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ESI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 63655 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x875> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x40(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%RSI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EDI,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SAL $0x5,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R8,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x130(%RSP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x128(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x120(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x108(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM7,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM0,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM1,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 8450 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 636d7 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x8f7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | libqmcwfs.so |
nb instructions | 75 |
nb uops | 79 |
loop length | 323 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 19 |
micro-operation queue | 13.17 cycles |
front end | 13.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 2.33 | 7.00 | 7.00 | 12.00 | 2.33 | 3.00 | 12.00 | 12.00 | 12.00 | 2.33 | 7.00 |
cycles | 3.00 | 2.33 | 7.00 | 7.00 | 12.00 | 2.33 | 3.00 | 12.00 | 12.00 | 12.00 | 2.33 | 7.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.65 |
Stall cycles | 0.00 |
Front-end | 13.17 |
Dispatch | 12.00 |
Overall L1 | 13.17 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 11% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x180,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x358(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 8540 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x48(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x128(%R14),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R14),%R15D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 62e33 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x53> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R15,%R15,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x120(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 13e40 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD 0x30(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ESI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 63655 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x875> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x40(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%RSI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EDI,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SAL $0x5,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R8,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x130(%RSP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x128(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x120(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x108(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM7,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM0,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM1,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 8450 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 636d7 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x8f7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::einspline_spo_ref | 24.26 | 20.95 |
▼Loop 673 - einspline_spo_ref.hpp:175-176 - libqmcwfs.so– | 0.02 | 0.02 |
▼Loop 675 - MultiBsplineRef.hpp:63-71 - libqmcwfs.so– | 0 | 0 |
▼Loop 674 - MultiBsplineRef.hpp:64-71 - libqmcwfs.so– | 0.01 | 0.01 |
○Loop 676 - MultiBsplineRef.hpp:68-71 - libqmcwfs.so | 24.22 | 20.31 |
○Loop 672 - einspline_spo_ref.hpp:183-187 - libqmcwfs.so | 0 | 0 |