Loop Id: 861 | Module: exec | Source: MultiBsplineRef.hpp:42-71 [...] | Coverage: 0.01% |
---|
Loop Id: 861 | Module: exec | Source: MultiBsplineRef.hpp:42-71 [...] | Coverage: 0.01% |
---|
0x44d980 MOV -0x98(%RBP),%RCX |
0x44d987 CMP -0x68(%RBP),%RCX |
0x44d98b LEA 0x1(%RCX),%RCX |
0x44d98f MOV -0x70(%RBP),%RDX |
0x44d993 JE 44de40 |
0x44d999 MOV -0x90(%RBP),%RAX |
0x44d9a0 MOV (%RAX,%RCX,8),%R12 |
0x44d9a4 MOV %RCX,-0x98(%RBP) |
0x44d9ab LEA (%RCX,%RCX,2),%RAX |
0x44d9af VMOVUPD -0xe0(%RBP),%XMM0 |
0x44d9b7 VSUBSD 0x28(%R12),%XMM0,%XMM0 |
0x44d9be VSUBSD 0x50(%R12),%XMM17,%XMM1 |
0x44d9c6 MOV -0x88(%RBP),%RCX |
0x44d9cd MOV (%RCX,%RAX,8),%R13 |
0x44d9d1 VSUBSD 0x78(%R12),%XMM18,%XMM2 |
0x44d9d9 VMULSD 0x48(%R12),%XMM0,%XMM0 |
0x44d9e0 MOVSXD 0x38(%R12),%R8 |
0x44d9e5 VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM28 |
0x44d9ec VSUBSD %XMM28,%XMM0,%XMM0 |
0x44d9f2 VMULSD 0x70(%R12),%XMM1,%XMM1 |
0x44d9f9 MOVSXD 0x60(%R12),%RAX |
0x44d9fe MOV %RAX,-0x48(%RBP) |
0x44da02 VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM29 |
0x44da09 VMULSD 0x98(%R12),%XMM2,%XMM2 |
0x44da13 VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM30 |
0x44da1a MOVSXD 0x88(%R12),%RAX |
0x44da22 MOV %RAX,-0x50(%RBP) |
0x44da26 VMULSD %XMM19,%XMM0,%XMM3 |
0x44da2c VSUBSD %XMM3,%XMM20,%XMM4 |
0x44da32 VMULSD %XMM0,%XMM0,%XMM5 |
0x44da36 VMOVDDUP %XMM0,%XMM6 |
0x44da3a VMOVAPD %XMM16,%XMM8 |
0x44da40 VBLENDPD $0x1,%XMM4,%XMM8,%XMM4 |
0x44da46 VMULPD %XMM4,%XMM6,%XMM4 |
0x44da4a VADDPD %XMM21,%XMM4,%XMM7 |
0x44da50 VPUNPCKLQDQ %XMM5,%XMM6,%XMM6 |
0x44da54 VFMADD213PD %XMM22,%XMM7,%XMM6 |
0x44da5a VMOVUPD %XMM6,-0x120(%RBP) |
0x44da62 VSHUFPD $0x1,%XMM4,%XMM4,%XMM4 |
0x44da67 VSUBSD %XMM4,%XMM20,%XMM4 |
0x44da6d VFMADD213SD %XMM20,%XMM0,%XMM4 |
0x44da73 VFMADD213SD %XMM19,%XMM0,%XMM4 |
0x44da79 VMOVSD %XMM4,-0x110(%RBP) |
0x44da81 VSUBSD %XMM29,%XMM1,%XMM0 |
0x44da87 VMULSD %XMM5,%XMM3,%XMM1 |
0x44da8b VMOVSD %XMM1,-0x108(%RBP) |
0x44da93 VMULSD %XMM19,%XMM0,%XMM1 |
0x44da99 VSUBSD %XMM1,%XMM20,%XMM3 |
0x44da9f VMULSD %XMM0,%XMM0,%XMM4 |
0x44daa3 VMOVDDUP %XMM0,%XMM5 |
0x44daa7 VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 |
0x44daad VMULPD %XMM3,%XMM5,%XMM3 |
0x44dab1 VADDPD %XMM21,%XMM3,%XMM6 |
0x44dab7 VPUNPCKLQDQ %XMM4,%XMM5,%XMM5 |
0x44dabb VFMADD213PD %XMM22,%XMM6,%XMM5 |
0x44dac1 VMOVUPD %XMM5,-0x100(%RBP) |
0x44dac9 VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 |
0x44dace VSUBSD %XMM3,%XMM20,%XMM3 |
0x44dad4 VFMADD213SD %XMM20,%XMM0,%XMM3 |
0x44dada VFMADD213SD %XMM19,%XMM0,%XMM3 |
0x44dae0 VMOVSD %XMM3,-0xf0(%RBP) |
0x44dae8 VMULSD %XMM4,%XMM1,%XMM0 |
0x44daec VMOVSD %XMM0,-0xe8(%RBP) |
0x44daf4 VSUBSD %XMM30,%XMM2,%XMM23 |
0x44dafa VMULSD %XMM19,%XMM23,%XMM31 |
0x44db00 VSUBSD %XMM31,%XMM20,%XMM26 |
0x44db06 VFMADD213SD 0xe0520(%RIP),%XMM23,%XMM26 |
0x44db10 VMULSD %XMM20,%XMM23,%XMM25 |
0x44db16 VADDSD 0xe3380(%RIP),%XMM25,%XMM27 |
0x44db20 VMULSD %XMM23,%XMM23,%XMM24 |
0x44db26 VFMADD213SD 0xe3378(%RIP),%XMM24,%XMM27 |
0x44db30 MOV 0x10(%R12),%RAX |
0x44db35 MOV %RAX,-0x30(%RBP) |
0x44db39 MOV 0x18(%R12),%RAX |
0x44db3e MOV %RAX,-0x58(%RBP) |
0x44db42 MOV 0x20(%R12),%RAX |
0x44db47 MOV %RAX,-0x38(%RBP) |
0x44db4b TEST %RDX,%RDX |
0x44db4e JE 44db66 |
0x44db50 MOV %R13,%RDI |
0x44db53 XOR %ESI,%ESI |
0x44db55 MOV -0x70(%RBP),%RDX |
0x44db59 MOV %R8,-0x40(%RBP) |
0x44db5d CALL 52be00 <__intel_avx_rep_memset> |
0x44db62 MOV -0x40(%RBP),%R8 |
0x44db66 VCVTTSD2SI %XMM28,%EDX |
0x44db6c VCVTTSD2SI %XMM29,%ESI |
0x44db72 VCVTTSD2SI %XMM30,%ECX |
0x44db78 VFMADD213SD %XMM19,%XMM23,%XMM26 |
0x44db7e VSUBSD %XMM25,%XMM20,%XMM0 |
0x44db84 VFMADD213SD %XMM20,%XMM23,%XMM0 |
0x44db8a VFMADD213SD %XMM19,%XMM23,%XMM0 |
0x44db90 MOV 0x8(%R12),%RAX |
0x44db95 VMULSD %XMM24,%XMM31,%XMM1 |
0x44db9b VMOVDDUP %XMM27,%XMM2 |
0x44dba1 VMOVDDUP %XMM26,%XMM3 |
0x44dba7 VMOVDDUP %XMM0,%XMM4 |
0x44dbab VMOVDDUP %XMM1,%XMM5 |
0x44dbaf MOV -0x50(%RBP),%R12 |
0x44dbb3 DEC %R12 |
0x44dbb6 MOV %ECX,%EDI |
0x44dbb8 SAR $0x1f,%EDI |
0x44dbbb ANDN %ECX,%EDI,%ECX |
0x44dbc0 CMP %RCX,%R12 |
0x44dbc3 CMOVGE %RCX,%R12 |
0x44dbc7 MOV -0x38(%RBP),%R9 |
0x44dbcb MOV %R9,%R11 |
0x44dbce IMUL %R12,%R11 |
0x44dbd2 MOV -0x48(%RBP),%RCX |
0x44dbd6 DEC %RCX |
0x44dbd9 MOV %ESI,%EDI |
0x44dbdb SAR $0x1f,%EDI |
0x44dbde ANDN %ESI,%EDI,%ESI |
0x44dbe3 CMP %RSI,%RCX |
0x44dbe6 CMOVGE %RSI,%RCX |
0x44dbea IMUL -0x58(%RBP),%RCX |
0x44dbef DEC %R8 |
0x44dbf2 MOV %EDX,%ESI |
0x44dbf4 SAR $0x1f,%ESI |
0x44dbf7 ANDN %EDX,%ESI,%EDX |
0x44dbfc CMP %RDX,%R8 |
0x44dbff CMOVGE %RDX,%R8 |
0x44dc03 IMUL -0x30(%RBP),%R8 |
0x44dc08 LEA 0x2(%R12),%RDX |
0x44dc0d IMUL %R9,%RDX |
0x44dc11 LEA 0x3(%R12),%RSI |
0x44dc16 IMUL %R9,%RSI |
0x44dc1a LEA (%RBX,%R8,1),%RDI |
0x44dc1e ADD %RCX,%RDI |
0x44dc21 LEA (%R9,%R11,1),%R10 |
0x44dc25 ADD %RDI,%R10 |
0x44dc28 MOV %R10,-0x50(%RBP) |
0x44dc2c LEA (%RDI,%R11,1),%R10 |
0x44dc30 MOV %R10,-0x48(%RBP) |
0x44dc34 LEA (%RDI,%RDX,1),%R10 |
0x44dc38 MOV %R10,-0x40(%RBP) |
0x44dc3c ADD %RSI,%RDI |
0x44dc3f MOV %RDI,-0xd0(%RBP) |
0x44dc46 ADD %RCX,%R8 |
0x44dc49 ADD %R8,%RSI |
0x44dc4c LEA (%RAX,%RSI,8),%RSI |
0x44dc50 MOV -0x30(%RBP),%RCX |
0x44dc54 LEA (,%RCX,8),%RDI |
0x44dc5c MOV %RDI,-0xa0(%RBP) |
0x44dc63 INC %R12 |
0x44dc66 IMUL %R9,%R12 |
0x44dc6a MOV -0x58(%RBP),%RCX |
0x44dc6e LEA (,%RCX,8),%R10 |
0x44dc76 ADD %R8,%RDX |
0x44dc79 LEA (%RAX,%RDX,8),%RDX |
0x44dc7d ADD %R8,%R12 |
0x44dc80 LEA (%RAX,%R12,8),%R9 |
0x44dc84 ADD %R11,%R8 |
0x44dc87 LEA (%RAX,%R8,8),%R12 |
0x44dc8b XOR %ECX,%ECX |
0x44dc8d JMP 44dd01 |
(862) 0x44dcc0 MOV -0x38(%RBP),%RSI |
(862) 0x44dcc4 MOV -0xa0(%RBP),%RCX |
(862) 0x44dccb ADD %RCX,%RSI |
(862) 0x44dcce MOV -0xc0(%RBP),%RDX |
(862) 0x44dcd5 ADD %RCX,%RDX |
(862) 0x44dcd8 MOV -0xb8(%RBP),%R9 |
(862) 0x44dcdf ADD %RCX,%R9 |
(862) 0x44dce2 MOV -0xb0(%RBP),%R12 |
(862) 0x44dce9 ADD %RCX,%R12 |
(862) 0x44dcec MOV -0xa8(%RBP),%RCX |
(862) 0x44dcf3 CMP $0x3,%RCX |
(862) 0x44dcf7 LEA 0x1(%RCX),%RCX |
(862) 0x44dcfb JE 44d980 |
(862) 0x44dd01 VMOVSD -0x120(%RBP,%RCX,8),%XMM6 |
(862) 0x44dd0a MOV -0x30(%RBP),%R11 |
(862) 0x44dd0e MOV %RCX,-0xa8(%RBP) |
(862) 0x44dd15 IMUL %RCX,%R11 |
(862) 0x44dd19 MOV %R12,-0xb0(%RBP) |
(862) 0x44dd20 MOV %R9,-0xb8(%RBP) |
(862) 0x44dd27 MOV %RDX,-0xc0(%RBP) |
(862) 0x44dd2e MOV %RDX,%RCX |
(862) 0x44dd31 MOV %RSI,-0x38(%RBP) |
(862) 0x44dd35 MOV %RSI,%RDX |
(862) 0x44dd38 XOR %ESI,%ESI |
(862) 0x44dd3a JMP 44dd5a |
(863) 0x44dd40 ADD %R10,%RDX |
(863) 0x44dd43 ADD %R10,%RCX |
(863) 0x44dd46 ADD %R10,%R9 |
(863) 0x44dd49 ADD %R10,%R12 |
(863) 0x44dd4c CMP $0x3,%RSI |
(863) 0x44dd50 LEA 0x1(%RSI),%RSI |
(863) 0x44dd54 JE 44dcc0 |
(863) 0x44dd5a TEST %R14D,%R14D |
(863) 0x44dd5d JE 44dd40 |
(863) 0x44dd5f VMULSD -0x100(%RBP,%RSI,8),%XMM6,%XMM7 |
(863) 0x44dd68 CMP $0x2,%R15 |
(863) 0x44dd6c JB 44ddaf |
(863) 0x44dd6e VMOVDDUP %XMM7,%XMM8 |
(863) 0x44dd72 MOV -0xc8(%RBP),%RDI |
(863) 0x44dd79 XOR %R8D,%R8D |
(863) 0x44dd7c NOPL (%RAX) |
(864) 0x44dd80 VMULPD (%R12,%R8,1),%XMM3,%XMM9 |
(864) 0x44dd86 VFMADD231PD (%R9,%R8,1),%XMM2,%XMM9 |
(864) 0x44dd8c VFMADD231PD (%RCX,%R8,1),%XMM4,%XMM9 |
(864) 0x44dd92 VFMADD231PD (%RDX,%R8,1),%XMM5,%XMM9 |
(864) 0x44dd98 VFMADD213PD (%R13,%R8,1),%XMM8,%XMM9 |
(864) 0x44dd9f VMOVUPD %XMM9,(%R13,%R8,1) |
(864) 0x44dda6 ADD $0x10,%R8 |
(864) 0x44ddaa DEC %RDI |
(864) 0x44ddad JNE 44dd80 |
(863) 0x44ddaf CMP %R15,%RBX |
(863) 0x44ddb2 JAE 44dd40 |
(863) 0x44ddb4 MOV -0x58(%RBP),%RDI |
(863) 0x44ddb8 IMUL %RSI,%RDI |
(863) 0x44ddbc ADD %R11,%RDI |
(863) 0x44ddbf MOV -0x48(%RBP),%R8 |
(863) 0x44ddc3 ADD %RDI,%R8 |
(863) 0x44ddc6 VMULSD (%RAX,%R8,8),%XMM26,%XMM8 |
(863) 0x44ddcd MOV -0x50(%RBP),%R8 |
(863) 0x44ddd1 ADD %RDI,%R8 |
(863) 0x44ddd4 VFMADD231SD (%RAX,%R8,8),%XMM27,%XMM8 |
(863) 0x44dddb MOV -0x40(%RBP),%R8 |
(863) 0x44dddf ADD %RDI,%R8 |
(863) 0x44dde2 VFMADD231SD (%RAX,%R8,8),%XMM0,%XMM8 |
(863) 0x44dde8 ADD -0xd0(%RBP),%RDI |
(863) 0x44ddef VFMADD231SD (%RAX,%RDI,8),%XMM1,%XMM8 |
(863) 0x44ddf5 VFMADD213SD (%R13,%RBX,8),%XMM7,%XMM8 |
(863) 0x44ddfc VMOVSD %XMM8,(%R13,%RBX,8) |
(863) 0x44de03 JMP 44dd40 |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1169 - 1169 |
-------------------------------------------------------------------------------- |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algobase.h: 923 - 924 |
-------------------------------------------------------------------------------- |
923: for (; __first != __last; ++__first) |
924: *__first = __tmp; |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 45 - 47 |
-------------------------------------------------------------------------------- |
45: T sf = std::floor(x); |
46: T dx2 = x - sf; |
47: int ind2 = std::min(std::max(0, static_cast<int>(sf)), nmax); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 175 - 176 |
-------------------------------------------------------------------------------- |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.18 |
CQA speedup if FP arith vectorized | 2.72 |
CQA speedup if fully vectorized | 7.38 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.11 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE |
Source | MultiBsplineData.hpp:54-57,stl_vector.h:1169-1169,stl_algobase.h:923-924,MultiBsplineEvalHelper.hpp:45-47,einspline_spo_ref.hpp:175-176,MultiBsplineRef.hpp:42-44,MultiBsplineRef.hpp:47-49,MultiBsplineRef.hpp:56-58,MultiBsplineRef.hpp:67-67,MultiBsplineRef.hpp:70-71 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 27.17 |
CQA cycles if no scalar integer | 23.00 |
CQA cycles if FP arith vectorized | 10.00 |
CQA cycles if fully vectorized | 3.68 |
Front-end cycles | 27.17 |
DIV/SQRT cycles | 24.50 |
P0 cycles | 24.00 |
P1 cycles | 11.00 |
P2 cycles | 11.00 |
P3 cycles | 9.50 |
P4 cycles | 22.50 |
P5 cycles | 12.00 |
P6 cycles | 9.50 |
P7 cycles | 9.50 |
P8 cycles | 9.50 |
P9 cycles | 12.00 |
P10 cycles | 11.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 30.60 - 30.61 |
Stall cycles (UFS) | 2.93 - 2.97 |
Nb insns | 156.00 |
Nb uops | 163.00 |
Nb loads | 33.00 |
Nb stores | 18.00 |
Nb stack references | 20.00 |
FLOP/cycle | 2.21 |
Nb FLOP add-sub | 17.00 |
Nb FLOP mul | 17.00 |
Nb FLOP fma | 13.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 15.46 |
Bytes prefetched | 0.00 |
Bytes loaded | 260.00 |
Bytes stored | 160.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 16.28 |
Vectorization ratio load | 8.33 |
Vectorization ratio store | 11.11 |
Vectorization ratio mul | 13.33 |
Vectorization ratio add_sub | 12.50 |
Vectorization ratio fma | 18.18 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 20.83 |
Vector-efficiency ratio all | 14.53 |
Vector-efficiency ratio load | 13.54 |
Vector-efficiency ratio store | 13.89 |
Vector-efficiency ratio mul | 14.17 |
Vector-efficiency ratio add_sub | 14.06 |
Vector-efficiency ratio fma | 14.77 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.10 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.18 |
CQA speedup if FP arith vectorized | 2.72 |
CQA speedup if fully vectorized | 7.38 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.11 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE |
Source | MultiBsplineData.hpp:54-57,stl_vector.h:1169-1169,stl_algobase.h:923-924,MultiBsplineEvalHelper.hpp:45-47,einspline_spo_ref.hpp:175-176,MultiBsplineRef.hpp:42-44,MultiBsplineRef.hpp:47-49,MultiBsplineRef.hpp:56-58,MultiBsplineRef.hpp:67-67,MultiBsplineRef.hpp:70-71 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 27.17 |
CQA cycles if no scalar integer | 23.00 |
CQA cycles if FP arith vectorized | 10.00 |
CQA cycles if fully vectorized | 3.68 |
Front-end cycles | 27.17 |
DIV/SQRT cycles | 24.50 |
P0 cycles | 24.00 |
P1 cycles | 11.00 |
P2 cycles | 11.00 |
P3 cycles | 9.50 |
P4 cycles | 22.50 |
P5 cycles | 12.00 |
P6 cycles | 9.50 |
P7 cycles | 9.50 |
P8 cycles | 9.50 |
P9 cycles | 12.00 |
P10 cycles | 11.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 30.60 - 30.61 |
Stall cycles (UFS) | 2.93 - 2.97 |
Nb insns | 156.00 |
Nb uops | 163.00 |
Nb loads | 33.00 |
Nb stores | 18.00 |
Nb stack references | 20.00 |
FLOP/cycle | 2.21 |
Nb FLOP add-sub | 17.00 |
Nb FLOP mul | 17.00 |
Nb FLOP fma | 13.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 15.46 |
Bytes prefetched | 0.00 |
Bytes loaded | 260.00 |
Bytes stored | 160.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 16.28 |
Vectorization ratio load | 8.33 |
Vectorization ratio store | 11.11 |
Vectorization ratio mul | 13.33 |
Vectorization ratio add_sub | 12.50 |
Vectorization ratio fma | 18.18 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 20.83 |
Vector-efficiency ratio all | 14.53 |
Vector-efficiency ratio load | 13.54 |
Vector-efficiency ratio store | 13.89 |
Vector-efficiency ratio mul | 14.17 |
Vector-efficiency ratio add_sub | 14.06 |
Vector-efficiency ratio fma | 14.77 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.10 |
Path / |
Function | _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE |
Source file and lines | MultiBsplineRef.hpp:42-71 |
Module | exec |
nb instructions | 156 |
nb uops | 163 |
loop length | 783 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 25 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 20 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 27.17 cycles |
front end | 27.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 24.50 | 24.00 | 11.00 | 11.00 | 9.50 | 22.50 | 12.00 | 9.50 | 9.50 | 9.50 | 12.00 | 11.00 |
cycles | 24.50 | 24.00 | 11.00 | 11.00 | 9.50 | 22.50 | 12.00 | 9.50 | 9.50 | 9.50 | 12.00 | 11.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 30.60-30.61 |
Stall cycles | 2.93-2.97 |
RS full (events) | 7.51-7.64 |
Front-end | 27.17 |
Dispatch | 24.50 |
Overall L1 | 27.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 21% |
load | 10% |
store | 33% |
mul | 13% |
add-sub | 13% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 29% |
all | 16% |
load | 8% |
store | 11% |
mul | 13% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 15% |
load | 13% |
store | 16% |
mul | 14% |
add-sub | 14% |
fma | 14% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 14% |
load | 13% |
store | 13% |
mul | 14% |
add-sub | 14% |
fma | 14% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x98(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP -0x68(%RBP),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x70(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 44de40 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%RCX,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD -0xe0(%RBP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VSUBSD 0x28(%R12),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VSUBSD 0x50(%R12),%XMM17,%XMM1 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD 0x78(%R12),%XMM18,%XMM2 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMULSD 0x48(%R12),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD 0x38(%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM28 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM28,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x70(%R12),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD 0x60(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM29 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VMULSD 0x98(%R12),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM30 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
MOVSXD 0x88(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD %XMM19,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM3,%XMM20,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM0,%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDDUP %XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD %XMM16,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VBLENDPD $0x1,%XMM4,%XMM8,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD %XMM4,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %XMM21,%XMM4,%XMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPUNPCKLQDQ %XMM5,%XMM6,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VFMADD213PD %XMM22,%XMM7,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM6,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VSHUFPD $0x1,%XMM4,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM4,%XMM20,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213SD %XMM20,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM19,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM4,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM29,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM5,%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM1,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD %XMM19,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM1,%XMM20,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM0,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDDUP %XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD %XMM3,%XMM5,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %XMM21,%XMM3,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPUNPCKLQDQ %XMM4,%XMM5,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VFMADD213PD %XMM22,%XMM6,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM5,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM3,%XMM20,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213SD %XMM20,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM19,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM3,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD %XMM4,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM30,%XMM2,%XMM23 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM19,%XMM23,%XMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM31,%XMM20,%XMM26 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213SD 0xe0520(%RIP),%XMM23,%XMM26 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMULSD %XMM20,%XMM23,%XMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD 0xe3380(%RIP),%XMM25,%XMM27 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMULSD %XMM23,%XMM23,%XMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD 0xe3378(%RIP),%XMM24,%XMM27 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV 0x10(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 44db66 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x3e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x70(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 52be00 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCVTTSD2SI %XMM28,%EDX | 2 | 1.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VCVTTSD2SI %XMM29,%ESI | 2 | 1.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VCVTTSD2SI %XMM30,%ECX | 2 | 1.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VFMADD213SD %XMM19,%XMM23,%XMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM25,%XMM20,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213SD %XMM20,%XMM23,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM19,%XMM23,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x8(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM24,%XMM31,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDDUP %XMM27,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM26,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM0,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM1,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x50(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %ECX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x1f,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ANDN %ECX,%EDI,%ECX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1-2 | 0.33 |
CMP %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %RCX,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %ESI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x1f,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ANDN %ESI,%EDI,%ESI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1-2 | 0.33 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %RSI,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL -0x58(%RBP),%RCX | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
DEC %R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x1f,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ANDN %EDX,%ESI,%EDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1-2 | 0.33 |
CMP %RDX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %RDX,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL -0x30(%RBP),%R8 | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x2(%R12),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R9,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA 0x3(%R12),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R9,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RBX,%R8,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R9,%R11,1),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%R11,1),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%RDX,1),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RSI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RCX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RSI,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RCX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R8,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R12,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R11,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R8,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 44dd01 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x581> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
Function | _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE |
Source file and lines | MultiBsplineRef.hpp:42-71 |
Module | exec |
nb instructions | 156 |
nb uops | 163 |
loop length | 783 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 25 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 20 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 27.17 cycles |
front end | 27.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 24.50 | 24.00 | 11.00 | 11.00 | 9.50 | 22.50 | 12.00 | 9.50 | 9.50 | 9.50 | 12.00 | 11.00 |
cycles | 24.50 | 24.00 | 11.00 | 11.00 | 9.50 | 22.50 | 12.00 | 9.50 | 9.50 | 9.50 | 12.00 | 11.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 30.60-30.61 |
Stall cycles | 2.93-2.97 |
RS full (events) | 7.51-7.64 |
Front-end | 27.17 |
Dispatch | 24.50 |
Overall L1 | 27.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 21% |
load | 10% |
store | 33% |
mul | 13% |
add-sub | 13% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 29% |
all | 16% |
load | 8% |
store | 11% |
mul | 13% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 15% |
load | 13% |
store | 16% |
mul | 14% |
add-sub | 14% |
fma | 14% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 14% |
load | 13% |
store | 13% |
mul | 14% |
add-sub | 14% |
fma | 14% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x98(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP -0x68(%RBP),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x70(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 44de40 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%RCX,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD -0xe0(%RBP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VSUBSD 0x28(%R12),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VSUBSD 0x50(%R12),%XMM17,%XMM1 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD 0x78(%R12),%XMM18,%XMM2 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMULSD 0x48(%R12),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD 0x38(%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VRNDSCALESD $0x9,%XMM0,%XMM0,%XMM28 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM28,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD 0x70(%R12),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOVSXD 0x60(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM29 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VMULSD 0x98(%R12),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VRNDSCALESD $0x9,%XMM2,%XMM2,%XMM30 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
MOVSXD 0x88(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD %XMM19,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM3,%XMM20,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM0,%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDDUP %XMM0,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD %XMM16,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VBLENDPD $0x1,%XMM4,%XMM8,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD %XMM4,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %XMM21,%XMM4,%XMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPUNPCKLQDQ %XMM5,%XMM6,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VFMADD213PD %XMM22,%XMM7,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM6,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VSHUFPD $0x1,%XMM4,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM4,%XMM20,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213SD %XMM20,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM19,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM4,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM29,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM5,%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM1,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD %XMM19,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM1,%XMM20,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM0,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDDUP %XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD %XMM3,%XMM5,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %XMM21,%XMM3,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPUNPCKLQDQ %XMM4,%XMM5,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VFMADD213PD %XMM22,%XMM6,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %XMM5,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM3,%XMM20,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213SD %XMM20,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM19,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM3,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD %XMM4,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM30,%XMM2,%XMM23 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM19,%XMM23,%XMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM31,%XMM20,%XMM26 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213SD 0xe0520(%RIP),%XMM23,%XMM26 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMULSD %XMM20,%XMM23,%XMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD 0xe3380(%RIP),%XMM25,%XMM27 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMULSD %XMM23,%XMM23,%XMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD 0xe3378(%RIP),%XMM24,%XMM27 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV 0x10(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 44db66 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x3e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x70(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 52be00 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCVTTSD2SI %XMM28,%EDX | 2 | 1.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VCVTTSD2SI %XMM29,%ESI | 2 | 1.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VCVTTSD2SI %XMM30,%ECX | 2 | 1.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VFMADD213SD %XMM19,%XMM23,%XMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM25,%XMM20,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213SD %XMM20,%XMM23,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213SD %XMM19,%XMM23,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x8(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM24,%XMM31,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDDUP %XMM27,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM26,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM0,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDDUP %XMM1,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x50(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %ECX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x1f,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ANDN %ECX,%EDI,%ECX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1-2 | 0.33 |
CMP %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %RCX,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %ESI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x1f,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ANDN %ESI,%EDI,%ESI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1-2 | 0.33 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %RSI,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL -0x58(%RBP),%RCX | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
DEC %R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x1f,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ANDN %EDX,%ESI,%EDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1-2 | 0.33 |
CMP %RDX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %RDX,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL -0x30(%RBP),%R8 | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x2(%R12),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R9,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA 0x3(%R12),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R9,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RBX,%R8,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R9,%R11,1),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%R11,1),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDI,%RDX,1),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RSI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RCX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RSI,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RCX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R8,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R12,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R11,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R8,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 44dd01 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x581> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |