Loop Id: 491 | Module: libqmcwfs.so | Source: BsplineFunctor.h:305-336 | Coverage: 0.03% |
---|
Loop Id: 491 | Module: libqmcwfs.so | Source: BsplineFunctor.h:305-336 | Coverage: 0.03% |
---|
0x4fa20 VMOVAPD (%RSI,%RDX,2),%YMM10 [9] |
0x4fa25 VMOVAPD 0x20(%RSI,%RDX,2),%YMM9 [9] |
0x4fa2b KMOVB %K1,%K2 |
0x4fa2f KMOVB %K1,%K5 |
0x4fa33 KMOVB %K1,%K6 |
0x4fa37 KMOVB %K1,%K3 |
0x4fa3b VMOVAPD 0x480(%RSP),%YMM8 [17] |
0x4fa44 KMOVB %K1,%K4 |
0x4fa48 KMOVB %K1,%K7 |
0x4fa4c VMOVAPD 0x3e0(%RSP),%YMM24 [17] |
0x4fa54 VMULPD %YMM18,%YMM10,%YMM0 |
0x4fa5a VMOVDQA (%RDI,%RDX,1),%YMM4 [2] |
0x4fa5f ADD $0x20,%RDX |
0x4fa63 VMULPD %YMM18,%YMM9,%YMM15 |
0x4fa69 VRNDSCALEPD $0xb,%YMM0,%YMM7 |
0x4fa70 VCVTTPD2DQ %YMM0,%XMM5 |
0x4fa74 VGATHERDPD (%RAX,%XMM5,8),%YMM22{%K2} [10] |
0x4fa7b KMOVB %K1,%K2 |
0x4fa7f VRNDSCALEPD $0xb,%YMM15,%YMM2 |
0x4fa86 VCVTTPD2DQ %YMM15,%XMM3 |
0x4fa8b VINSERTI128 $0x1,%XMM3,%YMM5,%YMM6 |
0x4fa91 VPADDD 0x38747(%RIP),%YMM6,%YMM13 [12] |
0x4fa99 VPADDD 0x3875f(%RIP),%YMM6,%YMM14 [12] |
0x4faa1 VPERM2I128 $0x11,%YMM6,%YMM6,%YMM11 |
0x4faa7 VPADDD 0x38771(%RIP),%YMM6,%YMM6 [12] |
0x4faaf VGATHERDPD (%RAX,%XMM11,8),%YMM21{%K3} [1] |
0x4fab6 KMOVB %K1,%K3 |
0x4faba VGATHERDPD (%RAX,%XMM14,8),%YMM12{%K6} [4] |
0x4fac1 VGATHERDPD (%RAX,%XMM13,8),%YMM23{%K4} [3] |
0x4fac8 KMOVB %K1,%K4 |
0x4facc KMOVB %K1,%K6 |
0x4fad0 VGATHERDPD (%RAX,%XMM6,8),%YMM20{%K2} [11] |
0x4fad7 KMOVB %K1,%K2 |
0x4fadb VSUBPD %YMM7,%YMM0,%YMM0 |
0x4fadf VPERM2I128 $0x11,%YMM13,%YMM13,%YMM7 |
0x4fae5 VSUBPD %YMM2,%YMM15,%YMM1 |
0x4fae9 VGATHERDPD (%RAX,%XMM7,8),%YMM5{%K5} [16] |
0x4faf0 VPERM2I128 $0x11,%YMM6,%YMM6,%YMM7 |
0x4faf6 VMOVAPD 0x4c0(%RSP),%YMM6 [17] |
0x4faff KMOVB %K1,%K5 |
0x4fb03 VPERM2I128 $0x11,%YMM14,%YMM14,%YMM15 |
0x4fb09 VMOVAPD 0x4a0(%RSP),%YMM14 [17] |
0x4fb12 VGATHERDPD (%RAX,%XMM7,8),%YMM13{%K3} [15] |
0x4fb19 KMOVB %K1,%K3 |
0x4fb1d VMULPD %YMM0,%YMM0,%YMM3 |
0x4fb21 VGATHERDPD (%RAX,%XMM15,8),%YMM11{%K7} [13] |
0x4fb28 VMOVAPD %YMM6,%YMM7 |
0x4fb2c VMOVAPD 0x460(%RSP),%YMM15 [17] |
0x4fb35 KMOVB %K1,%K7 |
0x4fb39 VFMADD132PD %YMM0,%YMM14,%YMM7 |
0x4fb3e VFMADD132PD %YMM1,%YMM14,%YMM6 |
0x4fb43 VMOVAPD %YMM8,%YMM14 |
0x4fb48 VFMADD132PD %YMM0,%YMM15,%YMM14 |
0x4fb4d VFMADD132PD %YMM1,%YMM15,%YMM8 |
0x4fb52 VMOVAPD 0x440(%RSP),%YMM15 [17] |
0x4fb5b VMULPD %YMM1,%YMM1,%YMM2 |
0x4fb5f VMOVAPD %YMM15,%YMM19 |
0x4fb65 VMULPD %YMM3,%YMM0,%YMM17 |
0x4fb6b VMULPD %YMM8,%YMM5,%YMM8 |
0x4fb70 VMULPD %YMM14,%YMM23,%YMM14 |
0x4fb76 VMULPD %YMM2,%YMM1,%YMM16 |
0x4fb7c VFMADD132PD %YMM21,%YMM8,%YMM6 |
0x4fb82 VMOVAPD 0x400(%RSP),%YMM8 [17] |
0x4fb8b VFMADD132PD %YMM22,%YMM14,%YMM7 |
0x4fb91 VMOVAPD 0x420(%RSP),%YMM14 [17] |
0x4fb9a VFMADD132PD %YMM0,%YMM14,%YMM19 |
0x4fba0 VFMADD132PD %YMM1,%YMM14,%YMM15 |
0x4fba5 VMOVAPD %YMM8,%YMM14 |
0x4fbaa VFMADD132PD %YMM0,%YMM24,%YMM14 |
0x4fbb0 VFMADD132PD %YMM1,%YMM24,%YMM8 |
0x4fbb6 VMULPD %YMM14,%YMM20,%YMM14 |
0x4fbbc VMULPD %YMM8,%YMM13,%YMM8 |
0x4fbc1 VFMADD231PD %YMM19,%YMM12,%YMM14 |
0x4fbc7 VMOVDQA32 %XMM4,%XMM19 |
0x4fbcd VFMADD231PD %YMM15,%YMM11,%YMM8 |
0x4fbd2 VMOVAPD 0x3c0(%RSP),%YMM15 [17] |
0x4fbdb VADDPD %YMM14,%YMM7,%YMM7 |
0x4fbe0 VADDPD %YMM8,%YMM6,%YMM6 |
0x4fbe5 VMULPD %YMM15,%YMM7,%YMM14 |
0x4fbea VBROADCASTSD 0x3808d(%RIP),%YMM7 [12] |
0x4fbf3 VMULPD %YMM15,%YMM6,%YMM8 |
0x4fbf8 VDIVPD %YMM10,%YMM7,%YMM10 |
0x4fbfd VSCATTERDPD %YMM14,(%R11,%XMM4,8){%K4} [6] |
0x4fc04 VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 |
0x4fc0a VSCATTERDPD %YMM8,(%R11,%XMM4,8){%K5} [5] |
0x4fc11 VMOVAPD 0x380(%RSP),%YMM6 [17] |
0x4fc1a VMOVAPD 0x360(%RSP),%YMM15 [17] |
0x4fc23 VMOVAPD 0x3a0(%RSP),%YMM14 [17] |
0x4fc2c VMOVAPD 0x320(%RSP),%YMM8 [17] |
0x4fc35 VDIVPD %YMM9,%YMM7,%YMM9 |
0x4fc3a VMOVAPD %YMM6,%YMM7 |
0x4fc3e VFMADD132PD %YMM0,%YMM15,%YMM7 |
0x4fc43 VFMADD132PD %YMM1,%YMM15,%YMM6 |
0x4fc48 VMOVAPD 0x300(%RSP),%YMM15 [17] |
0x4fc51 VFMADD231PD %YMM14,%YMM3,%YMM7 |
0x4fc56 VFMADD231PD %YMM14,%YMM2,%YMM6 |
0x4fc5b VMOVAPD %YMM8,%YMM14 |
0x4fc60 VFMADD132PD %YMM0,%YMM15,%YMM14 |
0x4fc65 VFMADD132PD %YMM1,%YMM15,%YMM8 |
0x4fc6a VMOVAPD 0x340(%RSP),%YMM15 [17] |
0x4fc73 VMULPD %YMM18,%YMM10,%YMM10 |
0x4fc79 VFMADD231PD %YMM15,%YMM3,%YMM14 |
0x4fc7e VFMADD231PD %YMM15,%YMM2,%YMM8 |
0x4fc83 VMULPD %YMM14,%YMM23,%YMM14 |
0x4fc89 VMULPD %YMM8,%YMM5,%YMM8 |
0x4fc8e VFMADD132PD %YMM22,%YMM14,%YMM7 |
0x4fc94 VMOVAPD 0x2c0(%RSP),%YMM14 [17] |
0x4fc9d VMOVAPD 0x2a0(%RSP),%YMM15 [17] |
0x4fca6 VFMADD132PD %YMM21,%YMM8,%YMM6 |
0x4fcac VMOVAPD 0x2e0(%RSP),%YMM8 [17] |
0x4fcb5 VMOVAPD %YMM14,%YMM24 |
0x4fcbb VFMADD132PD %YMM1,%YMM15,%YMM14 |
0x4fcc0 VFMADD132PD %YMM0,%YMM15,%YMM24 |
0x4fcc6 VMOVAPD 0x280(%RSP),%YMM15 [17] |
0x4fccf VMULPD %YMM18,%YMM9,%YMM9 |
0x4fcd5 VFMADD231PD %YMM8,%YMM2,%YMM14 |
0x4fcda VFMADD231PD %YMM8,%YMM3,%YMM24 |
0x4fce0 VMOVAPD %YMM15,%YMM8 |
0x4fce5 VFMADD213PD 0x4e0(%RSP),%YMM0,%YMM8 [17] |
0x4fcef VFMADD213PD 0x4e0(%RSP),%YMM1,%YMM15 [17] |
0x4fcf9 VFMADD231PD 0x500(%RSP),%YMM3,%YMM8 [17] |
0x4fd03 VFMADD231PD 0x500(%RSP),%YMM2,%YMM15 [17] |
0x4fd0d VMULPD %YMM8,%YMM20,%YMM8 |
0x4fd13 VMULPD %YMM15,%YMM13,%YMM15 |
0x4fd18 VFMADD231PD %YMM24,%YMM12,%YMM8 |
0x4fd1e VFMADD132PD %YMM11,%YMM15,%YMM14 |
0x4fd23 VADDPD %YMM8,%YMM7,%YMM7 |
0x4fd28 VADDPD %YMM14,%YMM6,%YMM6 |
0x4fd2d VMULPD %YMM10,%YMM7,%YMM10 |
0x4fd32 VMULPD %YMM9,%YMM6,%YMM9 |
0x4fd37 VSCATTERDPD %YMM10,(%R10,%XMM19,8){%K6} [18] |
0x4fd3e VSCATTERDPD %YMM9,(%R10,%XMM4,8){%K7} [8] |
0x4fd45 VMOVAPD 0x240(%RSP),%YMM14 [17] |
0x4fd4e VMOVAPD 0x220(%RSP),%YMM7 [17] |
0x4fd57 VMOVAPD 0x260(%RSP),%YMM15 [17] |
0x4fd60 VMOVAPD 0x200(%RSP),%YMM10 [17] |
0x4fd69 VMULPD %YMM14,%YMM3,%YMM8 |
0x4fd6e VMOVAPD %YMM7,%YMM9 |
0x4fd72 VMULPD %YMM14,%YMM2,%YMM6 |
0x4fd77 VFMADD132PD %YMM0,%YMM10,%YMM9 |
0x4fd7c VMOVAPD 0x1c0(%RSP),%YMM14 [17] |
0x4fd85 VFMADD132PD %YMM1,%YMM10,%YMM7 |
0x4fd8a VMULPD %YMM14,%YMM3,%YMM10 |
0x4fd8f VMULPD %YMM14,%YMM2,%YMM14 |
0x4fd94 VFMADD231PD %YMM15,%YMM17,%YMM8 |
0x4fd9a VFMADD231PD %YMM15,%YMM16,%YMM6 |
0x4fda0 VMOVAPD 0x180(%RSP),%YMM15 [17] |
0x4fda9 VADDPD %YMM8,%YMM9,%YMM9 |
0x4fdae VMOVAPD 0x1e0(%RSP),%YMM8 [17] |
0x4fdb7 VADDPD %YMM6,%YMM7,%YMM7 |
0x4fdbb VFMADD231PD %YMM8,%YMM17,%YMM10 |
0x4fdc1 VFMADD231PD %YMM8,%YMM16,%YMM14 |
0x4fdc7 VMOVAPD 0x1a0(%RSP),%YMM8 [17] |
0x4fdd0 VMOVAPD %YMM8,%YMM6 |
0x4fdd4 VFMADD132PD %YMM1,%YMM15,%YMM8 |
0x4fdd9 VFMADD132PD %YMM0,%YMM15,%YMM6 |
0x4fdde VMOVAPD %YMM1,%YMM15 |
0x4fde2 VFMADD132PD %YMM26,%YMM25,%YMM1 |
0x4fde8 VFMADD132PD %YMM29,%YMM28,%YMM15 |
0x4fdee VADDPD %YMM14,%YMM8,%YMM14 |
0x4fdf3 VMOVAPD %YMM0,%YMM8 |
0x4fdf7 VFMADD132PD %YMM29,%YMM28,%YMM8 |
0x4fdfd VFMADD132PD %YMM26,%YMM25,%YMM0 |
0x4fe03 VADDPD %YMM10,%YMM6,%YMM10 |
0x4fe08 VMULPD %YMM14,%YMM5,%YMM5 |
0x4fe0d VMULPD %YMM23,%YMM10,%YMM6 |
0x4fe13 VFMADD231PD %YMM7,%YMM21,%YMM5 |
0x4fe19 VMULPD %YMM30,%YMM2,%YMM7 |
0x4fe1f VFMADD231PD %YMM9,%YMM22,%YMM6 |
0x4fe25 VMULPD %YMM30,%YMM3,%YMM9 |
0x4fe2b VMULPD %YMM27,%YMM2,%YMM2 |
0x4fe31 VMULPD %YMM27,%YMM3,%YMM3 |
0x4fe37 VFMADD231PD %YMM31,%YMM16,%YMM7 |
0x4fe3d VFMADD231PD %YMM31,%YMM17,%YMM9 |
0x4fe43 VADDPD %YMM7,%YMM15,%YMM10 |
0x4fe47 VADDPD %YMM9,%YMM8,%YMM14 |
0x4fe4c VMOVAPD 0x160(%RSP),%YMM9 [17] |
0x4fe55 VFMADD231PD %YMM9,%YMM17,%YMM3 |
0x4fe5b VFMADD231PD %YMM9,%YMM16,%YMM2 |
0x4fe61 VADDPD %YMM3,%YMM0,%YMM0 |
0x4fe65 VADDPD %YMM1,%YMM2,%YMM1 |
0x4fe69 VMULPD %YMM20,%YMM0,%YMM7 |
0x4fe6f VMULPD %YMM1,%YMM13,%YMM13 |
0x4fe73 VFMADD132PD %YMM14,%YMM7,%YMM12 |
0x4fe78 VFMADD132PD %YMM10,%YMM13,%YMM11 |
0x4fe7d VADDPD %YMM12,%YMM6,%YMM12 |
0x4fe82 VADDPD %YMM11,%YMM5,%YMM11 |
0x4fe87 VSCATTERDPD %YMM12,(%R9,%XMM19,8){%K2} [7] |
0x4fe8e VSCATTERDPD %YMM11,(%R9,%XMM4,8){%K3} [14] |
0x4fe95 CMP %RDX,%R13 |
0x4fe98 JNE 4fa20 |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 305 - 336 |
-------------------------------------------------------------------------------- |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.12 |
CQA speedup if FP arith vectorized | 1.59 |
CQA speedup if fully vectorized | 2.20 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.01 |
Bottlenecks | P0, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 67.50 |
CQA cycles if no scalar integer | 60.50 |
CQA cycles if FP arith vectorized | 42.50 |
CQA cycles if fully vectorized | 30.75 |
Front-end cycles | 48.67 |
DIV/SQRT cycles | 67.50 |
P0 cycles | 67.00 |
P1 cycles | 23.67 |
P2 cycles | 23.67 |
P3 cycles | 12.00 |
P4 cycles | 34.50 |
P5 cycles | 3.60 |
P6 cycles | 12.00 |
P7 cycles | 12.00 |
P8 cycles | 12.00 |
P9 cycles | 3.40 |
P10 cycles | 23.67 |
P11 cycles | 16.00 |
Inter-iter dependencies cycles | 7 - 36 |
FE+BE cycles (UFS) | 72.16 - 97.99 |
Stall cycles (UFS) | 29.68 - 55.27 |
Nb insns | 191.00 |
Nb uops | 292.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 30.00 |
FLOP/cycle | 9.13 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 128.00 |
Nb FLOP fma | 208.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 24.77 |
Bytes prefetched | 0.00 |
Bytes loaded | 1480.00 |
Bytes stored | 192.00 |
Stride 0 | 11.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 99.43 |
Vectorization ratio load | 97.87 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.88 |
Vector-efficiency ratio all | 48.78 |
Vector-efficiency ratio load | 49.20 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 50.00 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 43.36 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.12 |
CQA speedup if FP arith vectorized | 1.59 |
CQA speedup if fully vectorized | 2.20 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.01 |
Bottlenecks | P0, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 67.50 |
CQA cycles if no scalar integer | 60.50 |
CQA cycles if FP arith vectorized | 42.50 |
CQA cycles if fully vectorized | 30.75 |
Front-end cycles | 48.67 |
DIV/SQRT cycles | 67.50 |
P0 cycles | 67.00 |
P1 cycles | 23.67 |
P2 cycles | 23.67 |
P3 cycles | 12.00 |
P4 cycles | 34.50 |
P5 cycles | 3.60 |
P6 cycles | 12.00 |
P7 cycles | 12.00 |
P8 cycles | 12.00 |
P9 cycles | 3.40 |
P10 cycles | 23.67 |
P11 cycles | 16.00 |
Inter-iter dependencies cycles | 7 - 36 |
FE+BE cycles (UFS) | 72.16 - 97.99 |
Stall cycles (UFS) | 29.68 - 55.27 |
Nb insns | 191.00 |
Nb uops | 292.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 30.00 |
FLOP/cycle | 9.13 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 128.00 |
Nb FLOP fma | 208.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 24.77 |
Bytes prefetched | 0.00 |
Bytes loaded | 1480.00 |
Bytes stored | 192.00 |
Stride 0 | 11.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 99.43 |
Vectorization ratio load | 97.87 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.88 |
Vector-efficiency ratio all | 48.78 |
Vector-efficiency ratio load | 49.20 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 50.00 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 43.36 |
Path / |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b |
Source file and lines | BsplineFunctor.h:305-336 |
Module | libqmcwfs.so |
nb instructions | 191 |
nb uops | 292 |
loop length | 1150 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 48.67 cycles |
front end | 48.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 67.50 | 67.00 | 23.67 | 23.67 | 12.00 | 34.50 | 3.60 | 12.00 | 12.00 | 12.00 | 3.40 | 23.67 |
cycles | 67.50 | 67.00 | 23.67 | 23.67 | 12.00 | 34.50 | 3.60 | 12.00 | 12.00 | 12.00 | 3.40 | 23.67 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 7.00-36.00 |
FE+BE cycles | 72.16-97.99 |
Stall cycles | 29.68-55.27 |
RS full (events) | 62.29-0.65 |
Front-end | 48.67 |
Dispatch | 67.50 |
DIV/SQRT | 16.00 |
Data deps. | 7.00-36.00 |
Overall L1 | 67.50 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 34% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 49% |
load | 49% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 48% |
all | 48% |
load | 49% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 43% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%RSI,%RDX,2),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x20(%RSI,%RDX,2),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD 0x480(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD 0x3e0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM18,%YMM10,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA (%RDI,%RDX,1),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
ADD $0x20,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMULPD %YMM18,%YMM9,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0xb,%YMM0,%YMM7 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM0,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%XMM5,8),%YMM22{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VRNDSCALEPD $0xb,%YMM15,%YMM2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM15,%XMM3 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VINSERTI128 $0x1,%XMM3,%YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x38747(%RIP),%YMM6,%YMM13 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDD 0x3875f(%RIP),%YMM6,%YMM14 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x38771(%RIP),%YMM6,%YMM6 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VGATHERDPD (%RAX,%XMM11,8),%YMM21{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%XMM14,8),%YMM12{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VGATHERDPD (%RAX,%XMM13,8),%YMM23{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%XMM6,8),%YMM20{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VSUBPD %YMM7,%YMM0,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPERM2I128 $0x11,%YMM13,%YMM13,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VSUBPD %YMM2,%YMM15,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VGATHERDPD (%RAX,%XMM7,8),%YMM5{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD 0x4c0(%RSP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERM2I128 $0x11,%YMM14,%YMM14,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD 0x4a0(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERDPD (%RAX,%XMM7,8),%YMM13{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMULPD %YMM0,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%XMM15,8),%YMM11{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VMOVAPD %YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD 0x460(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD132PD %YMM0,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM0,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x440(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM1,%YMM1,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM3,%YMM0,%YMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM8,%YMM5,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM2,%YMM1,%YMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM21,%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x400(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD132PD %YMM22,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x420(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD132PD %YMM0,%YMM14,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM14,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM0,%YMM24,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM24,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM20,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM8,%YMM13,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM19,%YMM12,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %XMM4,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM15,%YMM11,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3c0(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD %YMM14,%YMM7,%YMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM8,%YMM6,%YMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM15,%YMM7,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x3808d(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMULPD %YMM15,%YMM6,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM10,%YMM7,%YMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VSCATTERDPD %YMM14,(%R11,%XMM4,8){%K4} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VSCATTERDPD %YMM8,(%R11,%XMM4,8){%K5} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VMOVAPD 0x380(%RSP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x360(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x3a0(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x320(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVPD %YMM9,%YMM7,%YMM9 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VMOVAPD %YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM0,%YMM15,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x300(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD231PD %YMM14,%YMM3,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM14,%YMM2,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM0,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x340(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM18,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM3,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM2,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM8,%YMM5,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM22,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2c0(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x2a0(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD132PD %YMM21,%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2e0(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM14,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM1,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x280(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM18,%YMM9,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM3,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD 0x4e0(%RSP),%YMM0,%YMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213PD 0x4e0(%RSP),%YMM1,%YMM15 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231PD 0x500(%RSP),%YMM3,%YMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231PD 0x500(%RSP),%YMM2,%YMM15 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMULPD %YMM8,%YMM20,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM13,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM24,%YMM12,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM11,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM8,%YMM7,%YMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM14,%YMM6,%YMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM10,%YMM7,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM9,%YMM6,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM10,(%R10,%XMM19,8){%K6} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VSCATTERDPD %YMM9,(%R10,%XMM4,8){%K7} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VMOVAPD 0x240(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x220(%RSP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x260(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x200(%RSP),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM14,%YMM3,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM7,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM14,%YMM2,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM10,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1c0(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD132PD %YMM1,%YMM10,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM3,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM17,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM16,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x180(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD %YMM8,%YMM9,%YMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x1e0(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD %YMM6,%YMM7,%YMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM8,%YMM17,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM16,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1a0(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM8,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM1,%YMM15,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM1,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM26,%YMM25,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM29,%YMM28,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM8,%YMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM0,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM29,%YMM28,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM26,%YMM25,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM10,%YMM6,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM14,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM23,%YMM10,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM21,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM2,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM22,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM3,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM16,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM17,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM7,%YMM15,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM9,%YMM8,%YMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x160(%RSP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD231PD %YMM9,%YMM17,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM16,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM3,%YMM0,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM1,%YMM2,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM20,%YMM0,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM1,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM14,%YMM7,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM10,%YMM13,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM12,%YMM6,%YMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM11,%YMM5,%YMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSCATTERDPD %YMM12,(%R9,%XMM19,8){%K2} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VSCATTERDPD %YMM11,(%R9,%XMM4,8){%K3} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
CMP %RDX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 4fa20 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x8b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b |
Source file and lines | BsplineFunctor.h:305-336 |
Module | libqmcwfs.so |
nb instructions | 191 |
nb uops | 292 |
loop length | 1150 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 48.67 cycles |
front end | 48.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 67.50 | 67.00 | 23.67 | 23.67 | 12.00 | 34.50 | 3.60 | 12.00 | 12.00 | 12.00 | 3.40 | 23.67 |
cycles | 67.50 | 67.00 | 23.67 | 23.67 | 12.00 | 34.50 | 3.60 | 12.00 | 12.00 | 12.00 | 3.40 | 23.67 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 7.00-36.00 |
FE+BE cycles | 72.16-97.99 |
Stall cycles | 29.68-55.27 |
RS full (events) | 62.29-0.65 |
Front-end | 48.67 |
Dispatch | 67.50 |
DIV/SQRT | 16.00 |
Data deps. | 7.00-36.00 |
Overall L1 | 67.50 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 34% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 49% |
load | 49% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 48% |
all | 48% |
load | 49% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 43% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%RSI,%RDX,2),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x20(%RSI,%RDX,2),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD 0x480(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD 0x3e0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM18,%YMM10,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA (%RDI,%RDX,1),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
ADD $0x20,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMULPD %YMM18,%YMM9,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0xb,%YMM0,%YMM7 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM0,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%XMM5,8),%YMM22{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VRNDSCALEPD $0xb,%YMM15,%YMM2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM15,%XMM3 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VINSERTI128 $0x1,%XMM3,%YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x38747(%RIP),%YMM6,%YMM13 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDD 0x3875f(%RIP),%YMM6,%YMM14 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x38771(%RIP),%YMM6,%YMM6 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VGATHERDPD (%RAX,%XMM11,8),%YMM21{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%XMM14,8),%YMM12{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VGATHERDPD (%RAX,%XMM13,8),%YMM23{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%XMM6,8),%YMM20{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VSUBPD %YMM7,%YMM0,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPERM2I128 $0x11,%YMM13,%YMM13,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VSUBPD %YMM2,%YMM15,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VGATHERDPD (%RAX,%XMM7,8),%YMM5{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD 0x4c0(%RSP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERM2I128 $0x11,%YMM14,%YMM14,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD 0x4a0(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERDPD (%RAX,%XMM7,8),%YMM13{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMULPD %YMM0,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%XMM15,8),%YMM11{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VMOVAPD %YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD 0x460(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD132PD %YMM0,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM14,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM0,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x440(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM1,%YMM1,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM3,%YMM0,%YMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM8,%YMM5,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM2,%YMM1,%YMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM21,%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x400(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD132PD %YMM22,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x420(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD132PD %YMM0,%YMM14,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM14,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM0,%YMM24,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM24,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM20,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM8,%YMM13,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM19,%YMM12,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %XMM4,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM15,%YMM11,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3c0(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD %YMM14,%YMM7,%YMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM8,%YMM6,%YMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM15,%YMM7,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x3808d(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMULPD %YMM15,%YMM6,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM10,%YMM7,%YMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VSCATTERDPD %YMM14,(%R11,%XMM4,8){%K4} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VSCATTERDPD %YMM8,(%R11,%XMM4,8){%K5} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VMOVAPD 0x380(%RSP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x360(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x3a0(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x320(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVPD %YMM9,%YMM7,%YMM9 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VMOVAPD %YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM0,%YMM15,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x300(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD231PD %YMM14,%YMM3,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM14,%YMM2,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM0,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x340(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM18,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM3,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM2,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM8,%YMM5,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM22,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2c0(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x2a0(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD132PD %YMM21,%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2e0(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM14,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM1,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x280(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM18,%YMM9,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM3,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD 0x4e0(%RSP),%YMM0,%YMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD213PD 0x4e0(%RSP),%YMM1,%YMM15 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231PD 0x500(%RSP),%YMM3,%YMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231PD 0x500(%RSP),%YMM2,%YMM15 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMULPD %YMM8,%YMM20,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM13,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM24,%YMM12,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM11,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM8,%YMM7,%YMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM14,%YMM6,%YMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM10,%YMM7,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM9,%YMM6,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM10,(%R10,%XMM19,8){%K6} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VSCATTERDPD %YMM9,(%R10,%XMM4,8){%K7} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VMOVAPD 0x240(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x220(%RSP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x260(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD 0x200(%RSP),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULPD %YMM14,%YMM3,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM7,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM14,%YMM2,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM10,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1c0(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD132PD %YMM1,%YMM10,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM3,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM17,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM16,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x180(%RSP),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD %YMM8,%YMM9,%YMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x1e0(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VADDPD %YMM6,%YMM7,%YMM7 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM8,%YMM17,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM16,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1a0(%RSP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM8,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM1,%YMM15,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM1,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM26,%YMM25,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM29,%YMM28,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM8,%YMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM0,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD132PD %YMM29,%YMM28,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM26,%YMM25,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM10,%YMM6,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM14,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM23,%YMM10,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM21,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM2,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM22,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM3,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM16,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM17,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM7,%YMM15,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM9,%YMM8,%YMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x160(%RSP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD231PD %YMM9,%YMM17,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM16,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM3,%YMM0,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM1,%YMM2,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM20,%YMM0,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM1,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM14,%YMM7,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM10,%YMM13,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM12,%YMM6,%YMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM11,%YMM5,%YMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSCATTERDPD %YMM12,(%R9,%XMM19,8){%K2} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VSCATTERDPD %YMM11,(%R9,%XMM4,8){%K3} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
CMP %RDX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 4fa20 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x8b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |