Loop Id: 312 | Module: exec | Source: OneBodyJastrowRef.h:134-146 [...] | Coverage: 0.01% |
---|
Loop Id: 312 | Module: exec | Source: OneBodyJastrowRef.h:134-146 [...] | Coverage: 0.01% |
---|
0x41b878 VXORPD %XMM0,%XMM0,%XMM0 |
0x41b87c XOR %R8D,%R8D |
0x41b87f VPBROADCASTQ %R8,%YMM20 |
0x41b885 VPSUBQ %YMM20,%YMM18,%YMM18 |
0x41b88b VPCMPNLEUQ 0xdab2a(%RIP),%YMM18,%K1 |
0x41b896 VMOVUPD (%RSI,%R8,8),%YMM18{%K1}{z} |
0x41b89d VMOVAPD %YMM18,%YMM30{%K1} |
0x41b8a3 VMULPD %YMM19,%YMM30,%YMM18 |
0x41b8a9 VCVTTPD2DQ %YMM18,%XMM19 |
0x41b8af VPMOVSXDQ %XMM19,%YMM19 |
0x41b8b5 VPSLLQ $0x3,%YMM19,%YMM19 |
0x41b8bc VPADDQ %YMM19,%YMM13,%YMM13 |
0x41b8c2 VPXORD %XMM19,%XMM19,%XMM19 |
0x41b8c8 KMOVQ %K1,%K2 |
0x41b8cd VGATHERQPD (,%YMM13,1),%YMM19{%K2} |
0x41b8d8 VRNDSCALEPD $0xb,%YMM18,%YMM20 |
0x41b8df VXORPD %XMM21,%XMM21,%XMM21 |
0x41b8e5 KMOVQ %K1,%K2 |
0x41b8ea VGATHERQPD 0x8(,%YMM13,1),%YMM21{%K2} |
0x41b8f5 VSUBPD %YMM20,%YMM18,%YMM18 |
0x41b8fb VMOVAPD %YMM19,%YMM29{%K1} |
0x41b901 VMOVAPD %YMM21,%YMM28{%K1} |
0x41b907 VFMADD231PD %YMM16,%YMM18,%YMM10 |
0x41b90d VXORPD %XMM16,%XMM16,%XMM16 |
0x41b913 KMOVQ %K1,%K2 |
0x41b918 VGATHERQPD 0x10(,%YMM13,1),%YMM16{%K2} |
0x41b923 VFMADD213PD %YMM11,%YMM18,%YMM10 |
0x41b929 VFMADD213PD %YMM14,%YMM18,%YMM10 |
0x41b92f VMULPD %YMM28,%YMM10,%YMM10 |
0x41b935 VMOVAPD %YMM16,%YMM27{%K1} |
0x41b93b VXORPD %XMM11,%XMM11,%XMM11 |
0x41b940 KMOVQ %K1,%K2 |
0x41b945 VGATHERQPD 0x18(,%YMM13,1),%YMM11{%K2} |
0x41b950 VFMADD231PD %YMM12,%YMM18,%YMM7 |
0x41b956 VFMADD213PD %YMM15,%YMM18,%YMM7 |
0x41b95c VFMADD213PD %YMM17,%YMM18,%YMM7 |
0x41b962 VMOVAPD %YMM11,%YMM26{%K1} |
0x41b968 VFMADD231PD %YMM3,%YMM18,%YMM1 |
0x41b96e VFMADD213PD %YMM5,%YMM18,%YMM1 |
0x41b974 VFMADD213PD %YMM4,%YMM18,%YMM1 |
0x41b97a VFMADD213PD %YMM10,%YMM29,%YMM1 |
0x41b980 VFMADD231PD %YMM6,%YMM18,%YMM2 |
0x41b986 VFMADD213PD %YMM9,%YMM18,%YMM2 |
0x41b98c VFMADD213PD %YMM8,%YMM18,%YMM2 |
0x41b992 VFMADD213PD %YMM1,%YMM27,%YMM2 |
0x41b998 VFMADD231PD %YMM7,%YMM26,%YMM2{%K1}{z} |
0x41b99e VEXTRACTF128 $0x1,%YMM2,%XMM1 |
0x41b9a4 VADDPD %XMM1,%XMM2,%XMM1 |
0x41b9a8 VSHUFPD $0x1,%XMM1,%XMM1,%XMM2 |
0x41b9ad VADDSD %XMM2,%XMM1,%XMM1 |
0x41b9b1 VADDSD %XMM1,%XMM0,%XMM0 |
0x41b9b5 VADDSD %XMM31,%XMM0,%XMM31 |
0x41b9bb LEA 0x1(%RDI),%RCX |
0x41b9bf CMP %RAX,%RDI |
0x41b9c2 MOV %RCX,%RDI |
0x41b9c5 JE 41b760 |
0x41b9cb MOV (%R15,%RDI,8),%R8 |
0x41b9cf TEST %R8,%R8 |
0x41b9d2 JE 41b9bb |
0x41b9d4 MOV 0x268(%R13),%R9 |
0x41b9db MOV 0x18(%R9),%R9 |
0x41b9df MOV (%R9,%RDI,4),%R11 |
0x41b9e3 MOV %R11,%R12 |
0x41b9e6 SHR $0x20,%R12 |
0x41b9ea SUB %R11D,%R12D |
0x41b9ed VXORPD %XMM0,%XMM0,%XMM0 |
0x41b9f1 TEST %R12D,%R12D |
0x41b9f4 JLE 41b9b5 |
0x41b9f6 VMOVSD 0x8(%R8),%XMM1 |
0x41b9fc MOV %R12D,%R12D |
0x41b9ff MOV %R12,%R10 |
0x41ba02 MOVSXD %R11D,%R14 |
0x41ba05 MOV $-0x10,%ECX |
0x41ba0a AND %RCX,%R10 |
0x41ba0d JE 41bd7c |
0x41ba13 MOV %R15,%RAX |
0x41ba16 VMOVQ %R11,%XMM2 |
0x41ba1b VBROADCASTSD %XMM1,%YMM3 |
0x41ba20 LEA (%RDX,%R14,8),%R15 |
0x41ba24 XOR %R9D,%R9D |
0x41ba27 XOR %R13D,%R13D |
0x41ba2a VMOVDQU 0xda94e(%RIP),%YMM9 |
0x41ba32 VMOVDQU 0xda966(%RIP),%YMM10 |
0x41ba3a NOPW (%RAX,%RAX,1) |
(315) 0x41ba40 VMOVUPD (%R15,%R13,8),%YMM4 |
(315) 0x41ba46 VMOVUPD 0x20(%R15,%R13,8),%YMM5 |
(315) 0x41ba4d VMOVUPD 0x40(%R15,%R13,8),%YMM6 |
(315) 0x41ba54 VMOVUPD 0x60(%R15,%R13,8),%YMM7 |
(315) 0x41ba5b VMOVD %R13D,%XMM8 |
(315) 0x41ba60 VPADDD %XMM2,%XMM8,%XMM8 |
(315) 0x41ba64 VPBROADCASTD %XMM8,%YMM8 |
(315) 0x41ba69 VPCMPNEQD %YMM9,%YMM8,%K0 |
(315) 0x41ba70 VPCMPNEQD %YMM10,%YMM8,%K1 |
(315) 0x41ba77 VCMPPD $0x1,%YMM3,%YMM4,%K5 |
(315) 0x41ba7e VCMPPD $0x1,%YMM3,%YMM5,%K2 |
(315) 0x41ba85 KMOVW %K2,0x10(%RSP) |
(315) 0x41ba8b KSHIFTLB $0x4,%K2,%K3 |
(315) 0x41ba91 KORB %K3,%K5,%K3 |
(315) 0x41ba95 VCMPPD $0x1,%YMM3,%YMM6,%K6 |
(315) 0x41ba9c VCMPPD $0x1,%YMM3,%YMM7,%K4 |
(315) 0x41baa3 MOVSXD %R9D,%R9 |
(315) 0x41baa6 LEA (%RSI,%R9,8),%RBX |
(315) 0x41baaa KANDB %K0,%K3,%K7 |
(315) 0x41baae KMOVB %K7,%EDX |
(315) 0x41bab2 KANDW %K1,%K6,%K7 |
(315) 0x41bab6 POPCNT %EDX,%EDX |
(315) 0x41baba VCOMPRESSPD %YMM6,(%RBX,%RDX,8){%K7} |
(315) 0x41bac1 KANDW %K0,%K5,%K5 |
(315) 0x41bac5 VCOMPRESSPD %YMM4,(%RSI,%R9,8){%K5} |
(315) 0x41bacc KSHIFTLB $0x4,%K4,%K2 |
(315) 0x41bad2 KORB %K2,%K6,%K2 |
(315) 0x41bad6 KMOVB %K7,0xe(%RSP) |
(315) 0x41badc KSHIFTRB $0x4,%K1,%K6 |
(315) 0x41bae2 KANDW %K6,%K4,%K4 |
(315) 0x41bae6 LEA (%RBX,%RDX,8),%RDX |
(315) 0x41baea MOVZX 0xe(%RSP),%ECX |
(315) 0x41baef POPCNT %ECX,%ECX |
(315) 0x41baf3 VCOMPRESSPD %YMM7,(%RDX,%RCX,8){%K4} |
(315) 0x41bafa KUNPCKBW %K3,%K2,%K2 |
(315) 0x41bafe KMOVB %K5,0xf(%RSP) |
(315) 0x41bb04 KUNPCKBW %K0,%K1,%K1 |
(315) 0x41bb08 KSHIFTRB $0x4,%K0,%K0 |
(315) 0x41bb0e KMOVW 0x10(%RSP),%K3 |
(315) 0x41bb14 KANDW %K0,%K3,%K3 |
(315) 0x41bb18 MOVZX 0xf(%RSP),%ECX |
(315) 0x41bb1d POPCNT %ECX,%ECX |
(315) 0x41bb21 VCOMPRESSPD %YMM5,(%RBX,%RCX,8){%K3} |
(315) 0x41bb28 KANDW %K1,%K2,%K0 |
(315) 0x41bb2c KMOVW %K0,%ECX |
(315) 0x41bb30 POPCNT %ECX,%ECX |
(315) 0x41bb34 ADD %ECX,%R9D |
(315) 0x41bb37 ADD $0x10,%R13 |
(315) 0x41bb3b CMP %R10,%R13 |
(315) 0x41bb3e JB 41ba40 |
0x41bb44 CMP %R12,%R10 |
0x41bb47 MOV 0x18(%RSP),%RDX |
0x41bb4c MOV %RAX,%R15 |
0x41bb4f MOV 0x50(%RSP),%RAX |
0x41bb54 MOV 0x48(%RSP),%R13 |
0x41bb59 JNE 41bd82 |
0x41bb5f TEST %R9D,%R9D |
0x41bb62 JLE 41b9b5 |
0x41bb68 VMOVSD 0x238(%R8),%XMM3 |
0x41bb71 MOV 0x218(%R8),%R10 |
0x41bb78 VMOVSD 0x18(%R8),%XMM17 |
0x41bb7f VMOVSD 0x20(%R8),%XMM1 |
0x41bb85 VMOVSD 0x28(%R8),%XMM5 |
0x41bb8b VMOVSD 0x30(%R8),%XMM4 |
0x41bb91 VMOVSD 0x38(%R8),%XMM16 |
0x41bb98 VMOVSD 0x40(%R8),%XMM10 |
0x41bb9e VMOVSD 0x48(%R8),%XMM11 |
0x41bba4 VMOVSD 0x50(%R8),%XMM14 |
0x41bbaa VMOVSD 0x58(%R8),%XMM6 |
0x41bbb0 VMOVSD 0x60(%R8),%XMM2 |
0x41bbb6 VMOVSD 0x68(%R8),%XMM9 |
0x41bbbc VMOVSD 0x70(%R8),%XMM8 |
0x41bbc2 VMOVSD 0x78(%R8),%XMM12 |
0x41bbc8 VMOVSD 0x80(%R8),%XMM7 |
0x41bbd1 VMOVSD 0x88(%R8),%XMM15 |
0x41bbda VMOVSD 0x90(%R8),%XMM0 |
0x41bbe3 MOV %R9D,%R9D |
0x41bbe6 MOV %R9,%R8 |
0x41bbe9 VPBROADCASTQ %R9,%YMM18 |
0x41bbef VPBROADCASTQ %R10,%YMM13 |
0x41bbf5 MOV $-0x4,%ECX |
0x41bbfa VBROADCASTSD %XMM3,%YMM19 |
0x41bc00 VBROADCASTSD %XMM17,%YMM3 |
0x41bc06 VBROADCASTSD %XMM1,%YMM1 |
0x41bc0b VBROADCASTSD %XMM5,%YMM5 |
0x41bc10 VBROADCASTSD %XMM4,%YMM4 |
0x41bc15 VBROADCASTSD %XMM16,%YMM16 |
0x41bc1b VBROADCASTSD %XMM10,%YMM10 |
0x41bc20 VBROADCASTSD %XMM11,%YMM11 |
0x41bc25 VBROADCASTSD %XMM14,%YMM14 |
0x41bc2a VBROADCASTSD %XMM6,%YMM6 |
0x41bc2f VBROADCASTSD %XMM2,%YMM2 |
0x41bc34 VBROADCASTSD %XMM9,%YMM9 |
0x41bc39 VBROADCASTSD %XMM8,%YMM8 |
0x41bc3e VBROADCASTSD %XMM12,%YMM12 |
0x41bc43 VBROADCASTSD %XMM7,%YMM7 |
0x41bc48 VBROADCASTSD %XMM15,%YMM15 |
0x41bc4d VBROADCASTSD %XMM0,%YMM17 |
0x41bc53 AND %RCX,%R8 |
0x41bc56 JE 41b878 |
0x41bc5c VXORPD %XMM0,%XMM0,%XMM0 |
0x41bc60 XOR %R11D,%R11D |
0x41bc63 NOPW %CS:(%RAX,%RAX,1) |
(313) 0x41bc70 VMULPD (%RSI,%R11,8),%YMM19,%YMM20 |
(313) 0x41bc77 VCVTTPD2DQ %YMM20,%XMM21 |
(313) 0x41bc7d VXORPD %XMM22,%XMM22,%XMM22 |
(313) 0x41bc83 KXNORW %K0,%K0,%K1 |
(313) 0x41bc87 VGATHERDPD (%R10,%XMM21,8),%YMM22{%K1} |
(313) 0x41bc8e VXORPD %XMM23,%XMM23,%XMM23 |
(313) 0x41bc94 KXNORW %K0,%K0,%K1 |
(313) 0x41bc98 VGATHERDPD 0x8(%R10,%XMM21,8),%YMM23{%K1} |
(313) 0x41bca0 VXORPD %XMM24,%XMM24,%XMM24 |
(313) 0x41bca6 KXNORW %K0,%K0,%K1 |
(313) 0x41bcaa VGATHERDPD 0x10(%R10,%XMM21,8),%YMM24{%K1} |
(313) 0x41bcb2 VRNDSCALEPD $0xb,%YMM20,%YMM25 |
(313) 0x41bcb9 VSUBPD %YMM25,%YMM20,%YMM20 |
(313) 0x41bcbf VXORPD %XMM25,%XMM25,%XMM25 |
(313) 0x41bcc5 KXNORW %K0,%K0,%K1 |
(313) 0x41bcc9 VGATHERDPD 0x18(%R10,%XMM21,8),%YMM25{%K1} |
(313) 0x41bcd1 VMOVAPD %YMM3,%YMM21 |
(313) 0x41bcd7 VFMADD213PD %YMM1,%YMM20,%YMM21 |
(313) 0x41bcdd VFMADD213PD %YMM5,%YMM20,%YMM21 |
(313) 0x41bce3 VFMADD213PD %YMM4,%YMM20,%YMM21 |
(313) 0x41bce9 VFMADD213PD %YMM0,%YMM22,%YMM21 |
(313) 0x41bcef VMOVAPD %YMM16,%YMM0 |
(313) 0x41bcf5 VFMADD213PD %YMM10,%YMM20,%YMM0 |
(313) 0x41bcfb VFMADD213PD %YMM11,%YMM20,%YMM0 |
(313) 0x41bd01 VFMADD213PD %YMM14,%YMM20,%YMM0 |
(313) 0x41bd07 VFMADD213PD %YMM21,%YMM23,%YMM0 |
(313) 0x41bd0d VMOVAPD %YMM6,%YMM21 |
(313) 0x41bd13 VFMADD213PD %YMM2,%YMM20,%YMM21 |
(313) 0x41bd19 VFMADD213PD %YMM9,%YMM20,%YMM21 |
(313) 0x41bd1f VFMADD213PD %YMM8,%YMM20,%YMM21 |
(313) 0x41bd25 VFMADD213PD %YMM0,%YMM24,%YMM21 |
(313) 0x41bd2b VMOVAPD %YMM12,%YMM0 |
(313) 0x41bd2f VFMADD213PD %YMM7,%YMM20,%YMM0 |
(313) 0x41bd35 VFMADD213PD %YMM15,%YMM20,%YMM0 |
(313) 0x41bd3b VFMADD213PD %YMM17,%YMM20,%YMM0 |
(313) 0x41bd41 VFMADD213PD %YMM21,%YMM25,%YMM0 |
(313) 0x41bd47 ADD $0x4,%R11 |
(313) 0x41bd4b CMP %R8,%R11 |
(313) 0x41bd4e JB 41bc70 |
0x41bd54 VEXTRACTF32X4 $0x1,%YMM0,%XMM20 |
0x41bd5b VADDPD %XMM20,%XMM0,%XMM0 |
0x41bd61 VSHUFPD $0x1,%XMM0,%XMM0,%XMM20 |
0x41bd68 VADDSD %XMM20,%XMM0,%XMM0 |
0x41bd6e CMP %R9,%R8 |
0x41bd71 JNE 41b87f |
0x41bd77 JMP 41b9b5 |
0x41bd7c XOR %R10D,%R10D |
0x41bd7f XOR %R9D,%R9D |
0x41bd82 SUB %R10,%R12 |
0x41bd85 ADD %R10D,%R11D |
0x41bd88 NOT %R11D |
0x41bd8b ADD %R10,%R14 |
0x41bd8e LEA (%RDX,%R14,8),%R10 |
0x41bd92 XOR %EBX,%EBX |
0x41bd94 JMP 41bdac |
(314) 0x41bda0 INC %RBX |
(314) 0x41bda3 CMP %RBX,%R12 |
(314) 0x41bda6 JE 41bb5f |
(314) 0x41bdac VMOVSD (%R10,%RBX,8),%XMM2 |
(314) 0x41bdb2 VUCOMISD %XMM2,%XMM1 |
(314) 0x41bdb6 JBE 41bda0 |
(314) 0x41bdb8 CMP %EBX,%R11D |
(314) 0x41bdbb JE 41bda0 |
(314) 0x41bdbd MOVSXD %R9D,%R9 |
(314) 0x41bdc0 VMOVSD %XMM2,(%RSI,%R9,8) |
(314) 0x41bdc6 INC %R9D |
(314) 0x41bdc9 JMP 41bda0 |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h: 134 - 146 |
-------------------------------------------------------------------------------- |
134: for (int k = 0; k < ratios.size(); ++k) |
[...] |
143: for (int jg = 0; jg < NumGroups; ++jg) |
144: { |
145: if (F[jg] != nullptr) |
146: curVat += F[jg]->evaluateV(-1, Ions.first(jg), Ions.last(jg), dist, DistCompressed.data()); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/shared_ptr_base.h: 1296 - 1296 |
-------------------------------------------------------------------------------- |
1296: { return _M_ptr; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1169 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 313 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 236 - 260 |
-------------------------------------------------------------------------------- |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.09 |
CQA speedup if FP arith vectorized | 1.17 |
CQA speedup if fully vectorized | 3.54 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.07 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | OneBodyJastrowRef.h:134-134,OneBodyJastrowRef.h:143-146,shared_ptr_base.h:1296-1296,stl_vector.h:1064-1064,stl_vector.h:1169-1169,ParticleSet.h:313-313,BsplineFunctor.h:236-236,BsplineFunctor.h:240-240,BsplineFunctor.h:246-251,BsplineFunctor.h:256-260,OhmmsVector.h:223-223 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 28.33 |
CQA cycles if no scalar integer | 26.00 |
CQA cycles if FP arith vectorized | 24.25 |
CQA cycles if fully vectorized | 8.00 |
Front-end cycles | 28.33 |
DIV/SQRT cycles | 22.00 |
P0 cycles | 26.50 |
P1 cycles | 15.00 |
P2 cycles | 15.00 |
P3 cycles | 0.00 |
P4 cycles | 26.50 |
P5 cycles | 12.60 |
P6 cycles | 0.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 12.40 |
P10 cycles | 15.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 30.74 - 88.81 |
Stall cycles (UFS) | 3.02 - 61.01 |
Nb insns | 153.00 |
Nb uops | 170.00 |
Nb loads | 34.00 |
Nb stores | 0.00 |
Nb stack references | 3.00 |
FLOP/cycle | 4.94 |
Nb FLOP add-sub | 12.00 |
Nb FLOP mul | 8.00 |
Nb FLOP fma | 60.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.38 |
Bytes prefetched | 0.00 |
Bytes loaded | 464.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 44.64 |
Vectorization ratio load | 29.63 |
Vectorization ratio store | NA |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 41.67 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 40.98 |
Vector-efficiency ratio all | 25.56 |
Vector-efficiency ratio load | 23.61 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 22.92 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 22.13 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.09 |
CQA speedup if FP arith vectorized | 1.17 |
CQA speedup if fully vectorized | 3.54 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.07 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | OneBodyJastrowRef.h:134-134,OneBodyJastrowRef.h:143-146,shared_ptr_base.h:1296-1296,stl_vector.h:1064-1064,stl_vector.h:1169-1169,ParticleSet.h:313-313,BsplineFunctor.h:236-236,BsplineFunctor.h:240-240,BsplineFunctor.h:246-251,BsplineFunctor.h:256-260,OhmmsVector.h:223-223 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 28.33 |
CQA cycles if no scalar integer | 26.00 |
CQA cycles if FP arith vectorized | 24.25 |
CQA cycles if fully vectorized | 8.00 |
Front-end cycles | 28.33 |
DIV/SQRT cycles | 22.00 |
P0 cycles | 26.50 |
P1 cycles | 15.00 |
P2 cycles | 15.00 |
P3 cycles | 0.00 |
P4 cycles | 26.50 |
P5 cycles | 12.60 |
P6 cycles | 0.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 12.40 |
P10 cycles | 15.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 30.74 - 88.81 |
Stall cycles (UFS) | 3.02 - 61.01 |
Nb insns | 153.00 |
Nb uops | 170.00 |
Nb loads | 34.00 |
Nb stores | 0.00 |
Nb stack references | 3.00 |
FLOP/cycle | 4.94 |
Nb FLOP add-sub | 12.00 |
Nb FLOP mul | 8.00 |
Nb FLOP fma | 60.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.38 |
Bytes prefetched | 0.00 |
Bytes loaded | 464.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 44.64 |
Vectorization ratio load | 29.63 |
Vectorization ratio store | NA |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 41.67 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 40.98 |
Vector-efficiency ratio all | 25.56 |
Vector-efficiency ratio load | 23.61 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 22.92 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 22.13 |
Path / |
Function | _ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | OneBodyJastrowRef.h:134-146 |
Module | exec |
nb instructions | 153 |
nb uops | 170 |
loop length | 822 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 21 |
used ymm registers | 27 |
used zmm registers | 0 |
nb stack references | 3 |
ADD-SUB / MUL ratio | 3.50 |
micro-operation queue | 28.33 cycles |
front end | 28.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 22.00 | 26.50 | 15.00 | 15.00 | 0.00 | 26.50 | 12.60 | 0.00 | 0.00 | 0.00 | 12.40 | 15.00 |
cycles | 22.00 | 26.50 | 15.00 | 15.00 | 0.00 | 26.50 | 12.60 | 0.00 | 0.00 | 0.00 | 12.40 | 15.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 30.74-88.81 |
Stall cycles | 3.02-61.01 |
ROB full (events) | 0.11-64.00 |
LM full (events) | 2.97-0.01 |
Front-end | 28.33 |
Dispatch | 26.50 |
Overall L1 | 28.33 |
all | 26% |
load | 75% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 18% |
all | 51% |
load | 21% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 42% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 53% |
all | 44% |
load | 29% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 41% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 40% |
all | 18% |
load | 40% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 28% |
load | 20% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 21% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 26% |
all | 25% |
load | 23% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 22% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %R8,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSUBQ %YMM20,%YMM18,%YMM18 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPCMPNLEUQ 0xdab2a(%RIP),%YMM18,%K1 | |||||||||||||||
VMOVUPD (%RSI,%R8,8),%YMM18{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM18,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM19,%YMM30,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTTPD2DQ %YMM18,%XMM19 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VPMOVSXDQ %XMM19,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM19,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPADDQ %YMM19,%YMM13,%YMM13 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPXORD %XMM19,%XMM19,%XMM19 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VGATHERQPD (,%YMM13,1),%YMM19{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VRNDSCALEPD $0xb,%YMM18,%YMM20 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VGATHERQPD 0x8(,%YMM13,1),%YMM21{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VSUBPD %YMM20,%YMM18,%YMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM19,%YMM29{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM21,%YMM28{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM16,%YMM18,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VGATHERQPD 0x10(,%YMM13,1),%YMM16{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFMADD213PD %YMM11,%YMM18,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM14,%YMM18,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM16,%YMM27{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VGATHERQPD 0x18(,%YMM13,1),%YMM11{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFMADD231PD %YMM12,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM15,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM17,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM11,%YMM26{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM3,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM5,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM4,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM10,%YMM29,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM18,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM9,%YMM18,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM8,%YMM18,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM1,%YMM27,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM26,%YMM2{%K1}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM1,%XMM2,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM31,%XMM0,%XMM31 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA 0x1(%RDI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JE 41b760 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15,%RDI,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R8,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 41b9bb <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2cb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x268(%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R9),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R9,%RDI,4),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x20,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %R11D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %R12D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 41b9b5 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x8(%R8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %R11D,%R14 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV $-0x10,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 41bd7c <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x68c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R11,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R14,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQU 0xda94e(%RIP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU 0xda966(%RIP),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R12,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x18(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41bd82 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x692> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R9D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 41b9b5 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x238(%R8),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x218(%R8),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x18(%R8),%XMM17 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x20(%R8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x28(%R8),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x30(%R8),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x38(%R8),%XMM16 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40(%R8),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x48(%R8),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x50(%R8),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x58(%R8),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x60(%R8),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%R8),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%R8),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%R8),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%R8),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%R8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x90(%R8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %R9,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV $-0x4,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD %XMM3,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM10,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM0,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND %RCX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 41b878 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x188> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTF32X4 $0x1,%YMM0,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM20,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM20 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDSD %XMM20,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
CMP %R9,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 41b87f <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x18f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 41b9b5 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2c5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R10,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R10D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOT %R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R10,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RDX,%R14,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41bdac <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x6bc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
Function | _ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | OneBodyJastrowRef.h:134-146 |
Module | exec |
nb instructions | 153 |
nb uops | 170 |
loop length | 822 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 21 |
used ymm registers | 27 |
used zmm registers | 0 |
nb stack references | 3 |
ADD-SUB / MUL ratio | 3.50 |
micro-operation queue | 28.33 cycles |
front end | 28.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 22.00 | 26.50 | 15.00 | 15.00 | 0.00 | 26.50 | 12.60 | 0.00 | 0.00 | 0.00 | 12.40 | 15.00 |
cycles | 22.00 | 26.50 | 15.00 | 15.00 | 0.00 | 26.50 | 12.60 | 0.00 | 0.00 | 0.00 | 12.40 | 15.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 30.74-88.81 |
Stall cycles | 3.02-61.01 |
ROB full (events) | 0.11-64.00 |
LM full (events) | 2.97-0.01 |
Front-end | 28.33 |
Dispatch | 26.50 |
Overall L1 | 28.33 |
all | 26% |
load | 75% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 18% |
all | 51% |
load | 21% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 42% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 53% |
all | 44% |
load | 29% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 41% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 40% |
all | 18% |
load | 40% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 28% |
load | 20% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 21% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 26% |
all | 25% |
load | 23% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 22% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %R8,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSUBQ %YMM20,%YMM18,%YMM18 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPCMPNLEUQ 0xdab2a(%RIP),%YMM18,%K1 | |||||||||||||||
VMOVUPD (%RSI,%R8,8),%YMM18{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM18,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM19,%YMM30,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTTPD2DQ %YMM18,%XMM19 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VPMOVSXDQ %XMM19,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM19,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPADDQ %YMM19,%YMM13,%YMM13 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPXORD %XMM19,%XMM19,%XMM19 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VGATHERQPD (,%YMM13,1),%YMM19{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VRNDSCALEPD $0xb,%YMM18,%YMM20 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VGATHERQPD 0x8(,%YMM13,1),%YMM21{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VSUBPD %YMM20,%YMM18,%YMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM19,%YMM29{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM21,%YMM28{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM16,%YMM18,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VGATHERQPD 0x10(,%YMM13,1),%YMM16{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFMADD213PD %YMM11,%YMM18,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM14,%YMM18,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM16,%YMM27{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VGATHERQPD 0x18(,%YMM13,1),%YMM11{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFMADD231PD %YMM12,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM15,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM17,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM11,%YMM26{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM3,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM5,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM4,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM10,%YMM29,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM18,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM9,%YMM18,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM8,%YMM18,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM1,%YMM27,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM26,%YMM2{%K1}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM1,%XMM2,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM31,%XMM0,%XMM31 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA 0x1(%RDI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JE 41b760 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15,%RDI,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R8,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 41b9bb <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2cb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x268(%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R9),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R9,%RDI,4),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x20,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %R11D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %R12D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 41b9b5 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x8(%R8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %R11D,%R14 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV $-0x10,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 41bd7c <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x68c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R11,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R14,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQU 0xda94e(%RIP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU 0xda966(%RIP),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R12,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x18(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41bd82 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x692> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R9D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 41b9b5 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x238(%R8),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x218(%R8),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x18(%R8),%XMM17 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x20(%R8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x28(%R8),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x30(%R8),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x38(%R8),%XMM16 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40(%R8),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x48(%R8),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x50(%R8),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x58(%R8),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x60(%R8),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%R8),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%R8),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%R8),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%R8),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%R8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x90(%R8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %R9,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV $-0x4,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD %XMM3,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM10,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM0,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND %RCX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 41b878 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x188> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTF32X4 $0x1,%YMM0,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM20,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM20 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDSD %XMM20,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
CMP %R9,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 41b87f <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x18f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 41b9b5 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2c5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R10,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R10D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOT %R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R10,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RDX,%R14,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41bdac <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x6bc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |