Loop Id: 384 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
Loop Id: 384 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
0x429a00 VXORPD %XMM0,%XMM0,%XMM0 |
0x429a04 VMOVSD 0x50(%RSP),%XMM1 |
0x429a0a VSUBSD %XMM0,%XMM1,%XMM0 |
0x429a0e VZEROUPPER |
0x429a11 CALL 529610 <exp> |
0x429a16 MOV 0x48(%RSP),%RCX |
0x429a1b MOV (%RCX),%RAX |
0x429a1e MOV 0x20(%RSP),%RDX |
0x429a23 VMOVSD %XMM0,(%RAX,%RDX,8) |
0x429a28 INC %RDX |
0x429a2b MOV 0x8(%RCX),%RCX |
0x429a2f SUB %RAX,%RCX |
0x429a32 SAR $0x3,%RCX |
0x429a36 MOV %RDX,0x20(%RSP) |
0x429a3b CMP %RDX,%RCX |
0x429a3e MOV 0x40(%RSP),%RDI |
0x429a43 MOV 0x30(%RSP),%RCX |
0x429a48 JBE 42a300 |
0x429a4e CMPB $0,0x298(%RDI) |
0x429a55 JE 42a340 |
0x429a5b MOVSXD 0x2a0(%RDI),%R14 |
0x429a62 MOV 0xd8(%RCX),%RAX |
0x429a69 VMOVSD (%RAX,%R14,8),%XMM0 |
0x429a6f VMOVSD %XMM0,0x50(%RSP) |
0x429a75 MOV 0x290(%RDI),%RBX |
0x429a7c MOV 0x248(%RCX),%ESI |
0x429a82 VZEROUPPER |
0x429a85 CALL 47a890 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> |
0x429a8a MOV 0x30(%RSP),%RDI |
0x429a8f MOV 0xa0(%RDI),%RCX |
0x429a96 TEST %RCX,%RCX |
0x429a99 JE 429a00 |
0x429a9f MOV 0x48(%RAX),%RAX |
0x429aa3 MOV 0x20(%RSP),%RDX |
0x429aa8 LEA (%RDX,%RDX,4),%RDX |
0x429aac MOV 0x18(%RAX,%RDX,8),%RAX |
0x429ab1 MOV %RAX,0x38(%RSP) |
0x429ab6 MOV 0x18(%RBX),%RDX |
0x429aba MOV (%RDX,%R14,4),%ESI |
0x429abe IMUL %ECX,%ESI |
0x429ac1 MOV 0x1d0(%RDI),%RDX |
0x429ac8 MOV 0x200(%RDI),%RAX |
0x429acf MOV %RAX,0x60(%RSP) |
0x429ad4 MOVSXD %ESI,%RAX |
0x429ad7 MOV %RAX,0x58(%RSP) |
0x429adc MOV 0x268(%RBX),%RAX |
0x429ae3 MOV %RAX,0x68(%RSP) |
0x429ae8 DEC %RCX |
0x429aeb VPBROADCASTD %R14D,%YMM1 |
0x429af1 VXORPD %XMM0,%XMM0,%XMM0 |
0x429af5 XOR %R9D,%R9D |
0x429af8 VMOVDQU64 0x115cbe(%RIP),%YMM20 |
0x429b02 VMOVDQU64 0x115cd4(%RIP),%YMM21 |
0x429b0c MOV %RCX,0x28(%RSP) |
0x429b11 JMP 429b55 |
(385) 0x429b40 VXORPD %XMM6,%XMM6,%XMM6 |
(385) 0x429b44 VADDSD %XMM0,%XMM6,%XMM0 |
(385) 0x429b48 CMP %RCX,%R9 |
(385) 0x429b4b LEA 0x1(%R9),%R9 |
(385) 0x429b4f JE 429a04 |
(385) 0x429b55 MOV 0x68(%RSP),%RAX |
(385) 0x429b5a MOV 0x18(%RAX),%RSI |
(385) 0x429b5e MOV (%RSI,%R9,4),%R13 |
(385) 0x429b62 MOV %R13,%RSI |
(385) 0x429b65 SHR $0x20,%RSI |
(385) 0x429b69 SUB %R13D,%ESI |
(385) 0x429b6c TEST %ESI,%ESI |
(385) 0x429b6e JLE 429b40 |
(385) 0x429b70 MOV %R14,%RCX |
(385) 0x429b73 MOV 0x58(%RSP),%RAX |
(385) 0x429b78 LEA (%R9,%RAX,1),%R10 |
(385) 0x429b7c MOV 0x60(%RSP),%RAX |
(385) 0x429b81 MOV (%RAX,%R10,8),%R11 |
(385) 0x429b85 VMOVSD 0x8(%R11),%XMM2 |
(385) 0x429b8b MOV %ESI,%R15D |
(385) 0x429b8e MOV %R15,%R10 |
(385) 0x429b91 MOVSXD %R13D,%R12 |
(385) 0x429b94 MOV $-0x10,%EAX |
(385) 0x429b99 AND %RAX,%R10 |
(385) 0x429b9c JE 429f80 |
(385) 0x429ba2 VMOVQ %R13,%XMM3 |
(385) 0x429ba7 VBROADCASTSD %XMM2,%YMM4 |
(385) 0x429bac MOV 0x38(%RSP),%RAX |
(385) 0x429bb1 LEA (%RAX,%R12,8),%RBX |
(385) 0x429bb5 XOR %R14D,%R14D |
(385) 0x429bb8 XOR %ESI,%ESI |
(385) 0x429bba JMP 429bdb |
(388) 0x429bc0 KMOVD %K1,%EAX |
(388) 0x429bc4 MOVZX %AX,%EAX |
(388) 0x429bc7 POPCNT %EAX,%EAX |
(388) 0x429bcb ADD %EAX,%R14D |
(388) 0x429bce ADD $0x10,%RSI |
(388) 0x429bd2 CMP %R10,%RSI |
(388) 0x429bd5 JAE 429d00 |
(388) 0x429bdb VMOVUPD (%RBX,%RSI,8),%YMM8 |
(388) 0x429be0 VMOVUPD 0x20(%RBX,%RSI,8),%YMM6 |
(388) 0x429be6 VMOVUPD 0x40(%RBX,%RSI,8),%YMM7 |
(388) 0x429bec VMOVUPD 0x60(%RBX,%RSI,8),%YMM5 |
(388) 0x429bf2 VMOVD %ESI,%XMM9 |
(388) 0x429bf6 VPADDD %XMM3,%XMM9,%XMM9 |
(388) 0x429bfa VPBROADCASTD %XMM9,%YMM9 |
(388) 0x429bff VPADDD %YMM20,%YMM9,%YMM10 |
(388) 0x429c05 VPADDD %YMM21,%YMM9,%YMM9 |
(388) 0x429c0b VPCMPNEQD %YMM1,%YMM9,%K0 |
(388) 0x429c12 VPCMPNEQD %YMM1,%YMM10,%K1 |
(388) 0x429c19 KUNPCKBW %K0,%K1,%K0 |
(388) 0x429c1d VCMPPD $0x1,%YMM4,%YMM8,%K1 |
(388) 0x429c24 VCMPPD $0x1,%YMM4,%YMM6,%K2 |
(388) 0x429c2b KSHIFTLB $0x4,%K2,%K2 |
(388) 0x429c31 KORB %K2,%K1,%K1 |
(388) 0x429c35 VCMPPD $0x1,%YMM4,%YMM7,%K2 |
(388) 0x429c3c VCMPPD $0x1,%YMM4,%YMM5,%K3 |
(388) 0x429c43 KSHIFTLB $0x4,%K3,%K3 |
(388) 0x429c49 KORB %K3,%K2,%K2 |
(388) 0x429c4d KUNPCKBW %K1,%K2,%K1 |
(388) 0x429c51 KANDW %K0,%K1,%K1 |
(388) 0x429c55 KORTESTW %K1,%K1 |
(388) 0x429c59 JE 429bc0 |
(388) 0x429c5f MOVSXD %R14D,%RDI |
(388) 0x429c62 LEA (%RDX,%RDI,8),%RAX |
(388) 0x429c66 VCOMPRESSPD %YMM8,(%RDX,%RDI,8){%K1} |
(388) 0x429c6d KSHIFTRW $0x8,%K1,%K2 |
(388) 0x429c73 KMOVB %K1,%EDI |
(388) 0x429c77 POPCNT %EDI,%EDI |
(388) 0x429c7b VCOMPRESSPD %YMM7,(%RAX,%RDI,8){%K2} |
(388) 0x429c82 KSHIFTLB $0x4,%K1,%K0 |
(388) 0x429c88 KSHIFTRB $0x4,%K0,%K0 |
(388) 0x429c8e KMOVB %K0,0x1f(%RSP) |
(388) 0x429c94 KSHIFTRB $0x4,%K1,%K3 |
(388) 0x429c9a MOVZX 0x1f(%RSP),%R8D |
(388) 0x429ca0 POPCNT %R8D,%R8D |
(388) 0x429ca5 VCOMPRESSPD %YMM6,(%RAX,%R8,8){%K3} |
(388) 0x429cac KSHIFTLB $0x4,%K2,%K0 |
(388) 0x429cb2 KSHIFTRB $0x4,%K0,%K0 |
(388) 0x429cb8 KMOVB %K0,0x1e(%RSP) |
(388) 0x429cbe LEA (%RAX,%RDI,8),%RAX |
(388) 0x429cc2 KSHIFTRB $0x4,%K2,%K2 |
(388) 0x429cc8 MOVZX 0x1e(%RSP),%EDI |
(388) 0x429ccd POPCNT %EDI,%EDI |
(388) 0x429cd1 VCOMPRESSPD %YMM5,(%RAX,%RDI,8){%K2} |
(388) 0x429cd8 JMP 429bc0 |
(385) 0x429d00 CMP %R15,%R10 |
(385) 0x429d03 JNE 429f86 |
(385) 0x429d09 TEST %R14D,%R14D |
(385) 0x429d0c JLE 42a000 |
(385) 0x429d12 VMOVSD 0x238(%R11),%XMM19 |
(385) 0x429d19 MOV 0x218(%R11),%R10 |
(385) 0x429d20 VMOVSD 0x18(%R11),%XMM16 |
(385) 0x429d27 VMOVSD 0x20(%R11),%XMM17 |
(385) 0x429d2e VMOVSD 0x28(%R11),%XMM8 |
(385) 0x429d34 VMOVSD 0x30(%R11),%XMM18 |
(385) 0x429d3b VMOVSD 0x38(%R11),%XMM9 |
(385) 0x429d41 VMOVSD 0x40(%R11),%XMM14 |
(385) 0x429d47 VMOVSD 0x48(%R11),%XMM12 |
(385) 0x429d4d VMOVSD 0x50(%R11),%XMM15 |
(385) 0x429d53 VMOVSD 0x58(%R11),%XMM28 |
(385) 0x429d5a VMOVSD 0x60(%R11),%XMM29 |
(385) 0x429d61 VMOVSD 0x68(%R11),%XMM30 |
(385) 0x429d68 VMOVSD 0x70(%R11),%XMM31 |
(385) 0x429d6f VMOVSD 0x78(%R11),%XMM2 |
(385) 0x429d75 VMOVSD 0x80(%R11),%XMM3 |
(385) 0x429d7e VMOVSD 0x88(%R11),%XMM4 |
(385) 0x429d87 VMOVSD 0x90(%R11),%XMM5 |
(385) 0x429d90 MOV %R14D,%R14D |
(385) 0x429d93 MOV %R14,%R11 |
(385) 0x429d96 MOV $-0x4,%EAX |
(385) 0x429d9b AND %RAX,%R11 |
(385) 0x429d9e VMOVUPD %XMM2,0x110(%RSP) |
(385) 0x429da7 VMOVUPD %XMM3,0x100(%RSP) |
(385) 0x429db0 JE 42a040 |
(385) 0x429db6 VBROADCASTSD %XMM19,%YMM6 |
(385) 0x429dbc VBROADCASTSD %XMM16,%YMM20 |
(385) 0x429dc2 VBROADCASTSD %XMM17,%YMM21 |
(385) 0x429dc8 VMOVUPD %XMM8,0x90(%RSP) |
(385) 0x429dd1 VBROADCASTSD %XMM8,%YMM22 |
(385) 0x429dd7 VBROADCASTSD %XMM18,%YMM23 |
(385) 0x429ddd VMOVUPD %XMM9,0x70(%RSP) |
(385) 0x429de3 VBROADCASTSD %XMM9,%YMM24 |
(385) 0x429de9 VBROADCASTSD %XMM14,%YMM25 |
(385) 0x429def VMOVUPD %XMM12,0x80(%RSP) |
(385) 0x429df8 VBROADCASTSD %XMM12,%YMM26 |
(385) 0x429dfe VBROADCASTSD %XMM15,%YMM27 |
(385) 0x429e04 VMOVUPD %XMM28,0xc0(%RSP) |
(385) 0x429e0c VBROADCASTSD %XMM28,%YMM28 |
(385) 0x429e12 VMOVUPD %XMM29,0xb0(%RSP) |
(385) 0x429e1a VBROADCASTSD %XMM29,%YMM29 |
(385) 0x429e20 VMOVUPD %XMM30,0xd0(%RSP) |
(385) 0x429e28 VBROADCASTSD %XMM30,%YMM30 |
(385) 0x429e2e VMOVUPD %XMM31,0xa0(%RSP) |
(385) 0x429e36 VBROADCASTSD %XMM31,%YMM31 |
(385) 0x429e3c VBROADCASTSD %XMM2,%YMM2 |
(385) 0x429e41 VBROADCASTSD %XMM3,%YMM3 |
(385) 0x429e46 VMOVUPD %XMM4,0xf0(%RSP) |
(385) 0x429e4f VBROADCASTSD %XMM4,%YMM4 |
(385) 0x429e54 VMOVUPD %XMM5,0xe0(%RSP) |
(385) 0x429e5d VBROADCASTSD %XMM5,%YMM5 |
(385) 0x429e62 VXORPD %XMM7,%XMM7,%XMM7 |
(385) 0x429e66 XOR %ESI,%ESI |
(385) 0x429e68 NOPL (%RAX,%RAX,1) |
(386) 0x429e70 VMULPD (%RDX,%RSI,8),%YMM6,%YMM8 |
(386) 0x429e75 VCVTTPD2DQ %YMM8,%XMM9 |
(386) 0x429e7a VPXOR %XMM10,%XMM10,%XMM10 |
(386) 0x429e7f KXNORW %K0,%K0,%K1 |
(386) 0x429e83 VGATHERDPD (%R10,%XMM9,8),%YMM10{%K1} |
(386) 0x429e8a VXORPD %XMM11,%XMM11,%XMM11 |
(386) 0x429e8f KXNORW %K0,%K0,%K1 |
(386) 0x429e93 VGATHERDPD 0x8(%R10,%XMM9,8),%YMM11{%K1} |
(386) 0x429e9b VXORPD %XMM12,%XMM12,%XMM12 |
(386) 0x429ea0 KXNORW %K0,%K0,%K1 |
(386) 0x429ea4 VGATHERDPD 0x10(%R10,%XMM9,8),%YMM12{%K1} |
(386) 0x429eac VROUNDPD $0xb,%YMM8,%YMM13 |
(386) 0x429eb2 VSUBPD %YMM13,%YMM8,%YMM8 |
(386) 0x429eb7 VXORPD %XMM13,%XMM13,%XMM13 |
(386) 0x429ebc KXNORW %K0,%K0,%K1 |
(386) 0x429ec0 VGATHERDPD 0x18(%R10,%XMM9,8),%YMM13{%K1} |
(386) 0x429ec8 VMOVAPD %YMM8,%YMM9 |
(386) 0x429ecd VFMADD213PD %YMM21,%YMM20,%YMM9 |
(386) 0x429ed3 VFMADD213PD %YMM22,%YMM8,%YMM9 |
(386) 0x429ed9 VFMADD213PD %YMM23,%YMM8,%YMM9 |
(386) 0x429edf VFMADD213PD %YMM7,%YMM10,%YMM9 |
(386) 0x429ee4 VMOVAPD %YMM8,%YMM7 |
(386) 0x429ee8 VFMADD213PD %YMM25,%YMM24,%YMM7 |
(386) 0x429eee VFMADD213PD %YMM26,%YMM8,%YMM7 |
(386) 0x429ef4 VFMADD213PD %YMM27,%YMM8,%YMM7 |
(386) 0x429efa VFMADD213PD %YMM9,%YMM11,%YMM7 |
(386) 0x429eff VMOVAPD %YMM8,%YMM9 |
(386) 0x429f04 VFMADD213PD %YMM29,%YMM28,%YMM9 |
(386) 0x429f0a VFMADD213PD %YMM30,%YMM8,%YMM9 |
(386) 0x429f10 VFMADD213PD %YMM31,%YMM8,%YMM9 |
(386) 0x429f16 VFMADD213PD %YMM7,%YMM12,%YMM9 |
(386) 0x429f1b VMOVAPD %YMM8,%YMM7 |
(386) 0x429f1f VFMADD213PD %YMM3,%YMM2,%YMM7 |
(386) 0x429f24 VFMADD213PD %YMM4,%YMM8,%YMM7 |
(386) 0x429f29 VFMADD213PD %YMM5,%YMM8,%YMM7 |
(386) 0x429f2e VFMADD213PD %YMM9,%YMM13,%YMM7 |
(386) 0x429f33 ADD $0x4,%RSI |
(386) 0x429f37 CMP %R11,%RSI |
(386) 0x429f3a JB 429e70 |
(385) 0x429f40 VEXTRACTF128 $0x1,%YMM7,%XMM2 |
(385) 0x429f46 VADDPD %XMM2,%XMM7,%XMM2 |
(385) 0x429f4a VSHUFPD $0x1,%XMM2,%XMM2,%XMM3 |
(385) 0x429f4f VADDSD %XMM3,%XMM2,%XMM6 |
(385) 0x429f53 CMP %R14,%R11 |
(385) 0x429f56 JNE 42a080 |
(385) 0x429f5c VMOVDQU64 0x11585a(%RIP),%YMM20 |
(385) 0x429f66 VMOVDQU64 0x115870(%RIP),%YMM21 |
(385) 0x429f70 MOV %RCX,%R14 |
(385) 0x429f73 JMP 42a2e4 |
(385) 0x429f80 XOR %R10D,%R10D |
(385) 0x429f83 XOR %R14D,%R14D |
(385) 0x429f86 MOV %ECX,%ESI |
(385) 0x429f88 SUB %R13D,%ESI |
(385) 0x429f8b MOV 0x38(%RSP),%RAX |
(385) 0x429f90 LEA (%RAX,%R12,8),%RBX |
(385) 0x429f94 JMP 429fcc |
(387) 0x429fc0 INC %R10 |
(387) 0x429fc3 CMP %R10,%R15 |
(387) 0x429fc6 JE 429d09 |
(387) 0x429fcc VMOVSD (%RBX,%R10,8),%XMM3 |
(387) 0x429fd2 VUCOMISD %XMM3,%XMM2 |
(387) 0x429fd6 JBE 429fc0 |
(387) 0x429fd8 CMP %R10D,%ESI |
(387) 0x429fdb JE 429fc0 |
(387) 0x429fdd MOVSXD %R14D,%R14 |
(387) 0x429fe0 VMOVSD %XMM3,(%RDX,%R14,8) |
(387) 0x429fe6 INC %R14D |
(387) 0x429fe9 JMP 429fc0 |
(385) 0x42a000 VXORPD %XMM6,%XMM6,%XMM6 |
(385) 0x42a004 MOV %RCX,%R14 |
(385) 0x42a007 JMP 42a2e4 |
(385) 0x42a040 VPBROADCASTQ %R14,%YMM2 |
(385) 0x42a046 VXORPD %XMM6,%XMM6,%XMM6 |
(385) 0x42a04a XOR %R11D,%R11D |
(385) 0x42a04d JMP 42a0e4 |
(385) 0x42a080 VPBROADCASTQ %R14,%YMM2 |
(385) 0x42a086 VMOVDQU64 0x115730(%RIP),%YMM20 |
(385) 0x42a090 VMOVDQU64 0x115746(%RIP),%YMM21 |
(385) 0x42a09a VMOVUPD 0xf0(%RSP),%XMM4 |
(385) 0x42a0a3 VMOVUPD 0xe0(%RSP),%XMM5 |
(385) 0x42a0ac VMOVUPD 0xd0(%RSP),%XMM30 |
(385) 0x42a0b4 VMOVUPD 0xc0(%RSP),%XMM28 |
(385) 0x42a0bc VMOVUPD 0xb0(%RSP),%XMM29 |
(385) 0x42a0c4 VMOVUPD 0xa0(%RSP),%XMM31 |
(385) 0x42a0cc VMOVUPD 0x90(%RSP),%XMM8 |
(385) 0x42a0d5 VMOVUPD 0x80(%RSP),%XMM12 |
(385) 0x42a0de VMOVUPD 0x70(%RSP),%XMM9 |
(385) 0x42a0e4 VPBROADCASTQ %R11,%YMM3 |
(385) 0x42a0ea VPSUBQ %YMM3,%YMM2,%YMM2 |
(385) 0x42a0ee VPCMPNLEUQ 0x1156a7(%RIP),%YMM2,%K1 |
(385) 0x42a0f9 KORTESTB %K1,%K1 |
(385) 0x42a0fd JE 42a2c0 |
(385) 0x42a103 VMOVUPD (%RDX,%R11,8),%YMM2{%K1}{z} |
(385) 0x42a10a VMOVUPD 0x120(%RSP),%YMM3 |
(385) 0x42a113 VMOVAPD %YMM2,%YMM3{%K1} |
(385) 0x42a119 VBROADCASTSD %XMM19,%YMM2 |
(385) 0x42a11f VMOVUPD %YMM3,0x120(%RSP) |
(385) 0x42a128 VMULPD %YMM3,%YMM2,%YMM2 |
(385) 0x42a12c VCVTTPD2DQ %YMM2,%XMM3 |
(385) 0x42a130 VMOVAPD %XMM4,%XMM23 |
(385) 0x42a136 VROUNDPD $0xb,%YMM2,%YMM4 |
(385) 0x42a13c VSUBPD %YMM4,%YMM2,%YMM19 |
(385) 0x42a142 VXORPD %XMM4,%XMM4,%XMM4 |
(385) 0x42a146 KMOVQ %K1,%K2 |
(385) 0x42a14b VGATHERDPD (%R10,%XMM3,8),%YMM4{%K2} |
(385) 0x42a152 VMOVAPD %XMM5,%XMM24 |
(385) 0x42a158 VBROADCASTSD %XMM16,%YMM5 |
(385) 0x42a15e VBROADCASTSD %XMM17,%YMM2 |
(385) 0x42a164 VBROADCASTSD %XMM18,%YMM7 |
(385) 0x42a16a VMOVUPD 0x140(%RSP),%YMM22 |
(385) 0x42a172 VMOVAPD %YMM4,%YMM22{%K1} |
(385) 0x42a178 VXORPD %XMM4,%XMM4,%XMM4 |
(385) 0x42a17c KMOVQ %K1,%K2 |
(385) 0x42a181 VGATHERDPD 0x8(%R10,%XMM3,8),%YMM4{%K2} |
(385) 0x42a189 VBROADCASTSD %XMM8,%YMM8 |
(385) 0x42a18e VBROADCASTSD %XMM9,%YMM9 |
(385) 0x42a193 VBROADCASTSD %XMM14,%YMM10 |
(385) 0x42a198 VBROADCASTSD %XMM15,%YMM11 |
(385) 0x42a19d VBROADCASTSD %XMM12,%YMM12 |
(385) 0x42a1a2 VMOVUPD 0x160(%RSP),%YMM13 |
(385) 0x42a1ab VMOVAPD %YMM4,%YMM13{%K1} |
(385) 0x42a1b1 VFMADD231PD %YMM9,%YMM19,%YMM10 |
(385) 0x42a1b7 VFMADD213PD %YMM12,%YMM19,%YMM10 |
(385) 0x42a1bd VFMADD213PD %YMM11,%YMM19,%YMM10 |
(385) 0x42a1c3 VMOVUPD %YMM13,0x160(%RSP) |
(385) 0x42a1cc VMULPD %YMM13,%YMM10,%YMM4 |
(385) 0x42a1d1 VXORPD %XMM9,%XMM9,%XMM9 |
(385) 0x42a1d6 KMOVQ %K1,%K2 |
(385) 0x42a1db VGATHERDPD 0x10(%R10,%XMM3,8),%YMM9{%K2} |
(385) 0x42a1e3 VBROADCASTSD %XMM28,%YMM10 |
(385) 0x42a1e9 VBROADCASTSD %XMM29,%YMM11 |
(385) 0x42a1ef VBROADCASTSD %XMM31,%YMM12 |
(385) 0x42a1f5 VBROADCASTSD %XMM30,%YMM13 |
(385) 0x42a1fb VXORPD %XMM14,%XMM14,%XMM14 |
(385) 0x42a200 KMOVQ %K1,%K2 |
(385) 0x42a205 VGATHERDPD 0x18(%R10,%XMM3,8),%YMM14{%K2} |
(385) 0x42a20d VMOVUPD 0x180(%RSP),%YMM18 |
(385) 0x42a215 VMOVAPD %YMM9,%YMM18{%K1} |
(385) 0x42a21b VBROADCASTSD 0x110(%RSP),%YMM9 |
(385) 0x42a225 VBROADCASTSD 0x100(%RSP),%YMM3 |
(385) 0x42a22f VBROADCASTSD %XMM24,%YMM15 |
(385) 0x42a235 VBROADCASTSD %XMM23,%YMM16 |
(385) 0x42a23b VMOVUPD 0x1a0(%RSP),%YMM17 |
(385) 0x42a243 VMOVAPD %YMM14,%YMM17{%K1} |
(385) 0x42a249 VFMADD231PD %YMM5,%YMM19,%YMM2 |
(385) 0x42a24f VFMADD213PD %YMM8,%YMM19,%YMM2 |
(385) 0x42a255 VFMADD213PD %YMM7,%YMM19,%YMM2 |
(385) 0x42a25b VMOVUPD %YMM22,0x140(%RSP) |
(385) 0x42a263 VFMADD213PD %YMM4,%YMM22,%YMM2 |
(385) 0x42a269 VFMADD231PD %YMM10,%YMM19,%YMM11 |
(385) 0x42a26f VFMADD213PD %YMM13,%YMM19,%YMM11 |
(385) 0x42a275 VFMADD213PD %YMM12,%YMM19,%YMM11 |
(385) 0x42a27b VMOVUPD %YMM18,0x180(%RSP) |
(385) 0x42a283 VFMADD213PD %YMM2,%YMM18,%YMM11 |
(385) 0x42a289 VFMADD231PD %YMM9,%YMM19,%YMM3 |
(385) 0x42a28f VFMADD213PD %YMM16,%YMM19,%YMM3 |
(385) 0x42a295 VFMADD213PD %YMM15,%YMM19,%YMM3 |
(385) 0x42a29b VMOVUPD %YMM17,0x1a0(%RSP) |
(385) 0x42a2a3 VFMADD213PD %YMM11,%YMM17,%YMM3 |
(385) 0x42a2a9 JMP 42a2c4 |
(385) 0x42a2c0 VPXOR %XMM3,%XMM3,%XMM3 |
(385) 0x42a2c4 MOV %RCX,%R14 |
(385) 0x42a2c7 VMOVAPD %YMM3,%YMM2{%K1}{z} |
(385) 0x42a2cd VEXTRACTF128 $0x1,%YMM2,%XMM3 |
(385) 0x42a2d3 VADDPD %XMM3,%XMM2,%XMM2 |
(385) 0x42a2d7 VSHUFPD $0x1,%XMM2,%XMM2,%XMM3 |
(385) 0x42a2dc VADDSD %XMM3,%XMM2,%XMM2 |
(385) 0x42a2e0 VADDSD %XMM2,%XMM6,%XMM6 |
(385) 0x42a2e4 MOV 0x28(%RSP),%RCX |
(385) 0x42a2e9 JMP 429b44 |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/refwrap.h: 313 - 313 |
-------------------------------------------------------------------------------- |
313: { return *_M_data; } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/shared_ptr_base.h: 1308 - 1308 |
-------------------------------------------------------------------------------- |
1308: { return _M_ptr; } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/optional: 897 - 1213 |
-------------------------------------------------------------------------------- |
897: { return this->_M_payload._M_engaged; } |
[...] |
1213: return this->_M_is_engaged() |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 107 - 132 |
-------------------------------------------------------------------------------- |
107: for (int k = 0; k < ratios.size(); ++k) |
108: ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.getRefPS(), VP.refPtcl, VP.getDistTableAB(myTableID).getDistRow(k).data())); |
[...] |
126: const int igt = P.GroupID[iat] * NumGroups; |
127: for (int jg = 0; jg < NumGroups; ++jg) |
128: { |
129: const FuncType& f2(*F[igt + jg]); |
130: int iStart = P.first(jg); |
131: int iEnd = P.last(jg); |
132: curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
/scratch_na/users/xoserete/qaas_runs/171-417-3180/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 229 - 260 |
-------------------------------------------------------------------------------- |
229: const real_type* restrict distArray = _distArray + iStart; |
[...] |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_vector.h: 806 - 1056 |
-------------------------------------------------------------------------------- |
806: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
933: return *(this->_M_impl._M_start + __n); |
[...] |
951: return *(this->_M_impl._M_start + __n); |
[...] |
1056: { return _M_data_ptr(this->_M_impl._M_start); } |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::WaveFunction::eva[...] | stl_vector.h:806 | exec |
○ | qmcplusplus::NonLocalPP<double[...] | NonLocalPP.hpp:135 | exec |
○ | main.extracted.110 | NewTimer.h:249 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.27 |
CQA speedup if FP arith vectorized | 2.18 |
CQA speedup if fully vectorized | 14.08 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.18 |
Bottlenecks | micro-operation queue |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | refwrap.h:313-313,shared_ptr_base.h:1308-1308,optional:897-897,optional:1213-1213,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249,stl_vector.h:806-806,stl_vector.h:933-933,stl_vector.h:951-951,stl_vector.h:1056-1056 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.83 |
CQA cycles if no scalar integer | 4.33 |
CQA cycles if FP arith vectorized | 4.51 |
CQA cycles if fully vectorized | 0.70 |
Front-end cycles | 9.83 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 2.40 |
P1 cycles | 2.50 |
P2 cycles | 8.33 |
P3 cycles | 8.33 |
P4 cycles | 5.00 |
P5 cycles | 2.30 |
P6 cycles | 2.40 |
P7 cycles | 5.00 |
P8 cycles | 5.00 |
P9 cycles | 5.00 |
P10 cycles | 2.40 |
P11 cycles | 8.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.83 |
Stall cycles (UFS) | 1.44 |
Nb insns | 55.00 |
Nb uops | 59.00 |
Nb loads | 25.00 |
Nb stores | 8.00 |
Nb stack references | 10.00 |
FLOP/cycle | 0.10 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 29.80 |
Bytes prefetched | 0.00 |
Bytes loaded | 229.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 24.00 |
Vectorization ratio load | 28.57 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 44.44 |
Vector-efficiency ratio all | 15.81 |
Vector-efficiency ratio load | 19.87 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 6.25 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.45 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.27 |
CQA speedup if FP arith vectorized | 2.18 |
CQA speedup if fully vectorized | 14.08 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.18 |
Bottlenecks | micro-operation queue |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | refwrap.h:313-313,shared_ptr_base.h:1308-1308,optional:897-897,optional:1213-1213,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249,stl_vector.h:806-806,stl_vector.h:933-933,stl_vector.h:951-951,stl_vector.h:1056-1056 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.83 |
CQA cycles if no scalar integer | 4.33 |
CQA cycles if FP arith vectorized | 4.51 |
CQA cycles if fully vectorized | 0.70 |
Front-end cycles | 9.83 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 2.40 |
P1 cycles | 2.50 |
P2 cycles | 8.33 |
P3 cycles | 8.33 |
P4 cycles | 5.00 |
P5 cycles | 2.30 |
P6 cycles | 2.40 |
P7 cycles | 5.00 |
P8 cycles | 5.00 |
P9 cycles | 5.00 |
P10 cycles | 2.40 |
P11 cycles | 8.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.83 |
Stall cycles (UFS) | 1.44 |
Nb insns | 55.00 |
Nb uops | 59.00 |
Nb loads | 25.00 |
Nb stores | 8.00 |
Nb stack references | 10.00 |
FLOP/cycle | 0.10 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 29.80 |
Bytes prefetched | 0.00 |
Bytes loaded | 229.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 24.00 |
Vectorization ratio load | 28.57 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 44.44 |
Vector-efficiency ratio all | 15.81 |
Vector-efficiency ratio load | 19.87 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 6.25 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.45 |
Path / |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 55 |
nb uops | 59 |
loop length | 275 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.50 | 8.33 | 8.33 | 5.00 | 2.30 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 8.33 |
cycles | 2.40 | 2.50 | 8.33 | 8.33 | 5.00 | 2.30 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 8.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.83 |
Stall cycles | 1.44 |
LM full (events) | 4.07 |
Front-end | 9.83 |
Dispatch | 8.33 |
Overall L1 | 9.83 |
Vectorization ratios (INT) |
all | 22% |
load | 40% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 28% |
Vectorization ratios (FP) |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
Vectorization ratios (INT+FP) |
all | 24% |
load | 28% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
Vector-efficiency ratios (INT) |
all | 15% |
load | 22% |
store | 12% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
Vector-efficiency ratios (FP) |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector-efficiency ratios (INT+FP) |
all | 15% |
load | 19% |
store | 12% |
mul | 6% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x50(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM0,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 529610 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x48(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RDX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42a300 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x980> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RDI) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42a340 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x9c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R14,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%RCX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 47a890 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 429a00 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RDX,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX,%R14,4),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %ECX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x268(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTD %R14D,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQU64 0x115cbe(%RIP),%YMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU64 0x115cd4(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 429b55 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1d5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 55 |
nb uops | 59 |
loop length | 275 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.50 | 8.33 | 8.33 | 5.00 | 2.30 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 8.33 |
cycles | 2.40 | 2.50 | 8.33 | 8.33 | 5.00 | 2.30 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 8.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.83 |
Stall cycles | 1.44 |
LM full (events) | 4.07 |
Front-end | 9.83 |
Dispatch | 8.33 |
Overall L1 | 9.83 |
all | 22% |
load | 40% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 28% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 24% |
load | 28% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 15% |
load | 22% |
store | 12% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 15% |
load | 19% |
store | 12% |
mul | 6% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x50(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM0,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 529610 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x48(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RDX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42a300 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x980> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RDI) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42a340 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x9c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R14,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%RCX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 47a890 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 429a00 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RDX,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX,%R14,4),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %ECX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x268(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTD %R14D,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQU64 0x115cbe(%RIP),%YMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU64 0x115cd4(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 429b55 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1d5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |