Loop Id: 392 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
Loop Id: 392 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
0x42ab00 VXORPD %XMM0,%XMM0,%XMM0 |
0x42ab04 VMOVSD 0x40(%RSP),%XMM1 |
0x42ab0a VSUBSD %XMM0,%XMM1,%XMM0 |
0x42ab0e VZEROUPPER |
0x42ab11 CALL 51aab0 <exp> |
0x42ab16 MOV 0x38(%RSP),%RCX |
0x42ab1b MOV (%RCX),%RAX |
0x42ab1e MOV 0x10(%RSP),%RDX |
0x42ab23 VMOVSD %XMM0,(%RAX,%RDX,8) |
0x42ab28 INC %RDX |
0x42ab2b MOV 0x8(%RCX),%RCX |
0x42ab2f SUB %RAX,%RCX |
0x42ab32 SAR $0x3,%RCX |
0x42ab36 MOV %RDX,0x10(%RSP) |
0x42ab3b CMP %RDX,%RCX |
0x42ab3e MOV 0x30(%RSP),%RCX |
0x42ab43 MOV 0x20(%RSP),%R8 |
0x42ab48 JBE 42b400 |
0x42ab4e CMPB $0,0x298(%RCX) |
0x42ab55 JE 42b440 |
0x42ab5b MOVSXD 0x2a0(%RCX),%R14 |
0x42ab62 MOV 0xd8(%R8),%RAX |
0x42ab69 VMOVSD (%RAX,%R14,8),%XMM0 |
0x42ab6f VMOVSD %XMM0,0x40(%RSP) |
0x42ab75 MOV 0x290(%RCX),%RBX |
0x42ab7c MOV 0x248(%R8),%ESI |
0x42ab83 MOV %RCX,%RDI |
0x42ab86 VZEROUPPER |
0x42ab89 CALL 47b090 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> |
0x42ab8e MOV 0x20(%RSP),%RDI |
0x42ab93 MOV 0xa0(%RDI),%RCX |
0x42ab9a TEST %RCX,%RCX |
0x42ab9d JE 42ab00 |
0x42aba3 MOV 0x48(%RAX),%RAX |
0x42aba7 MOV 0x10(%RSP),%RDX |
0x42abac LEA (%RDX,%RDX,4),%RDX |
0x42abb0 MOV 0x18(%RAX,%RDX,8),%RAX |
0x42abb5 MOV %RAX,0x28(%RSP) |
0x42abba MOV 0x18(%RBX),%RDX |
0x42abbe MOV (%RDX,%R14,4),%ESI |
0x42abc2 IMUL %ECX,%ESI |
0x42abc5 MOV 0x1d0(%RDI),%RDX |
0x42abcc MOV 0x200(%RDI),%RAX |
0x42abd3 MOV %RAX,0x50(%RSP) |
0x42abd8 MOVSXD %ESI,%RAX |
0x42abdb MOV %RAX,0x48(%RSP) |
0x42abe0 MOV 0x268(%RBX),%RAX |
0x42abe7 MOV %RAX,0x58(%RSP) |
0x42abec DEC %RCX |
0x42abef VPBROADCASTD %R14D,%YMM1 |
0x42abf5 VXORPD %XMM0,%XMM0,%XMM0 |
0x42abf9 XOR %R9D,%R9D |
0x42abfc VMOVDQU64 0x1047da(%RIP),%YMM21 |
0x42ac06 VMOVDQU64 0x1047f0(%RIP),%YMM22 |
0x42ac10 MOV %RCX,0x18(%RSP) |
0x42ac15 JMP 42ac56 |
(393) 0x42ac40 VXORPD %XMM11,%XMM11,%XMM11 |
(393) 0x42ac45 VADDSD %XMM0,%XMM11,%XMM0 |
(393) 0x42ac49 CMP %RCX,%R9 |
(393) 0x42ac4c LEA 0x1(%R9),%R9 |
(393) 0x42ac50 JE 42ab04 |
(393) 0x42ac56 MOV 0x58(%RSP),%RAX |
(393) 0x42ac5b MOV 0x18(%RAX),%RSI |
(393) 0x42ac5f MOV (%RSI,%R9,4),%R13 |
(393) 0x42ac63 MOV %R13,%R15 |
(393) 0x42ac66 SHR $0x20,%R15 |
(393) 0x42ac6a SUB %R13D,%R15D |
(393) 0x42ac6d TEST %R15D,%R15D |
(393) 0x42ac70 JLE 42ac40 |
(393) 0x42ac72 MOV %R14,%RCX |
(393) 0x42ac75 MOV 0x48(%RSP),%RAX |
(393) 0x42ac7a LEA (%R9,%RAX,1),%RSI |
(393) 0x42ac7e MOV 0x50(%RSP),%RAX |
(393) 0x42ac83 MOV (%RAX,%RSI,8),%R11 |
(393) 0x42ac87 VMOVSD 0x8(%R11),%XMM2 |
(393) 0x42ac8d MOV %R15D,%R15D |
(393) 0x42ac90 MOV %R15,%R10 |
(393) 0x42ac93 MOVSXD %R13D,%R12 |
(393) 0x42ac96 MOV $-0x10,%EAX |
(393) 0x42ac9b AND %RAX,%R10 |
(393) 0x42ac9e JE 42b100 |
(393) 0x42aca4 VMOVQ %R13,%XMM3 |
(393) 0x42aca9 VBROADCASTSD %XMM2,%YMM4 |
(393) 0x42acae MOV 0x28(%RSP),%RAX |
(393) 0x42acb3 LEA (%RAX,%R12,8),%RBX |
(393) 0x42acb7 XOR %R14D,%R14D |
(393) 0x42acba XOR %ESI,%ESI |
(393) 0x42acbc JMP 42acdb |
(396) 0x42acc0 KMOVD %K1,%EAX |
(396) 0x42acc4 MOVZX %AX,%EAX |
(396) 0x42acc7 POPCNT %EAX,%EAX |
(396) 0x42accb ADD %EAX,%R14D |
(396) 0x42acce ADD $0x10,%RSI |
(396) 0x42acd2 CMP %R10,%RSI |
(396) 0x42acd5 JAE 42ae00 |
(396) 0x42acdb VMOVUPD (%RBX,%RSI,8),%YMM8 |
(396) 0x42ace0 VMOVUPD 0x20(%RBX,%RSI,8),%YMM6 |
(396) 0x42ace6 VMOVUPD 0x40(%RBX,%RSI,8),%YMM7 |
(396) 0x42acec VMOVUPD 0x60(%RBX,%RSI,8),%YMM5 |
(396) 0x42acf2 VMOVD %ESI,%XMM9 |
(396) 0x42acf6 VPADDD %XMM3,%XMM9,%XMM9 |
(396) 0x42acfa VPBROADCASTD %XMM9,%YMM9 |
(396) 0x42acff VPADDD %YMM21,%YMM9,%YMM10 |
(396) 0x42ad05 VPADDD %YMM22,%YMM9,%YMM9 |
(396) 0x42ad0b VPCMPNEQD %YMM1,%YMM9,%K0 |
(396) 0x42ad12 VPCMPNEQD %YMM1,%YMM10,%K1 |
(396) 0x42ad19 KUNPCKBW %K0,%K1,%K0 |
(396) 0x42ad1d VCMPPD $0x1,%YMM4,%YMM8,%K1 |
(396) 0x42ad24 VCMPPD $0x1,%YMM4,%YMM6,%K2 |
(396) 0x42ad2b KSHIFTLB $0x4,%K2,%K2 |
(396) 0x42ad31 KORB %K2,%K1,%K1 |
(396) 0x42ad35 VCMPPD $0x1,%YMM4,%YMM7,%K2 |
(396) 0x42ad3c VCMPPD $0x1,%YMM4,%YMM5,%K3 |
(396) 0x42ad43 KSHIFTLB $0x4,%K3,%K3 |
(396) 0x42ad49 KORB %K3,%K2,%K2 |
(396) 0x42ad4d KUNPCKBW %K1,%K2,%K1 |
(396) 0x42ad51 KANDW %K0,%K1,%K1 |
(396) 0x42ad55 KORTESTW %K1,%K1 |
(396) 0x42ad59 JE 42acc0 |
(396) 0x42ad5f MOVSXD %R14D,%RDI |
(396) 0x42ad62 LEA (%RDX,%RDI,8),%RAX |
(396) 0x42ad66 VCOMPRESSPD %YMM8,(%RDX,%RDI,8){%K1} |
(396) 0x42ad6d KSHIFTRW $0x8,%K1,%K2 |
(396) 0x42ad73 KMOVB %K1,%EDI |
(396) 0x42ad77 POPCNT %EDI,%EDI |
(396) 0x42ad7b VCOMPRESSPD %YMM7,(%RAX,%RDI,8){%K2} |
(396) 0x42ad82 KSHIFTLB $0x4,%K1,%K0 |
(396) 0x42ad88 KSHIFTRB $0x4,%K0,%K0 |
(396) 0x42ad8e KMOVB %K0,0xf(%RSP) |
(396) 0x42ad94 KSHIFTRB $0x4,%K1,%K3 |
(396) 0x42ad9a MOVZX 0xf(%RSP),%R8D |
(396) 0x42ada0 POPCNT %R8D,%R8D |
(396) 0x42ada5 VCOMPRESSPD %YMM6,(%RAX,%R8,8){%K3} |
(396) 0x42adac KSHIFTLB $0x4,%K2,%K0 |
(396) 0x42adb2 KSHIFTRB $0x4,%K0,%K0 |
(396) 0x42adb8 KMOVB %K0,0xe(%RSP) |
(396) 0x42adbe LEA (%RAX,%RDI,8),%RAX |
(396) 0x42adc2 KSHIFTRB $0x4,%K2,%K2 |
(396) 0x42adc8 MOVZX 0xe(%RSP),%EDI |
(396) 0x42adcd POPCNT %EDI,%EDI |
(396) 0x42add1 VCOMPRESSPD %YMM5,(%RAX,%RDI,8){%K2} |
(396) 0x42add8 JMP 42acc0 |
(393) 0x42ae00 CMP %R15,%R10 |
(393) 0x42ae03 JNE 42b106 |
(393) 0x42ae09 TEST %R14D,%R14D |
(393) 0x42ae0c JLE 42b180 |
(393) 0x42ae12 VMOVSD 0x238(%R11),%XMM19 |
(393) 0x42ae19 MOV 0x218(%R11),%R10 |
(393) 0x42ae20 VMOVSD 0x18(%R11),%XMM16 |
(393) 0x42ae27 VMOVSD 0x20(%R11),%XMM17 |
(393) 0x42ae2e VMOVSD 0x28(%R11),%XMM8 |
(393) 0x42ae34 VMOVSD 0x30(%R11),%XMM18 |
(393) 0x42ae3b VMOVSD 0x38(%R11),%XMM9 |
(393) 0x42ae41 VMOVSD 0x40(%R11),%XMM10 |
(393) 0x42ae47 VMOVSD 0x48(%R11),%XMM13 |
(393) 0x42ae4d VMOVSD 0x50(%R11),%XMM15 |
(393) 0x42ae53 VMOVSD 0x58(%R11),%XMM29 |
(393) 0x42ae5a VMOVSD 0x60(%R11),%XMM30 |
(393) 0x42ae61 VMOVSD 0x68(%R11),%XMM31 |
(393) 0x42ae68 VMOVSD 0x70(%R11),%XMM2 |
(393) 0x42ae6e VMOVSD 0x78(%R11),%XMM3 |
(393) 0x42ae74 VMOVSD 0x80(%R11),%XMM4 |
(393) 0x42ae7d VMOVSD 0x88(%R11),%XMM5 |
(393) 0x42ae86 VMOVSD 0x90(%R11),%XMM6 |
(393) 0x42ae8f MOV %R14D,%R14D |
(393) 0x42ae92 MOV %R14,%R11 |
(393) 0x42ae95 VPBROADCASTQ %R14,%YMM20 |
(393) 0x42ae9b MOV $-0x4,%EAX |
(393) 0x42aea0 AND %RAX,%R11 |
(393) 0x42aea3 VMOVUPD %XMM3,0x110(%RSP) |
(393) 0x42aeac VMOVUPD %XMM4,0x100(%RSP) |
(393) 0x42aeb5 VMOVUPD %XMM5,0xf0(%RSP) |
(393) 0x42aebe VMOVUPD %XMM6,0xe0(%RSP) |
(393) 0x42aec7 VMOVUPD %XMM2,0xd0(%RSP) |
(393) 0x42aed0 JE 42b1c0 |
(393) 0x42aed6 VBROADCASTSD %XMM19,%YMM11 |
(393) 0x42aedc VBROADCASTSD %XMM16,%YMM21 |
(393) 0x42aee2 VBROADCASTSD %XMM17,%YMM22 |
(393) 0x42aee8 VMOVUPD %XMM8,0x90(%RSP) |
(393) 0x42aef1 VBROADCASTSD %XMM8,%YMM23 |
(393) 0x42aef7 VBROADCASTSD %XMM18,%YMM24 |
(393) 0x42aefd VMOVUPD %XMM9,0x80(%RSP) |
(393) 0x42af06 VBROADCASTSD %XMM9,%YMM25 |
(393) 0x42af0c VMOVUPD %XMM10,0x70(%RSP) |
(393) 0x42af12 VBROADCASTSD %XMM10,%YMM26 |
(393) 0x42af18 VMOVUPD %XMM13,0x60(%RSP) |
(393) 0x42af1e VBROADCASTSD %XMM13,%YMM27 |
(393) 0x42af24 VBROADCASTSD %XMM15,%YMM28 |
(393) 0x42af2a VMOVUPD %XMM29,0xc0(%RSP) |
(393) 0x42af32 VBROADCASTSD %XMM29,%YMM29 |
(393) 0x42af38 VMOVUPD %XMM30,0xb0(%RSP) |
(393) 0x42af40 VBROADCASTSD %XMM30,%YMM30 |
(393) 0x42af46 VMOVUPD %XMM31,0xa0(%RSP) |
(393) 0x42af4e VBROADCASTSD %XMM31,%YMM31 |
(393) 0x42af54 VBROADCASTSD %XMM2,%YMM2 |
(393) 0x42af59 VBROADCASTSD %XMM3,%YMM3 |
(393) 0x42af5e VBROADCASTSD %XMM4,%YMM4 |
(393) 0x42af63 VBROADCASTSD %XMM5,%YMM5 |
(393) 0x42af68 VBROADCASTSD %XMM6,%YMM6 |
(393) 0x42af6d VXORPD %XMM7,%XMM7,%XMM7 |
(393) 0x42af71 XOR %ESI,%ESI |
(393) 0x42af73 NOPW %CS:(%RAX,%RAX,1) |
(394) 0x42af80 VMULPD (%RDX,%RSI,8),%YMM11,%YMM8 |
(394) 0x42af85 VCVTTPD2DQ %YMM8,%XMM9 |
(394) 0x42af8a KXNORW %K0,%K0,%K1 |
(394) 0x42af8e VXORPD %XMM10,%XMM10,%XMM10 |
(394) 0x42af93 VGATHERDPD (%R10,%XMM9,8),%YMM10{%K1} |
(394) 0x42af9a KXNORW %K0,%K0,%K1 |
(394) 0x42af9e VXORPD %XMM12,%XMM12,%XMM12 |
(394) 0x42afa3 VGATHERDPD 0x8(%R10,%XMM9,8),%YMM12{%K1} |
(394) 0x42afab KXNORW %K0,%K0,%K1 |
(394) 0x42afaf VXORPD %XMM13,%XMM13,%XMM13 |
(394) 0x42afb4 VGATHERDPD 0x10(%R10,%XMM9,8),%YMM13{%K1} |
(394) 0x42afbc VROUNDPD $0xb,%YMM8,%YMM14 |
(394) 0x42afc2 VSUBPD %YMM14,%YMM8,%YMM8 |
(394) 0x42afc7 KXNORW %K0,%K0,%K1 |
(394) 0x42afcb VXORPD %XMM14,%XMM14,%XMM14 |
(394) 0x42afd0 VGATHERDPD 0x18(%R10,%XMM9,8),%YMM14{%K1} |
(394) 0x42afd8 VMOVAPD %YMM8,%YMM9 |
(394) 0x42afdd VFMADD213PD %YMM22,%YMM21,%YMM9 |
(394) 0x42afe3 VFMADD213PD %YMM23,%YMM8,%YMM9 |
(394) 0x42afe9 VFMADD213PD %YMM24,%YMM8,%YMM9 |
(394) 0x42afef VFMADD213PD %YMM7,%YMM10,%YMM9 |
(394) 0x42aff4 VMOVAPD %YMM8,%YMM7 |
(394) 0x42aff8 VFMADD213PD %YMM26,%YMM25,%YMM7 |
(394) 0x42affe VFMADD213PD %YMM27,%YMM8,%YMM7 |
(394) 0x42b004 VFMADD213PD %YMM28,%YMM8,%YMM7 |
(394) 0x42b00a VFMADD213PD %YMM9,%YMM12,%YMM7 |
(394) 0x42b00f VMOVAPD %YMM8,%YMM9 |
(394) 0x42b014 VFMADD213PD %YMM30,%YMM29,%YMM9 |
(394) 0x42b01a VFMADD213PD %YMM31,%YMM8,%YMM9 |
(394) 0x42b020 VFMADD213PD %YMM2,%YMM8,%YMM9 |
(394) 0x42b025 VFMADD213PD %YMM7,%YMM13,%YMM9 |
(394) 0x42b02a VMOVAPD %YMM8,%YMM7 |
(394) 0x42b02e VFMADD213PD %YMM4,%YMM3,%YMM7 |
(394) 0x42b033 VFMADD213PD %YMM5,%YMM8,%YMM7 |
(394) 0x42b038 VFMADD213PD %YMM6,%YMM8,%YMM7 |
(394) 0x42b03d VFMADD213PD %YMM9,%YMM14,%YMM7 |
(394) 0x42b042 ADD $0x4,%RSI |
(394) 0x42b046 CMP %R11,%RSI |
(394) 0x42b049 JB 42af80 |
(393) 0x42b04f VEXTRACTF128 $0x1,%YMM7,%XMM2 |
(393) 0x42b055 VADDPD %XMM2,%XMM7,%XMM2 |
(393) 0x42b059 VSHUFPD $0x1,%XMM2,%XMM2,%XMM3 |
(393) 0x42b05e VADDSD %XMM3,%XMM2,%XMM11 |
(393) 0x42b062 CMP %R14,%R11 |
(393) 0x42b065 VMOVDQU64 0x104371(%RIP),%YMM21 |
(393) 0x42b06f VMOVDQU64 0x104387(%RIP),%YMM22 |
(393) 0x42b079 MOV %RCX,%R14 |
(393) 0x42b07c MOV 0x18(%RSP),%RCX |
(393) 0x42b081 VMOVUPD 0xc0(%RSP),%XMM29 |
(393) 0x42b089 VMOVUPD 0xb0(%RSP),%XMM30 |
(393) 0x42b091 VMOVUPD 0xa0(%RSP),%XMM31 |
(393) 0x42b099 VMOVUPD 0x90(%RSP),%XMM8 |
(393) 0x42b0a2 VMOVUPD 0x80(%RSP),%XMM9 |
(393) 0x42b0ab VMOVUPD 0x70(%RSP),%XMM10 |
(393) 0x42b0b1 VMOVUPD 0x60(%RSP),%XMM13 |
(393) 0x42b0b7 JE 42ac45 |
(393) 0x42b0bd JMP 42b1d0 |
(393) 0x42b100 XOR %R10D,%R10D |
(393) 0x42b103 XOR %R14D,%R14D |
(393) 0x42b106 MOV %ECX,%ESI |
(393) 0x42b108 SUB %R13D,%ESI |
(393) 0x42b10b MOV 0x28(%RSP),%RAX |
(393) 0x42b110 LEA (%RAX,%R12,8),%RBX |
(393) 0x42b114 JMP 42b14c |
(395) 0x42b140 INC %R10 |
(395) 0x42b143 CMP %R10,%R15 |
(395) 0x42b146 JE 42ae09 |
(395) 0x42b14c VMOVSD (%RBX,%R10,8),%XMM3 |
(395) 0x42b152 VUCOMISD %XMM3,%XMM2 |
(395) 0x42b156 JBE 42b140 |
(395) 0x42b158 CMP %R10D,%ESI |
(395) 0x42b15b JE 42b140 |
(395) 0x42b15d MOVSXD %R14D,%R14 |
(395) 0x42b160 VMOVSD %XMM3,(%RDX,%R14,8) |
(395) 0x42b166 INC %R14D |
(395) 0x42b169 JMP 42b140 |
(393) 0x42b180 VXORPD %XMM11,%XMM11,%XMM11 |
(393) 0x42b185 MOV %RCX,%R14 |
(393) 0x42b188 MOV 0x18(%RSP),%RCX |
(393) 0x42b18d JMP 42ac45 |
(393) 0x42b1c0 VXORPD %XMM11,%XMM11,%XMM11 |
(393) 0x42b1c5 XOR %R11D,%R11D |
(393) 0x42b1c8 MOV %RCX,%R14 |
(393) 0x42b1cb MOV 0x18(%RSP),%RCX |
(393) 0x42b1d0 VPBROADCASTQ %R11,%YMM2 |
(393) 0x42b1d6 VPSUBQ %YMM2,%YMM20,%YMM2 |
(393) 0x42b1dc VPCMPNLEUQ 0x1041d9(%RIP),%YMM2,%K1 |
(393) 0x42b1e7 KORTESTB %K1,%K1 |
(393) 0x42b1eb JE 42b3c0 |
(393) 0x42b1f1 VMOVUPD (%RDX,%R11,8),%YMM2{%K1}{z} |
(393) 0x42b1f8 VMOVUPD 0x120(%RSP),%YMM3 |
(393) 0x42b201 VMOVAPD %YMM2,%YMM3{%K1} |
(393) 0x42b207 VBROADCASTSD %XMM19,%YMM2 |
(393) 0x42b20d VMOVUPD %YMM3,0x120(%RSP) |
(393) 0x42b216 VMULPD %YMM3,%YMM2,%YMM2 |
(393) 0x42b21a VCVTTPD2DQ %YMM2,%XMM3 |
(393) 0x42b21e VROUNDPD $0xb,%YMM2,%YMM4 |
(393) 0x42b224 VSUBPD %YMM4,%YMM2,%YMM2 |
(393) 0x42b228 KMOVQ %K1,%K2 |
(393) 0x42b22d VXORPD %XMM4,%XMM4,%XMM4 |
(393) 0x42b231 VGATHERDPD (%R10,%XMM3,8),%YMM4{%K2} |
(393) 0x42b238 VBROADCASTSD %XMM16,%YMM5 |
(393) 0x42b23e VBROADCASTSD %XMM17,%YMM6 |
(393) 0x42b244 VBROADCASTSD %XMM18,%YMM7 |
(393) 0x42b24a VMOVUPD 0x140(%RSP),%YMM20 |
(393) 0x42b252 VMOVAPD %YMM4,%YMM20{%K1} |
(393) 0x42b258 KMOVQ %K1,%K2 |
(393) 0x42b25d VXORPD %XMM4,%XMM4,%XMM4 |
(393) 0x42b261 VGATHERDPD 0x8(%R10,%XMM3,8),%YMM4{%K2} |
(393) 0x42b269 VBROADCASTSD %XMM8,%YMM8 |
(393) 0x42b26e VBROADCASTSD %XMM9,%YMM9 |
(393) 0x42b273 VBROADCASTSD %XMM10,%YMM10 |
(393) 0x42b278 VBROADCASTSD %XMM15,%YMM12 |
(393) 0x42b27d VBROADCASTSD %XMM13,%YMM13 |
(393) 0x42b282 VMOVUPD 0x160(%RSP),%YMM14 |
(393) 0x42b28b VMOVAPD %YMM4,%YMM14{%K1} |
(393) 0x42b291 VFMADD231PD %YMM9,%YMM2,%YMM10 |
(393) 0x42b296 VFMADD213PD %YMM13,%YMM2,%YMM10 |
(393) 0x42b29b VFMADD213PD %YMM12,%YMM2,%YMM10 |
(393) 0x42b2a0 VMOVUPD %YMM14,0x160(%RSP) |
(393) 0x42b2a9 VMULPD %YMM14,%YMM10,%YMM4 |
(393) 0x42b2ae KMOVQ %K1,%K2 |
(393) 0x42b2b3 VXORPD %XMM9,%XMM9,%XMM9 |
(393) 0x42b2b8 VGATHERDPD 0x10(%R10,%XMM3,8),%YMM9{%K2} |
(393) 0x42b2c0 VBROADCASTSD %XMM29,%YMM10 |
(393) 0x42b2c6 VBROADCASTSD %XMM30,%YMM12 |
(393) 0x42b2cc VBROADCASTSD 0xd0(%RSP),%YMM13 |
(393) 0x42b2d6 VBROADCASTSD %XMM31,%YMM14 |
(393) 0x42b2dc KMOVQ %K1,%K2 |
(393) 0x42b2e1 VXORPD %XMM15,%XMM15,%XMM15 |
(393) 0x42b2e6 VGATHERDPD 0x18(%R10,%XMM3,8),%YMM15{%K2} |
(393) 0x42b2ee VMOVUPD 0x180(%RSP),%YMM19 |
(393) 0x42b2f6 VMOVAPD %YMM9,%YMM19{%K1} |
(393) 0x42b2fc VBROADCASTSD 0x110(%RSP),%YMM9 |
(393) 0x42b306 VBROADCASTSD 0x100(%RSP),%YMM3 |
(393) 0x42b310 VBROADCASTSD 0xe0(%RSP),%YMM16 |
(393) 0x42b318 VBROADCASTSD 0xf0(%RSP),%YMM17 |
(393) 0x42b320 VMOVUPD 0x1a0(%RSP),%YMM18 |
(393) 0x42b328 VMOVAPD %YMM15,%YMM18{%K1} |
(393) 0x42b32e VFMADD231PD %YMM5,%YMM2,%YMM6 |
(393) 0x42b333 VFMADD213PD %YMM8,%YMM2,%YMM6 |
(393) 0x42b338 VFMADD213PD %YMM7,%YMM2,%YMM6 |
(393) 0x42b33d VMOVUPD %YMM20,0x140(%RSP) |
(393) 0x42b345 VFMADD213PD %YMM4,%YMM20,%YMM6 |
(393) 0x42b34b VFMADD231PD %YMM10,%YMM2,%YMM12 |
(393) 0x42b350 VFMADD213PD %YMM14,%YMM2,%YMM12 |
(393) 0x42b355 VFMADD213PD %YMM13,%YMM2,%YMM12 |
(393) 0x42b35a VMOVUPD %YMM19,0x180(%RSP) |
(393) 0x42b362 VFMADD213PD %YMM6,%YMM19,%YMM12 |
(393) 0x42b368 VFMADD231PD %YMM9,%YMM2,%YMM3 |
(393) 0x42b36d VFMADD213PD %YMM17,%YMM2,%YMM3 |
(393) 0x42b373 VFMADD213PD %YMM16,%YMM2,%YMM3 |
(393) 0x42b379 VMOVUPD %YMM18,0x1a0(%RSP) |
(393) 0x42b381 VFMADD213PD %YMM12,%YMM18,%YMM3 |
(393) 0x42b387 JMP 42b3c4 |
(393) 0x42b3c0 VXORPD %XMM3,%XMM3,%XMM3 |
(393) 0x42b3c4 VMOVAPD %YMM3,%YMM2{%K1}{z} |
(393) 0x42b3ca VEXTRACTF128 $0x1,%YMM2,%XMM3 |
(393) 0x42b3d0 VADDPD %XMM3,%XMM2,%XMM2 |
(393) 0x42b3d4 VSHUFPD $0x1,%XMM2,%XMM2,%XMM3 |
(393) 0x42b3d9 VADDSD %XMM3,%XMM2,%XMM2 |
(393) 0x42b3dd VADDSD %XMM2,%XMM11,%XMM11 |
(393) 0x42b3e1 JMP 42ac45 |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/optional: 433 - 950 |
-------------------------------------------------------------------------------- |
433: { return static_cast<const _Dp*>(this)->_M_payload._M_engaged; } |
[...] |
950: if (this->_M_is_engaged()) |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 107 - 132 |
-------------------------------------------------------------------------------- |
107: for (int k = 0; k < ratios.size(); ++k) |
108: ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.getRefPS(), VP.refPtcl, VP.getDistTableAB(myTableID).getDistRow(k).data())); |
[...] |
126: const int igt = P.GroupID[iat] * NumGroups; |
127: for (int jg = 0; jg < NumGroups; ++jg) |
128: { |
129: const FuncType& f2(*F[igt + jg]); |
130: int iStart = P.first(jg); |
131: int iEnd = P.last(jg); |
132: curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/shared_ptr_base.h: 1296 - 1296 |
-------------------------------------------------------------------------------- |
1296: { return _M_ptr; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1169 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1064: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/refwrap.h: 338 - 338 |
-------------------------------------------------------------------------------- |
338: { return *_M_data; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 236 - 260 |
-------------------------------------------------------------------------------- |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.31 |
CQA speedup if FP arith vectorized | 2.21 |
CQA speedup if fully vectorized | 14.28 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.20 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | optional:433-433,optional:950-950,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,shared_ptr_base.h:1296-1296,stl_vector.h:919-919,stl_vector.h:1046-1046,stl_vector.h:1064-1064,stl_vector.h:1169-1169,refwrap.h:338-338,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.00 |
CQA cycles if no scalar integer | 4.33 |
CQA cycles if FP arith vectorized | 4.53 |
CQA cycles if fully vectorized | 0.70 |
Front-end cycles | 10.00 |
DIV/SQRT cycles | 2.40 |
P0 cycles | 2.50 |
P1 cycles | 8.33 |
P2 cycles | 8.33 |
P3 cycles | 5.00 |
P4 cycles | 2.30 |
P5 cycles | 2.40 |
P6 cycles | 5.00 |
P7 cycles | 5.00 |
P8 cycles | 5.00 |
P9 cycles | 2.40 |
P10 cycles | 8.33 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.90 |
Stall cycles (UFS) | 1.34 |
Nb insns | 56.00 |
Nb uops | 60.00 |
Nb loads | 25.00 |
Nb stores | 8.00 |
Nb stack references | 10.00 |
FLOP/cycle | 0.10 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 29.30 |
Bytes prefetched | 0.00 |
Bytes loaded | 229.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 24.00 |
Vectorization ratio load | 28.57 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 44.44 |
Vector-efficiency ratio all | 15.81 |
Vector-efficiency ratio load | 19.87 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 6.25 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.45 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.31 |
CQA speedup if FP arith vectorized | 2.21 |
CQA speedup if fully vectorized | 14.28 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.20 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | optional:433-433,optional:950-950,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,shared_ptr_base.h:1296-1296,stl_vector.h:919-919,stl_vector.h:1046-1046,stl_vector.h:1064-1064,stl_vector.h:1169-1169,refwrap.h:338-338,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.00 |
CQA cycles if no scalar integer | 4.33 |
CQA cycles if FP arith vectorized | 4.53 |
CQA cycles if fully vectorized | 0.70 |
Front-end cycles | 10.00 |
DIV/SQRT cycles | 2.40 |
P0 cycles | 2.50 |
P1 cycles | 8.33 |
P2 cycles | 8.33 |
P3 cycles | 5.00 |
P4 cycles | 2.30 |
P5 cycles | 2.40 |
P6 cycles | 5.00 |
P7 cycles | 5.00 |
P8 cycles | 5.00 |
P9 cycles | 2.40 |
P10 cycles | 8.33 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.90 |
Stall cycles (UFS) | 1.34 |
Nb insns | 56.00 |
Nb uops | 60.00 |
Nb loads | 25.00 |
Nb stores | 8.00 |
Nb stack references | 10.00 |
FLOP/cycle | 0.10 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 29.30 |
Bytes prefetched | 0.00 |
Bytes loaded | 229.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 24.00 |
Vectorization ratio load | 28.57 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 44.44 |
Vector-efficiency ratio all | 15.81 |
Vector-efficiency ratio load | 19.87 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 6.25 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.45 |
Path / |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 56 |
nb uops | 60 |
loop length | 279 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 10.00 cycles |
front end | 10.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.50 | 8.33 | 8.33 | 5.00 | 2.30 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 8.33 |
cycles | 2.40 | 2.50 | 8.33 | 8.33 | 5.00 | 2.30 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 8.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.90 |
Stall cycles | 1.34 |
LM full (events) | 4.04 |
Front-end | 10.00 |
Dispatch | 8.33 |
Overall L1 | 10.00 |
all | 22% |
load | 40% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 28% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 24% |
load | 28% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 15% |
load | 22% |
store | 12% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 15% |
load | 19% |
store | 12% |
mul | 6% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM0,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 51aab0 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RDX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42b400 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x980> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RCX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42b440 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x9c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RCX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%R8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R14,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%R8),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 47b090 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x20(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 42ab00 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RDX,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX,%R14,4),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %ECX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x268(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTD %R14D,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQU64 0x1047da(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU64 0x1047f0(%RIP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42ac56 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1d6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 56 |
nb uops | 60 |
loop length | 279 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 10.00 cycles |
front end | 10.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.50 | 8.33 | 8.33 | 5.00 | 2.30 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 8.33 |
cycles | 2.40 | 2.50 | 8.33 | 8.33 | 5.00 | 2.30 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 8.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.90 |
Stall cycles | 1.34 |
LM full (events) | 4.04 |
Front-end | 10.00 |
Dispatch | 8.33 |
Overall L1 | 10.00 |
all | 22% |
load | 40% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 28% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 24% |
load | 28% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 15% |
load | 22% |
store | 12% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 15% |
load | 19% |
store | 12% |
mul | 6% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM0,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 51aab0 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RDX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42b400 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x980> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RCX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42b440 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x9c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RCX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%R8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R14,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%R8),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 47b090 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x20(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 42ab00 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RDX,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX,%R14,4),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %ECX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x268(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTD %R14D,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQU64 0x1047da(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU64 0x1047f0(%RIP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42ac56 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1d6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |