Loop Id: 396 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
Loop Id: 396 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
0x420f80 VXORPD %XMM0,%XMM0,%XMM0 |
0x420f84 VMOVUPD 0x60(%RSP),%YMM31 |
0x420f8c VMOVUPD %YMM31,0x60(%RSP) |
0x420f94 VMOVSD 0x38(%RSP),%XMM1 |
0x420f9a VSUBSD %XMM0,%XMM1,%XMM0 |
0x420f9e VZEROUPPER |
0x420fa1 CALL 4d6ad0 <exp> |
0x420fa6 MOV 0x30(%RSP),%RCX |
0x420fab MOV (%RCX),%RAX |
0x420fae MOV 0x10(%RSP),%RDX |
0x420fb3 VMOVSD %XMM0,(%RAX,%RDX,8) |
0x420fb8 INC %RDX |
0x420fbb MOV 0x8(%RCX),%RCX |
0x420fbf SUB %RAX,%RCX |
0x420fc2 SAR $0x3,%RCX |
0x420fc6 MOV %RDX,0x10(%RSP) |
0x420fcb CMP %RDX,%RCX |
0x420fce MOV 0x28(%RSP),%RCX |
0x420fd3 MOV 0x18(%RSP),%R8 |
0x420fd8 JBE 42165e |
0x420fde CMPB $0,0x298(%RCX) |
0x420fe5 JE 42166d |
0x420feb MOVSXD 0x2a0(%RCX),%R14 |
0x420ff2 MOV 0xd8(%R8),%RAX |
0x420ff9 VMOVSD (%RAX,%R14,8),%XMM0 |
0x420fff VMOVSD %XMM0,0x38(%RSP) |
0x421005 MOV 0x290(%RCX),%RBX |
0x42100c MOV 0x248(%R8),%ESI |
0x421013 MOV %RCX,%RDI |
0x421016 VZEROUPPER |
0x421019 CALL 45f8f0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> |
0x42101e MOV 0x18(%RSP),%RDI |
0x421023 MOV 0xa0(%RDI),%R9 |
0x42102a TEST %R9,%R9 |
0x42102d JE 420f80 |
0x421033 MOV 0x48(%RAX),%RAX |
0x421037 MOV 0x10(%RSP),%RDX |
0x42103c LEA (%RDX,%RDX,4),%RDX |
0x421040 MOV 0x18(%RAX,%RDX,8),%RAX |
0x421045 MOV %RAX,0x20(%RSP) |
0x42104a MOV 0x18(%RBX),%RDX |
0x42104e MOV (%RDX,%R14,4),%ESI |
0x421052 IMUL %R9D,%ESI |
0x421056 MOV 0x1d0(%RDI),%RDX |
0x42105d MOV 0x200(%RDI),%RAX |
0x421064 MOV %RAX,0x58(%RSP) |
0x421069 MOVSXD %ESI,%RAX |
0x42106c MOV %RAX,0x50(%RSP) |
0x421071 DEC %R9 |
0x421074 VPBROADCASTD %R14D,%YMM1 |
0x42107a VXORPD %XMM0,%XMM0,%XMM0 |
0x42107e XOR %R8D,%R8D |
0x421081 VMOVDQU64 0xd5335(%RIP),%YMM28 |
0x42108b VMOVDQU64 0xd534b(%RIP),%YMM29 |
0x421095 VMOVDQU64 0xd5361(%RIP),%YMM30 |
0x42109f VMOVUPD 0x60(%RSP),%YMM31 |
0x4210a7 MOV %RBX,%RCX |
0x4210aa MOV %R9,%RDI |
0x4210ad MOV %RBX,0x48(%RSP) |
0x4210b2 MOV %R9,0x40(%RSP) |
0x4210b7 JMP 4210dc |
(397) 0x4210c0 VXORPD %XMM21,%XMM21,%XMM21 |
(397) 0x4210c6 VADDSD %XMM0,%XMM21,%XMM0 |
(397) 0x4210cc LEA 0x1(%R8),%RAX |
(397) 0x4210d0 CMP %RDI,%R8 |
(397) 0x4210d3 MOV %RAX,%R8 |
(397) 0x4210d6 JE 420f8c |
(397) 0x4210dc MOV 0x268(%RCX),%RSI |
(397) 0x4210e3 MOV 0x18(%RSI),%RSI |
(397) 0x4210e7 MOV (%RSI,%R8,4),%R13 |
(397) 0x4210eb MOV %R13,%R15 |
(397) 0x4210ee SHR $0x20,%R15 |
(397) 0x4210f2 SUB %R13D,%R15D |
(397) 0x4210f5 TEST %R15D,%R15D |
(397) 0x4210f8 JLE 4210c0 |
(397) 0x4210fa MOV 0x50(%RSP),%RAX |
(397) 0x4210ff LEA (%R8,%RAX,1),%RSI |
(397) 0x421103 MOV 0x58(%RSP),%RAX |
(397) 0x421108 MOV (%RAX,%RSI,8),%R9 |
(397) 0x42110c VMOVSD 0x8(%R9),%XMM2 |
(397) 0x421112 MOV %R15D,%R15D |
(397) 0x421115 MOV %R15,%R11 |
(397) 0x421118 MOVSXD %R13D,%R12 |
(397) 0x42111b MOV $-0x10,%EAX |
(397) 0x421120 AND %RAX,%R11 |
(397) 0x421123 JE 421490 |
(397) 0x421129 MOV %R14,%RCX |
(397) 0x42112c VMOVQ %R13,%XMM3 |
(397) 0x421131 VBROADCASTSD %XMM2,%YMM4 |
(397) 0x421136 MOV 0x20(%RSP),%RAX |
(397) 0x42113b LEA (%RAX,%R12,8),%RBX |
(397) 0x42113f XOR %R10D,%R10D |
(397) 0x421142 XOR %ESI,%ESI |
(397) 0x421144 NOPW %CS:(%RAX,%RAX,1) |
(400) 0x421150 VMOVUPD (%RBX,%RSI,8),%YMM5 |
(400) 0x421155 VMOVUPD 0x20(%RBX,%RSI,8),%YMM6 |
(400) 0x42115b VMOVUPD 0x40(%RBX,%RSI,8),%YMM7 |
(400) 0x421161 VMOVUPD 0x60(%RBX,%RSI,8),%YMM8 |
(400) 0x421167 VMOVD %ESI,%XMM9 |
(400) 0x42116b VPADDD %XMM3,%XMM9,%XMM9 |
(400) 0x42116f VPBROADCASTD %XMM9,%YMM9 |
(400) 0x421174 VPADDD %YMM29,%YMM9,%YMM10 |
(400) 0x42117a VPADDD %YMM30,%YMM9,%YMM9 |
(400) 0x421180 VPCMPNEQD %YMM1,%YMM9,%K0 |
(400) 0x421187 VPCMPNEQD %YMM1,%YMM10,%K1 |
(400) 0x42118e VCMPPD $0x1,%YMM4,%YMM5,%K5 |
(400) 0x421195 VCMPPD $0x1,%YMM4,%YMM6,%K2 |
(400) 0x42119c KMOVW %K2,0x60(%RSP) |
(400) 0x4211a2 KSHIFTLB $0x4,%K2,%K3 |
(400) 0x4211a8 KORB %K3,%K5,%K3 |
(400) 0x4211ac VCMPPD $0x1,%YMM4,%YMM7,%K6 |
(400) 0x4211b3 VCMPPD $0x1,%YMM4,%YMM8,%K4 |
(400) 0x4211ba MOVSXD %R10D,%R10 |
(400) 0x4211bd LEA (%RDX,%R10,8),%RDI |
(400) 0x4211c1 KANDB %K0,%K3,%K7 |
(400) 0x4211c5 KMOVB %K7,%EAX |
(400) 0x4211c9 KANDW %K1,%K6,%K7 |
(400) 0x4211cd POPCNT %EAX,%EAX |
(400) 0x4211d1 VCOMPRESSPD %YMM7,(%RDI,%RAX,8){%K7} |
(400) 0x4211d8 KANDW %K0,%K5,%K5 |
(400) 0x4211dc VCOMPRESSPD %YMM5,(%RDX,%R10,8){%K5} |
(400) 0x4211e3 KSHIFTLB $0x4,%K4,%K2 |
(400) 0x4211e9 KORB %K2,%K6,%K2 |
(400) 0x4211ed KMOVB %K7,0xe(%RSP) |
(400) 0x4211f3 KSHIFTRB $0x4,%K1,%K6 |
(400) 0x4211f9 KANDW %K6,%K4,%K4 |
(400) 0x4211fd LEA (%RDI,%RAX,8),%RAX |
(400) 0x421201 MOVZX 0xe(%RSP),%R14D |
(400) 0x421207 POPCNT %R14D,%R14D |
(400) 0x42120c VCOMPRESSPD %YMM8,(%RAX,%R14,8){%K4} |
(400) 0x421213 KUNPCKBW %K3,%K2,%K2 |
(400) 0x421217 KMOVB %K5,0xf(%RSP) |
(400) 0x42121d KUNPCKBW %K0,%K1,%K1 |
(400) 0x421221 KSHIFTRB $0x4,%K0,%K0 |
(400) 0x421227 KMOVW 0x60(%RSP),%K3 |
(400) 0x42122d KANDW %K0,%K3,%K3 |
(400) 0x421231 MOVZX 0xf(%RSP),%EAX |
(400) 0x421236 POPCNT %EAX,%EAX |
(400) 0x42123a VCOMPRESSPD %YMM6,(%RDI,%RAX,8){%K3} |
(400) 0x421241 KANDW %K1,%K2,%K0 |
(400) 0x421245 KMOVW %K0,%EAX |
(400) 0x421249 POPCNT %EAX,%EAX |
(400) 0x42124d ADD %EAX,%R10D |
(400) 0x421250 ADD $0x10,%RSI |
(400) 0x421254 CMP %R11,%RSI |
(400) 0x421257 JB 421150 |
(397) 0x42125d CMP %R15,%R11 |
(397) 0x421260 MOV %RCX,%R14 |
(397) 0x421263 MOV 0x48(%RSP),%RCX |
(397) 0x421268 MOV 0x40(%RSP),%RDI |
(397) 0x42126d JNE 421496 |
(397) 0x421273 TEST %R10D,%R10D |
(397) 0x421276 JLE 4210c0 |
(397) 0x42127c VMOVSD 0x238(%R9),%XMM4 |
(397) 0x421285 MOV 0x218(%R9),%R11 |
(397) 0x42128c VMOVSD 0x18(%R9),%XMM21 |
(397) 0x421293 VMOVSD 0x20(%R9),%XMM2 |
(397) 0x421299 VMOVSD 0x28(%R9),%XMM6 |
(397) 0x42129f VMOVSD 0x30(%R9),%XMM5 |
(397) 0x4212a5 VMOVSD 0x38(%R9),%XMM17 |
(397) 0x4212ac VMOVSD 0x40(%R9),%XMM11 |
(397) 0x4212b2 VMOVSD 0x48(%R9),%XMM12 |
(397) 0x4212b8 VMOVSD 0x50(%R9),%XMM15 |
(397) 0x4212be VMOVSD 0x58(%R9),%XMM7 |
(397) 0x4212c4 VMOVSD 0x60(%R9),%XMM3 |
(397) 0x4212ca VMOVSD 0x68(%R9),%XMM10 |
(397) 0x4212d0 VMOVSD 0x70(%R9),%XMM9 |
(397) 0x4212d6 VMOVSD 0x78(%R9),%XMM13 |
(397) 0x4212dc VMOVSD 0x80(%R9),%XMM8 |
(397) 0x4212e5 VMOVSD 0x88(%R9),%XMM16 |
(397) 0x4212ec VMOVSD 0x90(%R9),%XMM18 |
(397) 0x4212f3 MOV %R10D,%R10D |
(397) 0x4212f6 MOV %R10,%R9 |
(397) 0x4212f9 VPBROADCASTQ %R10,%YMM19 |
(397) 0x4212ff VPBROADCASTQ %R11,%YMM14 |
(397) 0x421305 MOV $-0x4,%EAX |
(397) 0x42130a VBROADCASTSD %XMM4,%YMM20 |
(397) 0x421310 VBROADCASTSD %XMM21,%YMM4 |
(397) 0x421316 VBROADCASTSD %XMM2,%YMM2 |
(397) 0x42131b VBROADCASTSD %XMM6,%YMM6 |
(397) 0x421320 VBROADCASTSD %XMM5,%YMM5 |
(397) 0x421325 VBROADCASTSD %XMM17,%YMM17 |
(397) 0x42132b VBROADCASTSD %XMM11,%YMM11 |
(397) 0x421330 VBROADCASTSD %XMM12,%YMM12 |
(397) 0x421335 VBROADCASTSD %XMM15,%YMM15 |
(397) 0x42133a VBROADCASTSD %XMM7,%YMM7 |
(397) 0x42133f VBROADCASTSD %XMM3,%YMM3 |
(397) 0x421344 VBROADCASTSD %XMM10,%YMM10 |
(397) 0x421349 VBROADCASTSD %XMM9,%YMM9 |
(397) 0x42134e VBROADCASTSD %XMM13,%YMM13 |
(397) 0x421353 VBROADCASTSD %XMM8,%YMM8 |
(397) 0x421358 VBROADCASTSD %XMM16,%YMM16 |
(397) 0x42135e VBROADCASTSD %XMM18,%YMM18 |
(397) 0x421364 AND %RAX,%R9 |
(397) 0x421367 JE 4214db |
(397) 0x42136d VXORPD %XMM21,%XMM21,%XMM21 |
(397) 0x421373 XOR %ESI,%ESI |
(397) 0x421375 NOPW %CS:(%RAX,%RAX,1) |
(398) 0x421380 VMULPD (%RDX,%RSI,8),%YMM20,%YMM22 |
(398) 0x421387 VCVTTPD2DQ %YMM22,%XMM23 |
(398) 0x42138d VXORPD %XMM24,%XMM24,%XMM24 |
(398) 0x421393 KXNORW %K0,%K0,%K1 |
(398) 0x421397 VGATHERDPD (%R11,%XMM23,8),%YMM24{%K1} |
(398) 0x42139e VXORPD %XMM25,%XMM25,%XMM25 |
(398) 0x4213a4 KXNORW %K0,%K0,%K1 |
(398) 0x4213a8 VGATHERDPD 0x8(%R11,%XMM23,8),%YMM25{%K1} |
(398) 0x4213b0 VXORPD %XMM26,%XMM26,%XMM26 |
(398) 0x4213b6 KXNORW %K0,%K0,%K1 |
(398) 0x4213ba VGATHERDPD 0x10(%R11,%XMM23,8),%YMM26{%K1} |
(398) 0x4213c2 VRNDSCALEPD $0xb,%YMM22,%YMM27 |
(398) 0x4213c9 VSUBPD %YMM27,%YMM22,%YMM22 |
(398) 0x4213cf VXORPD %XMM27,%XMM27,%XMM27 |
(398) 0x4213d5 KXNORW %K0,%K0,%K1 |
(398) 0x4213d9 VGATHERDPD 0x18(%R11,%XMM23,8),%YMM27{%K1} |
(398) 0x4213e1 VMOVAPD %YMM22,%YMM23 |
(398) 0x4213e7 VFMADD213PD %YMM2,%YMM4,%YMM23 |
(398) 0x4213ed VFMADD213PD %YMM6,%YMM22,%YMM23 |
(398) 0x4213f3 VFMADD213PD %YMM5,%YMM22,%YMM23 |
(398) 0x4213f9 VFMADD213PD %YMM21,%YMM24,%YMM23 |
(398) 0x4213ff VMOVAPD %YMM22,%YMM21 |
(398) 0x421405 VFMADD213PD %YMM11,%YMM17,%YMM21 |
(398) 0x42140b VFMADD213PD %YMM12,%YMM22,%YMM21 |
(398) 0x421411 VFMADD213PD %YMM15,%YMM22,%YMM21 |
(398) 0x421417 VFMADD213PD %YMM23,%YMM25,%YMM21 |
(398) 0x42141d VMOVAPD %YMM22,%YMM23 |
(398) 0x421423 VFMADD213PD %YMM3,%YMM7,%YMM23 |
(398) 0x421429 VFMADD213PD %YMM10,%YMM22,%YMM23 |
(398) 0x42142f VFMADD213PD %YMM9,%YMM22,%YMM23 |
(398) 0x421435 VFMADD213PD %YMM21,%YMM26,%YMM23 |
(398) 0x42143b VMOVAPD %YMM22,%YMM21 |
(398) 0x421441 VFMADD213PD %YMM8,%YMM13,%YMM21 |
(398) 0x421447 VFMADD213PD %YMM16,%YMM22,%YMM21 |
(398) 0x42144d VFMADD213PD %YMM18,%YMM22,%YMM21 |
(398) 0x421453 VFMADD213PD %YMM23,%YMM27,%YMM21 |
(398) 0x421459 ADD $0x4,%RSI |
(398) 0x42145d CMP %R9,%RSI |
(398) 0x421460 JB 421380 |
(397) 0x421466 VEXTRACTF32X4 $0x1,%YMM21,%XMM22 |
(397) 0x42146d VADDPD %XMM22,%XMM21,%XMM21 |
(397) 0x421473 VSHUFPD $0x1,%XMM21,%XMM21,%XMM22 |
(397) 0x42147a VADDSD %XMM22,%XMM21,%XMM21 |
(397) 0x421480 CMP %R10,%R9 |
(397) 0x421483 JE 4210c6 |
(397) 0x421489 JMP 4214e4 |
(397) 0x421490 XOR %R10D,%R10D |
(397) 0x421493 XOR %R11D,%R11D |
(397) 0x421496 MOV %R14D,%ESI |
(397) 0x421499 SUB %R13D,%ESI |
(397) 0x42149c MOV 0x20(%RSP),%RAX |
(397) 0x4214a1 LEA (%RAX,%R12,8),%RBX |
(397) 0x4214a5 JMP 4214bc |
(399) 0x4214b0 INC %R11 |
(399) 0x4214b3 CMP %R11,%R15 |
(399) 0x4214b6 JE 421273 |
(399) 0x4214bc VMOVSD (%RBX,%R11,8),%XMM3 |
(399) 0x4214c2 VUCOMISD %XMM3,%XMM2 |
(399) 0x4214c6 JBE 4214b0 |
(399) 0x4214c8 CMP %R11D,%ESI |
(399) 0x4214cb JE 4214b0 |
(399) 0x4214cd MOVSXD %R10D,%R10 |
(399) 0x4214d0 VMOVSD %XMM3,(%RDX,%R10,8) |
(399) 0x4214d6 INC %R10D |
(399) 0x4214d9 JMP 4214b0 |
(397) 0x4214db VXORPD %XMM21,%XMM21,%XMM21 |
(397) 0x4214e1 XOR %R9D,%R9D |
(397) 0x4214e4 VPBROADCASTQ %R9,%YMM22 |
(397) 0x4214ea VPSUBQ %YMM22,%YMM19,%YMM19 |
(397) 0x4214f0 VPCMPNLEUQ %YMM28,%YMM19,%K1 |
(397) 0x4214f7 VMOVUPD (%RDX,%R9,8),%YMM19{%K1}{z} |
(397) 0x4214fe VMOVUPD 0x80(%RSP),%YMM22 |
(397) 0x421506 VMOVAPD %YMM19,%YMM22{%K1} |
(397) 0x42150c VMOVUPD %YMM22,0x80(%RSP) |
(397) 0x421514 VMULPD %YMM22,%YMM20,%YMM19 |
(397) 0x42151a VCVTTPD2DQ %YMM19,%XMM20 |
(397) 0x421520 VPMOVSXDQ %XMM20,%YMM20 |
(397) 0x421526 VPSLLQ $0x3,%YMM20,%YMM20 |
(397) 0x42152d VPADDQ %YMM20,%YMM14,%YMM14 |
(397) 0x421533 VPXORD %XMM20,%XMM20,%XMM20 |
(397) 0x421539 KMOVQ %K1,%K2 |
(397) 0x42153e VGATHERQPD (,%YMM14,1),%YMM20{%K2} |
(397) 0x421549 VRNDSCALEPD $0xb,%YMM19,%YMM22 |
(397) 0x421550 VXORPD %XMM23,%XMM23,%XMM23 |
(397) 0x421556 KMOVQ %K1,%K2 |
(397) 0x42155b VGATHERQPD 0x8(,%YMM14,1),%YMM23{%K2} |
(397) 0x421566 VSUBPD %YMM22,%YMM19,%YMM19 |
(397) 0x42156c VMOVUPD 0xa0(%RSP),%YMM22 |
(397) 0x421574 VMOVAPD %YMM20,%YMM22{%K1} |
(397) 0x42157a VMOVUPD 0xc0(%RSP),%YMM20 |
(397) 0x421582 VMOVAPD %YMM23,%YMM20{%K1} |
(397) 0x421588 VFMADD231PD %YMM17,%YMM19,%YMM11 |
(397) 0x42158e VXORPD %XMM17,%XMM17,%XMM17 |
(397) 0x421594 KMOVQ %K1,%K2 |
(397) 0x421599 VGATHERQPD 0x10(,%YMM14,1),%YMM17{%K2} |
(397) 0x4215a4 VFMADD213PD %YMM12,%YMM19,%YMM11 |
(397) 0x4215aa VFMADD213PD %YMM15,%YMM19,%YMM11 |
(397) 0x4215b0 VMOVUPD %YMM20,0xc0(%RSP) |
(397) 0x4215b8 VMULPD %YMM20,%YMM11,%YMM11 |
(397) 0x4215be VMOVUPD 0xe0(%RSP),%YMM15 |
(397) 0x4215c7 VMOVAPD %YMM17,%YMM15{%K1} |
(397) 0x4215cd VXORPD %XMM12,%XMM12,%XMM12 |
(397) 0x4215d2 KMOVQ %K1,%K2 |
(397) 0x4215d7 VGATHERQPD 0x18(,%YMM14,1),%YMM12{%K2} |
(397) 0x4215e2 VFMADD231PD %YMM13,%YMM19,%YMM8 |
(397) 0x4215e8 VFMADD213PD %YMM16,%YMM19,%YMM8 |
(397) 0x4215ee VFMADD213PD %YMM18,%YMM19,%YMM8 |
(397) 0x4215f4 VMOVAPD %YMM12,%YMM31{%K1} |
(397) 0x4215fa VFMADD231PD %YMM4,%YMM19,%YMM2 |
(397) 0x421600 VFMADD213PD %YMM6,%YMM19,%YMM2 |
(397) 0x421606 VFMADD213PD %YMM5,%YMM19,%YMM2 |
(397) 0x42160c VMOVUPD %YMM22,0xa0(%RSP) |
(397) 0x421614 VFMADD213PD %YMM11,%YMM22,%YMM2 |
(397) 0x42161a VFMADD231PD %YMM7,%YMM19,%YMM3 |
(397) 0x421620 VFMADD213PD %YMM10,%YMM19,%YMM3 |
(397) 0x421626 VFMADD213PD %YMM9,%YMM19,%YMM3 |
(397) 0x42162c VMOVUPD %YMM15,0xe0(%RSP) |
(397) 0x421635 VFMADD213PD %YMM2,%YMM15,%YMM3 |
(397) 0x42163a VFMADD231PD %YMM8,%YMM31,%YMM3{%K1}{z} |
(397) 0x421640 VEXTRACTF128 $0x1,%YMM3,%XMM2 |
(397) 0x421646 VADDPD %XMM2,%XMM3,%XMM2 |
(397) 0x42164a VSHUFPD $0x1,%XMM2,%XMM2,%XMM3 |
(397) 0x42164f VADDSD %XMM3,%XMM2,%XMM2 |
(397) 0x421653 VADDSD %XMM2,%XMM21,%XMM21 |
(397) 0x421659 JMP 4210c6 |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/optional: 433 - 950 |
-------------------------------------------------------------------------------- |
433: { return static_cast<const _Dp*>(this)->_M_payload._M_engaged; } |
[...] |
950: if (this->_M_is_engaged()) |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 107 - 132 |
-------------------------------------------------------------------------------- |
107: for (int k = 0; k < ratios.size(); ++k) |
108: ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.getRefPS(), VP.refPtcl, VP.getDistTableAB(myTableID).getDistRow(k).data())); |
[...] |
126: const int igt = P.GroupID[iat] * NumGroups; |
127: for (int jg = 0; jg < NumGroups; ++jg) |
128: { |
129: const FuncType& f2(*F[igt + jg]); |
130: int iStart = P.first(jg); |
131: int iEnd = P.last(jg); |
132: curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/shared_ptr_base.h: 1296 - 1296 |
-------------------------------------------------------------------------------- |
1296: { return _M_ptr; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1169 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1064: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/refwrap.h: 338 - 338 |
-------------------------------------------------------------------------------- |
338: { return *_M_data; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 236 - 260 |
-------------------------------------------------------------------------------- |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.60 |
CQA speedup if FP arith vectorized | 2.07 |
CQA speedup if fully vectorized | 10.27 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.20 |
Bottlenecks | micro-operation queue |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | optional:433-433,optional:950-950,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,stl_vector.h:919-919,stl_vector.h:1046-1046,stl_vector.h:1064-1064,stl_vector.h:1169-1169,refwrap.h:338-338,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.83 |
CQA cycles if no scalar integer | 4.17 |
CQA cycles if FP arith vectorized | 5.24 |
CQA cycles if fully vectorized | 1.05 |
Front-end cycles | 10.83 |
DIV/SQRT cycles | 2.40 |
P0 cycles | 2.50 |
P1 cycles | 9.00 |
P2 cycles | 9.00 |
P3 cycles | 5.50 |
P4 cycles | 2.30 |
P5 cycles | 2.40 |
P6 cycles | 5.50 |
P7 cycles | 5.50 |
P8 cycles | 5.50 |
P9 cycles | 2.40 |
P10 cycles | 9.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 11.57 |
Stall cycles (UFS) | 1.16 |
Nb insns | 61.00 |
Nb uops | 65.00 |
Nb loads | 27.00 |
Nb stores | 9.00 |
Nb stack references | 11.00 |
FLOP/cycle | 0.09 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 38.12 |
Bytes prefetched | 0.00 |
Bytes loaded | 317.00 |
Bytes stored | 96.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 35.71 |
Vectorization ratio load | 50.00 |
Vectorization ratio store | 11.11 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 50.00 |
Vector-efficiency ratio all | 21.04 |
Vector-efficiency ratio load | 28.91 |
Vector-efficiency ratio store | 16.67 |
Vector-efficiency ratio mul | 6.25 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 16.60 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.60 |
CQA speedup if FP arith vectorized | 2.07 |
CQA speedup if fully vectorized | 10.27 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.20 |
Bottlenecks | micro-operation queue |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | optional:433-433,optional:950-950,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,stl_vector.h:919-919,stl_vector.h:1046-1046,stl_vector.h:1064-1064,stl_vector.h:1169-1169,refwrap.h:338-338,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.83 |
CQA cycles if no scalar integer | 4.17 |
CQA cycles if FP arith vectorized | 5.24 |
CQA cycles if fully vectorized | 1.05 |
Front-end cycles | 10.83 |
DIV/SQRT cycles | 2.40 |
P0 cycles | 2.50 |
P1 cycles | 9.00 |
P2 cycles | 9.00 |
P3 cycles | 5.50 |
P4 cycles | 2.30 |
P5 cycles | 2.40 |
P6 cycles | 5.50 |
P7 cycles | 5.50 |
P8 cycles | 5.50 |
P9 cycles | 2.40 |
P10 cycles | 9.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 11.57 |
Stall cycles (UFS) | 1.16 |
Nb insns | 61.00 |
Nb uops | 65.00 |
Nb loads | 27.00 |
Nb stores | 9.00 |
Nb stack references | 11.00 |
FLOP/cycle | 0.09 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 38.12 |
Bytes prefetched | 0.00 |
Bytes loaded | 317.00 |
Bytes stored | 96.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 35.71 |
Vectorization ratio load | 50.00 |
Vectorization ratio store | 11.11 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 50.00 |
Vector-efficiency ratio all | 21.04 |
Vector-efficiency ratio load | 28.91 |
Vector-efficiency ratio store | 16.67 |
Vector-efficiency ratio mul | 6.25 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 16.60 |
Path / |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 61 |
nb uops | 65 |
loop length | 313 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 5 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 10.83 cycles |
front end | 10.83 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.50 | 9.00 | 9.00 | 5.50 | 2.30 | 2.40 | 5.50 | 5.50 | 5.50 | 2.40 | 9.00 |
cycles | 2.40 | 2.50 | 9.00 | 9.00 | 5.50 | 2.30 | 2.40 | 5.50 | 5.50 | 5.50 | 2.40 | 9.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 11.57 |
Stall cycles | 1.16 |
LM full (events) | 3.65 |
Front-end | 10.83 |
Dispatch | 9.00 |
Overall L1 | 10.83 |
Vectorization ratios (INT) |
all | 27% |
load | 50% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 33% |
Vectorization ratios (FP) |
all | 50% |
load | 50% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
Vectorization ratios (INT+FP) |
all | 35% |
load | 50% |
store | 11% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
Vector-efficiency ratios (INT) |
all | 18% |
load | 27% |
store | 12% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
Vector-efficiency ratios (FP) |
all | 26% |
load | 31% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector-efficiency ratios (INT+FP) |
all | 21% |
load | 28% |
store | 16% |
mul | 6% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD 0x60(%RSP),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVSD 0x38(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM0,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4d6ad0 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RDX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42165e <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x72e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RCX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42166d <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x73d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RCX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%R8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R14,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%R8),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 45f8f0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R9,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 420f80 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RDX,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX,%R14,4),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R9D,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
DEC %R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTD %R14D,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQU64 0xd5335(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU64 0xd534b(%RIP),%YMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU64 0xd5361(%RIP),%YMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x60(%RSP),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4210dc <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1ac> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 61 |
nb uops | 65 |
loop length | 313 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 5 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 10.83 cycles |
front end | 10.83 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.50 | 9.00 | 9.00 | 5.50 | 2.30 | 2.40 | 5.50 | 5.50 | 5.50 | 2.40 | 9.00 |
cycles | 2.40 | 2.50 | 9.00 | 9.00 | 5.50 | 2.30 | 2.40 | 5.50 | 5.50 | 5.50 | 2.40 | 9.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 11.57 |
Stall cycles | 1.16 |
LM full (events) | 3.65 |
Front-end | 10.83 |
Dispatch | 9.00 |
Overall L1 | 10.83 |
Vectorization ratios (INT) |
all | 27% |
load | 50% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 33% |
Vectorization ratios (FP) |
all | 50% |
load | 50% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
Vectorization ratios (INT+FP) |
all | 35% |
load | 50% |
store | 11% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
Vector-efficiency ratios (INT) |
all | 18% |
load | 27% |
store | 12% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
Vector-efficiency ratios (FP) |
all | 26% |
load | 31% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector-efficiency ratios (INT+FP) |
all | 21% |
load | 28% |
store | 16% |
mul | 6% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD 0x60(%RSP),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD %YMM31,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVSD 0x38(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM0,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4d6ad0 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RDX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42165e <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x72e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RCX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42166d <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x73d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RCX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%R8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R14,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%R8),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 45f8f0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R9,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 420f80 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RDX,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX,%R14,4),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R9D,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
DEC %R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTD %R14D,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQU64 0xd5335(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU64 0xd534b(%RIP),%YMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU64 0xd5361(%RIP),%YMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x60(%RSP),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4210dc <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1ac> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |