Function: _ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i | Module: libqmcwfs.so | Source: TinyVector.h:62-62 [...] | Coverage: 0.12% |
---|
Function: _ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i | Module: libqmcwfs.so | Source: TinyVector.h:62-62 [...] | Coverage: 0.12% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 149 - 159 |
-------------------------------------------------------------------------------- |
149: inline TinyVector<T, D> dot(const T* a, const TinyVector<T, D>* b, int n) |
[...] |
155: for (int i = 0; i < n; i++) |
156: res += a[i] * b[i]; |
157: #endif |
158: return res; |
159: } |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 62 - 62 |
-------------------------------------------------------------------------------- |
62: X[d] = T(0); |
/home/eoseret/qaas_runs_CPU_9468/171-143-7755/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
94: (const_cast<T1&>(a) += b); |
0x6a8c0 VXORPD %XMM0,%XMM0,%XMM0 |
0x6a8c4 MOV %RDX,%R8 |
0x6a8c7 MOVQ $0,0x10(%RDI) |
0x6a8cf MOV %ECX,%EDX |
0x6a8d1 VMOVUPD %XMM0,(%RDI) |
0x6a8d5 TEST %ECX,%ECX |
0x6a8d7 JLE 6ace9 |
0x6a8dd LEA -0x1(%RCX),%EAX |
0x6a8e0 CMP $0x2,%EAX |
0x6a8e3 JBE 6acf2 |
0x6a8e9 SHR $0x2,%ECX |
0x6a8ec VXORPD %XMM4,%XMM4,%XMM4 |
0x6a8f0 VMOVDQA 0x1dac8(%RIP),%YMM10 |
0x6a8f8 VMOVDQA 0x1dae0(%RIP),%YMM9 |
0x6a900 SAL $0x5,%RCX |
0x6a904 VMOVDQA 0x1daf4(%RIP),%YMM8 |
0x6a90c VMOVDQA 0x1db0c(%RIP),%YMM7 |
0x6a914 MOV %RSI,%R9 |
0x6a917 LEA (%RCX,%RSI,1),%R10 |
0x6a91b SUB $0x20,%RCX |
0x6a91f VMOVDQA 0x1d879(%RIP),%YMM6 |
0x6a927 VMOVDQA 0x1d891(%RIP),%YMM5 |
0x6a92f SHR $0x5,%RCX |
0x6a933 VMOVAPD %YMM4,%YMM3 |
0x6a937 VMOVAPD %YMM4,%YMM15 |
0x6a93b MOV %R8,%R11 |
0x6a93e INC %RCX |
0x6a941 AND $0x3,%ECX |
0x6a944 JE 6aa70 |
0x6a94a CMP $0x1,%RCX |
0x6a94e JE 6aa0d |
0x6a954 CMP $0x2,%RCX |
0x6a958 JE 6a9b2 |
0x6a95a VMOVUPD (%R8),%YMM15 |
0x6a95f VMOVUPD 0x20(%R8),%YMM12 |
0x6a965 LEA 0x20(%RSI),%R9 |
0x6a969 LEA 0x60(%R8),%R11 |
0x6a96d VMOVUPD 0x40(%R8),%YMM11 |
0x6a973 VMOVUPD (%RSI),%YMM2 |
0x6a977 VMOVAPD %YMM15,%YMM3 |
0x6a97b VMOVAPD %YMM15,%YMM1 |
0x6a97f VPERMT2PD %YMM12,%YMM6,%YMM15 |
0x6a985 VPERMT2PD %YMM12,%YMM10,%YMM3 |
0x6a98b VPERMT2PD %YMM12,%YMM8,%YMM1 |
0x6a991 VPERMT2PD %YMM11,%YMM5,%YMM15 |
0x6a997 VPERMT2PD %YMM11,%YMM9,%YMM3 |
0x6a99d VFMADD132PD %YMM2,%YMM4,%YMM15 |
0x6a9a2 VPERMT2PD %YMM11,%YMM7,%YMM1 |
0x6a9a8 VFMADD132PD %YMM2,%YMM4,%YMM3 |
0x6a9ad VFMADD231PD %YMM1,%YMM2,%YMM4 |
0x6a9b2 VMOVUPD (%R11),%YMM11 |
0x6a9b7 VMOVUPD 0x20(%R11),%YMM14 |
0x6a9bd ADD $0x20,%R9 |
0x6a9c1 ADD $0x60,%R11 |
0x6a9c5 VMOVUPD -0x20(%R11),%YMM13 |
0x6a9cb VMOVUPD -0x20(%R9),%YMM0 |
0x6a9d1 VMOVAPD %YMM11,%YMM12 |
0x6a9d6 VMOVAPD %YMM11,%YMM2 |
0x6a9da VPERMT2PD %YMM14,%YMM6,%YMM11 |
0x6a9e0 VPERMT2PD %YMM14,%YMM10,%YMM12 |
0x6a9e6 VPERMT2PD %YMM14,%YMM8,%YMM2 |
0x6a9ec VPERMT2PD %YMM13,%YMM5,%YMM11 |
0x6a9f2 VPERMT2PD %YMM13,%YMM9,%YMM12 |
0x6a9f8 VFMADD231PD %YMM11,%YMM0,%YMM15 |
0x6a9fd VPERMT2PD %YMM13,%YMM7,%YMM2 |
0x6aa03 VFMADD231PD %YMM12,%YMM0,%YMM3 |
0x6aa08 VFMADD231PD %YMM2,%YMM0,%YMM4 |
0x6aa0d VMOVUPD (%R11),%YMM1 |
0x6aa12 VMOVUPD 0x20(%R11),%YMM11 |
0x6aa18 ADD $0x20,%R9 |
0x6aa1c ADD $0x60,%R11 |
0x6aa20 VMOVUPD -0x20(%R11),%YMM14 |
0x6aa26 VMOVUPD -0x20(%R9),%YMM0 |
0x6aa2c VMOVAPD %YMM1,%YMM13 |
0x6aa30 VMOVAPD %YMM1,%YMM12 |
0x6aa34 VPERMT2PD %YMM11,%YMM6,%YMM1 |
0x6aa3a VPERMT2PD %YMM11,%YMM10,%YMM13 |
0x6aa40 VPERMT2PD %YMM11,%YMM8,%YMM12 |
0x6aa46 VPERMT2PD %YMM14,%YMM5,%YMM1 |
0x6aa4c VPERMT2PD %YMM14,%YMM9,%YMM13 |
0x6aa52 VFMADD231PD %YMM1,%YMM0,%YMM15 |
0x6aa57 VPERMT2PD %YMM14,%YMM7,%YMM12 |
0x6aa5d VFMADD231PD %YMM13,%YMM0,%YMM3 |
0x6aa62 VFMADD231PD %YMM12,%YMM0,%YMM4 |
0x6aa67 CMP %R9,%R10 |
0x6aa6a JE 6abe6 |
(672) 0x6aa70 VMOVUPD (%R11),%YMM1 |
(672) 0x6aa75 VMOVUPD 0x20(%R11),%YMM0 |
(672) 0x6aa7b SUB $-0x80,%R9 |
(672) 0x6aa7f ADD $0x180,%R11 |
(672) 0x6aa86 VMOVUPD -0x140(%R11),%YMM14 |
(672) 0x6aa8f VMOVUPD -0x80(%R9),%YMM2 |
(672) 0x6aa95 VMOVAPD %YMM1,%YMM11 |
(672) 0x6aa99 VMOVAPD %YMM1,%YMM13 |
(672) 0x6aa9d VPERMT2PD %YMM0,%YMM6,%YMM1 |
(672) 0x6aaa3 VPERMT2PD %YMM0,%YMM10,%YMM11 |
(672) 0x6aaa9 VPERMT2PD %YMM0,%YMM8,%YMM13 |
(672) 0x6aaaf VPERMT2PD %YMM14,%YMM5,%YMM1 |
(672) 0x6aab5 VMOVUPD -0xe0(%R11),%YMM0 |
(672) 0x6aabe VPERMT2PD %YMM14,%YMM7,%YMM13 |
(672) 0x6aac4 VFMADD132PD %YMM2,%YMM15,%YMM1 |
(672) 0x6aac9 VPERMT2PD %YMM14,%YMM9,%YMM11 |
(672) 0x6aacf VMOVUPD -0x100(%R11),%YMM15 |
(672) 0x6aad8 VFMADD132PD %YMM2,%YMM3,%YMM11 |
(672) 0x6aadd VFMADD231PD %YMM13,%YMM2,%YMM4 |
(672) 0x6aae2 VMOVUPD -0x120(%R11),%YMM2 |
(672) 0x6aaeb VMOVUPD -0x60(%R9),%YMM3 |
(672) 0x6aaf1 VMOVAPD %YMM2,%YMM12 |
(672) 0x6aaf5 VMOVAPD %YMM2,%YMM14 |
(672) 0x6aaf9 VPERMT2PD %YMM15,%YMM6,%YMM2 |
(672) 0x6aaff VPERMT2PD %YMM15,%YMM10,%YMM12 |
(672) 0x6ab05 VPERMT2PD %YMM15,%YMM8,%YMM14 |
(672) 0x6ab0b VPERMT2PD %YMM0,%YMM5,%YMM2 |
(672) 0x6ab11 VMOVUPD -0xa0(%R11),%YMM15 |
(672) 0x6ab1a VPERMT2PD %YMM0,%YMM7,%YMM14 |
(672) 0x6ab20 VFMADD132PD %YMM3,%YMM1,%YMM2 |
(672) 0x6ab25 VPERMT2PD %YMM0,%YMM9,%YMM12 |
(672) 0x6ab2b VMOVUPD -0xc0(%R11),%YMM1 |
(672) 0x6ab34 VFMADD132PD %YMM3,%YMM11,%YMM12 |
(672) 0x6ab39 VFMADD231PD %YMM14,%YMM3,%YMM4 |
(672) 0x6ab3e VMOVUPD -0x80(%R11),%YMM0 |
(672) 0x6ab44 VMOVUPD -0x40(%R9),%YMM3 |
(672) 0x6ab4a VMOVAPD %YMM1,%YMM11 |
(672) 0x6ab4e VMOVAPD %YMM1,%YMM13 |
(672) 0x6ab52 VPERMT2PD %YMM15,%YMM6,%YMM1 |
(672) 0x6ab58 VMOVUPD -0x40(%R11),%YMM14 |
(672) 0x6ab5e VPERMT2PD %YMM15,%YMM10,%YMM11 |
(672) 0x6ab64 VPERMT2PD %YMM15,%YMM8,%YMM13 |
(672) 0x6ab6a VPERMT2PD %YMM0,%YMM5,%YMM1 |
(672) 0x6ab70 VMOVUPD -0x60(%R11),%YMM15 |
(672) 0x6ab76 VPERMT2PD %YMM0,%YMM9,%YMM11 |
(672) 0x6ab7c VPERMT2PD %YMM0,%YMM7,%YMM13 |
(672) 0x6ab82 VMOVUPD -0x20(%R11),%YMM0 |
(672) 0x6ab88 VFMADD132PD %YMM3,%YMM2,%YMM1 |
(672) 0x6ab8d VFMADD132PD %YMM3,%YMM12,%YMM11 |
(672) 0x6ab92 VFMADD132PD %YMM13,%YMM4,%YMM3 |
(672) 0x6ab97 VMOVAPD %YMM15,%YMM12 |
(672) 0x6ab9c VMOVUPD -0x20(%R9),%YMM4 |
(672) 0x6aba2 VPERMT2PD %YMM14,%YMM8,%YMM12 |
(672) 0x6aba8 VPERMT2PD %YMM0,%YMM7,%YMM12 |
(672) 0x6abae VMOVAPD %YMM3,%YMM2 |
(672) 0x6abb2 VMOVAPD %YMM15,%YMM3 |
(672) 0x6abb6 VPERMT2PD %YMM14,%YMM6,%YMM15 |
(672) 0x6abbc VPERMT2PD %YMM14,%YMM10,%YMM3 |
(672) 0x6abc2 VPERMT2PD %YMM0,%YMM5,%YMM15 |
(672) 0x6abc8 VPERMT2PD %YMM0,%YMM9,%YMM3 |
(672) 0x6abce VFMADD132PD %YMM4,%YMM1,%YMM15 |
(672) 0x6abd3 VFMADD132PD %YMM4,%YMM11,%YMM3 |
(672) 0x6abd8 VFMADD132PD %YMM12,%YMM2,%YMM4 |
(672) 0x6abdd CMP %R9,%R10 |
(672) 0x6abe0 JNE 6aa70 |
0x6abe6 VEXTRACTF64X2 $0x1,%YMM3,%XMM7 |
0x6abed VEXTRACTF64X2 $0x1,%YMM15,%XMM13 |
0x6abf4 VEXTRACTF64X2 $0x1,%YMM4,%XMM10 |
0x6abfb MOV %EDX,%EAX |
0x6abfd VADDPD %XMM15,%XMM13,%XMM14 |
0x6ac02 VADDPD %XMM3,%XMM7,%XMM6 |
0x6ac06 AND $-0x4,%EAX |
0x6ac09 VADDPD %XMM4,%XMM10,%XMM9 |
0x6ac0d VADDPD %XMM13,%XMM15,%XMM15 |
0x6ac12 MOV %EAX,%ECX |
0x6ac14 VADDPD %XMM7,%XMM3,%XMM12 |
0x6ac18 VADDPD %XMM10,%XMM4,%XMM4 |
0x6ac1d VUNPCKHPD %XMM6,%XMM6,%XMM5 |
0x6ac21 VUNPCKHPD %XMM14,%XMM14,%XMM2 |
0x6ac26 VADDPD %XMM6,%XMM5,%XMM11 |
0x6ac2a VADDPD %XMM14,%XMM2,%XMM0 |
0x6ac2f VUNPCKHPD %XMM9,%XMM9,%XMM8 |
0x6ac34 VADDPD %XMM9,%XMM8,%XMM9 |
0x6ac39 VUNPCKLPD %XMM11,%XMM0,%XMM8 |
0x6ac3e CMP %EAX,%EDX |
0x6ac40 JE 6aced |
0x6ac46 VZEROUPPER |
0x6ac49 SUB %ECX,%EDX |
0x6ac4b CMP $0x1,%EDX |
0x6ac4e JE 6acc1 |
0x6ac50 LEA (%RCX,%RCX,2),%R10 |
0x6ac54 VMOVUPD (%RSI,%RCX,8),%XMM3 |
0x6ac59 LEA (%R8,%R10,8),%R9 |
0x6ac5d VMOVUPD (%R9),%XMM10 |
0x6ac62 VMOVUPD 0x10(%R9),%XMM9 |
0x6ac68 VMOVUPD 0x20(%R9),%XMM8 |
0x6ac6e VPERMILPD $0x1,%XMM10,%XMM1 |
0x6ac74 VBLENDPD $0x2,%XMM9,%XMM10,%XMM11 |
0x6ac7a VUNPCKLPD %XMM10,%XMM9,%XMM6 |
0x6ac7f VUNPCKLPD %XMM8,%XMM1,%XMM7 |
0x6ac84 VFMADD132PD %XMM3,%XMM15,%XMM11 |
0x6ac89 VBLENDPD $0x2,%XMM8,%XMM6,%XMM5 |
0x6ac8f VFMADD132PD %XMM3,%XMM12,%XMM7 |
0x6ac94 VFMADD132PD %XMM5,%XMM4,%XMM3 |
0x6ac99 VUNPCKHPD %XMM11,%XMM11,%XMM2 |
0x6ac9e VUNPCKHPD %XMM7,%XMM7,%XMM14 |
0x6aca2 VADDPD %XMM11,%XMM2,%XMM15 |
0x6aca7 VUNPCKHPD %XMM3,%XMM3,%XMM13 |
0x6acab VADDPD %XMM7,%XMM14,%XMM0 |
0x6acaf VADDPD %XMM3,%XMM13,%XMM9 |
0x6acb3 VUNPCKLPD %XMM0,%XMM15,%XMM8 |
0x6acb7 TEST $0x1,%DL |
0x6acba JE 6ace0 |
0x6acbc AND $-0x2,%EDX |
0x6acbf ADD %EDX,%EAX |
0x6acc1 CLTQ |
0x6acc3 VMOVSD (%RSI,%RAX,8),%XMM4 |
0x6acc8 LEA (%RAX,%RAX,2),%RDX |
0x6accc VMOVDDUP (%RSI,%RAX,8),%XMM12 |
0x6acd1 LEA (%R8,%RDX,8),%R8 |
0x6acd5 VFMADD231PD (%R8),%XMM12,%XMM8 |
0x6acda VFMADD231SD 0x10(%R8),%XMM4,%XMM9 |
0x6ace0 VMOVUPD %XMM8,(%RDI) |
0x6ace4 VMOVSD %XMM9,0x10(%RDI) |
0x6ace9 MOV %RDI,%RAX |
0x6acec RET |
0x6aced VZEROUPPER |
0x6acf0 JMP 6ace0 |
0x6acf2 VXORPD %XMM8,%XMM8,%XMM8 |
0x6acf7 XOR %ECX,%ECX |
0x6acf9 VXORPD %XMM9,%XMM9,%XMM9 |
0x6acfe XOR %EAX,%EAX |
0x6ad00 VMOVAPD %XMM8,%XMM4 |
0x6ad04 VMOVAPD %XMM8,%XMM12 |
0x6ad09 VMOVAPD %XMM8,%XMM15 |
0x6ad0e JMP 6ac49 |
0x6ad13 NOP |
0x6ad14 NOPW %CS:(%RAX,%RAX,1) |
0x6ad1f NOP |
Path / |
Source file and lines | TinyVector.h:62-62 |
Module | libqmcwfs.so |
nb instructions | 160 |
nb uops | 162 |
loop length | 746 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 16 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 27.00 cycles |
front end | 27.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 14.00 | 19.50 | 9.00 | 9.00 | 2.00 | 32.00 | 11.30 | 2.00 | 2.00 | 2.00 | 11.20 | 9.00 |
cycles | 14.00 | 19.50 | 9.00 | 9.00 | 2.00 | 32.00 | 11.30 | 2.00 | 2.00 | 2.00 | 11.20 | 9.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 36.30 |
Stall cycles | 8.80 |
ROB full (events) | 12.25 |
Front-end | 27.00 |
Dispatch | 32.00 |
Overall L1 | 32.00 |
all | 47% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 85% |
load | 85% |
store | 66% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | 92% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 79% |
all | 79% |
load | 88% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | 92% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 69% |
all | 25% |
load | 50% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 35% |
load | 38% |
store | 20% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | 40% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 35% |
all | 33% |
load | 40% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | 40% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 31% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,0x10(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD %XMM0,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 6ace9 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x429> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 6acf2 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x432> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SHR $0x2,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA 0x1dac8(%RIP),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x1dae0(%RIP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
SAL $0x5,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0x1daf4(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x1db0c(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RSI,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%RCX,%RSI,1),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQA 0x1d879(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x1d891(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
SHR $0x5,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVAPD %YMM4,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM4,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R8,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x3,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 6aa70 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x1b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6aa0d <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x14d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6a9b2 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0xf2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%R8),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%R8),%YMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
LEA 0x20(%RSI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD 0x40(%R8),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD (%RSI),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM15,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM15,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPERMT2PD %YMM12,%YMM6,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM12,%YMM10,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM12,%YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM11,%YMM5,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM11,%YMM9,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD132PD %YMM2,%YMM4,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMT2PD %YMM11,%YMM7,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD132PD %YMM2,%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM1,%YMM2,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%R11),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%R11),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
ADD $0x20,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x60,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD -0x20(%R11),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD -0x20(%R9),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM11,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM11,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPERMT2PD %YMM14,%YMM6,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM14,%YMM10,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM14,%YMM8,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM13,%YMM5,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM13,%YMM9,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD231PD %YMM11,%YMM0,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMT2PD %YMM13,%YMM7,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD231PD %YMM12,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM2,%YMM0,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%R11),%YMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%R11),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
ADD $0x20,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x60,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD -0x20(%R11),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD -0x20(%R9),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM1,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM1,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPERMT2PD %YMM11,%YMM6,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM11,%YMM10,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM11,%YMM8,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM14,%YMM5,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM14,%YMM9,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD231PD %YMM1,%YMM0,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMT2PD %YMM14,%YMM7,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD231PD %YMM13,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM12,%YMM0,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6abe6 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x326> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF64X2 $0x1,%YMM3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VEXTRACTF64X2 $0x1,%YMM15,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VEXTRACTF64X2 $0x1,%YMM4,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EDX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM15,%XMM13,%XMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM3,%XMM7,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VADDPD %XMM4,%XMM10,%XMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM13,%XMM15,%XMM15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM7,%XMM3,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM10,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM6,%XMM6,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VUNPCKHPD %XMM14,%XMM14,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM6,%XMM5,%XMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM14,%XMM2,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM9,%XMM9,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM9,%XMM8,%XMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKLPD %XMM11,%XMM0,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6aced <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x42d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
SUB %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6acc1 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x401> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%RCX,%RCX,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RSI,%RCX,8),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
LEA (%R8,%R10,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R9),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%R9),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%R9),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VPERMILPD $0x1,%XMM10,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VBLENDPD $0x2,%XMM9,%XMM10,%XMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM10,%XMM9,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VUNPCKLPD %XMM8,%XMM1,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VFMADD132PD %XMM3,%XMM15,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDPD $0x2,%XMM8,%XMM6,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VFMADD132PD %XMM3,%XMM12,%XMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %XMM5,%XMM4,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VUNPCKHPD %XMM11,%XMM11,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VUNPCKHPD %XMM7,%XMM7,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM11,%XMM2,%XMM15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM3,%XMM3,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM7,%XMM14,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM3,%XMM13,%XMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKLPD %XMM0,%XMM15,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
TEST $0x1,%DL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 6ace0 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x420> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVSD (%RSI,%RAX,8),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP (%RSI,%RAX,8),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD (%R8),%XMM12,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x10(%R8),%XMM4,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD %XMM8,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVSD %XMM9,0x10(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 6ace0 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x420> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VXORPD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM8,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %XMM8,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %XMM8,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
JMP 6ac49 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x389> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | TinyVector.h:62-62 |
Module | libqmcwfs.so |
nb instructions | 160 |
nb uops | 162 |
loop length | 746 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 16 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 27.00 cycles |
front end | 27.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 14.00 | 19.50 | 9.00 | 9.00 | 2.00 | 32.00 | 11.30 | 2.00 | 2.00 | 2.00 | 11.20 | 9.00 |
cycles | 14.00 | 19.50 | 9.00 | 9.00 | 2.00 | 32.00 | 11.30 | 2.00 | 2.00 | 2.00 | 11.20 | 9.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 36.30 |
Stall cycles | 8.80 |
ROB full (events) | 12.25 |
Front-end | 27.00 |
Dispatch | 32.00 |
Overall L1 | 32.00 |
all | 47% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 85% |
load | 85% |
store | 66% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | 92% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 79% |
all | 79% |
load | 88% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | 92% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 69% |
all | 25% |
load | 50% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 35% |
load | 38% |
store | 20% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | 40% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 35% |
all | 33% |
load | 40% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | 40% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 31% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,0x10(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD %XMM0,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 6ace9 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x429> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 6acf2 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x432> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SHR $0x2,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA 0x1dac8(%RIP),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x1dae0(%RIP),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
SAL $0x5,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA 0x1daf4(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x1db0c(%RIP),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %RSI,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%RCX,%RSI,1),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQA 0x1d879(%RIP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQA 0x1d891(%RIP),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
SHR $0x5,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVAPD %YMM4,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM4,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R8,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x3,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 6aa70 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x1b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6aa0d <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x14d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6a9b2 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0xf2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD (%R8),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%R8),%YMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
LEA 0x20(%RSI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD 0x40(%R8),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD (%RSI),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM15,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM15,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPERMT2PD %YMM12,%YMM6,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM12,%YMM10,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM12,%YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM11,%YMM5,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM11,%YMM9,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD132PD %YMM2,%YMM4,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMT2PD %YMM11,%YMM7,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD132PD %YMM2,%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM1,%YMM2,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%R11),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%R11),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
ADD $0x20,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x60,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD -0x20(%R11),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD -0x20(%R9),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM11,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM11,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPERMT2PD %YMM14,%YMM6,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM14,%YMM10,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM14,%YMM8,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM13,%YMM5,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM13,%YMM9,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD231PD %YMM11,%YMM0,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMT2PD %YMM13,%YMM7,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD231PD %YMM12,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM2,%YMM0,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%R11),%YMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%R11),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
ADD $0x20,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x60,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVUPD -0x20(%R11),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD -0x20(%R9),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM1,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM1,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPERMT2PD %YMM11,%YMM6,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM11,%YMM10,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM11,%YMM8,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM14,%YMM5,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPERMT2PD %YMM14,%YMM9,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD231PD %YMM1,%YMM0,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMT2PD %YMM14,%YMM7,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VFMADD231PD %YMM13,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM12,%YMM0,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6abe6 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x326> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF64X2 $0x1,%YMM3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VEXTRACTF64X2 $0x1,%YMM15,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VEXTRACTF64X2 $0x1,%YMM4,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EDX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM15,%XMM13,%XMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM3,%XMM7,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VADDPD %XMM4,%XMM10,%XMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM13,%XMM15,%XMM15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM7,%XMM3,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM10,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM6,%XMM6,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VUNPCKHPD %XMM14,%XMM14,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM6,%XMM5,%XMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM14,%XMM2,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM9,%XMM9,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM9,%XMM8,%XMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKLPD %XMM11,%XMM0,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6aced <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x42d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
SUB %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 6acc1 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x401> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%RCX,%RCX,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RSI,%RCX,8),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
LEA (%R8,%R10,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R9),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x10(%R9),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%R9),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VPERMILPD $0x1,%XMM10,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VBLENDPD $0x2,%XMM9,%XMM10,%XMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM10,%XMM9,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VUNPCKLPD %XMM8,%XMM1,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VFMADD132PD %XMM3,%XMM15,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDPD $0x2,%XMM8,%XMM6,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VFMADD132PD %XMM3,%XMM12,%XMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %XMM5,%XMM4,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VUNPCKHPD %XMM11,%XMM11,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VUNPCKHPD %XMM7,%XMM7,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM11,%XMM2,%XMM15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM3,%XMM3,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM7,%XMM14,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %XMM3,%XMM13,%XMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKLPD %XMM0,%XMM15,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
TEST $0x1,%DL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 6ace0 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x420> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVSD (%RSI,%RAX,8),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP (%RSI,%RAX,8),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD (%R8),%XMM12,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x10(%R8),%XMM4,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD %XMM8,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVSD %XMM9,0x10(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 6ace0 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x420> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VXORPD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM8,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %XMM8,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %XMM8,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
JMP 6ac49 <_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i+0x389> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN11qmcplusplus4simd3dotIdLj3EEENS_10TinyVectorIT_XT0_EEEPKS3_PKS4_i– | 0.12 | 0.12 |
○Loop 672 - inner_product.hpp:155-155 - libqmcwfs.so | 0.12 | 0.1 |