Loop Id: 326 | Module: exec | Source: advec_mom.cpp:62-66 [...] | Coverage: 2.33% |
---|
Loop Id: 326 | Module: exec | Source: advec_mom.cpp:62-66 [...] | Coverage: 2.33% |
---|
0x43c910 MOVQ %XMM10,%RSI |
0x43c915 MOVQ %XMM12,%R11 |
0x43c91a MOV %RSI,%RAX |
0x43c91d XOR %EDX,%EDX |
0x43c91f DIV %R11 |
0x43c922 MOVQ %RAX,%XMM1 |
0x43c927 PSHUFD $-0x12,%XMM10,%XMM2 |
0x43c92d MOVQ %XMM2,%RDI |
0x43c932 MOVDQA -0xb0(%RBP),%XMM2 [10] |
0x43c93a MOVQ %XMM2,%R10 |
0x43c93f MOV %RDI,%RAX |
0x43c942 XOR %EDX,%EDX |
0x43c944 DIV %R10 |
0x43c947 MOVQ %RAX,%XMM2 |
0x43c94c PUNPCKLQDQ %XMM2,%XMM1 |
0x43c950 PSHUFD $-0x18,%XMM1,%XMM1 |
0x43c955 PADDD -0x100(%RBP),%XMM1 [10] |
0x43c95d MOV %RSI,%RAX |
0x43c960 CQTO |
0x43c962 IDIV %R11 |
0x43c965 MOVQ %RDX,%XMM3 |
0x43c96a MOV %RDI,%RAX |
0x43c96d CQTO |
0x43c96f IDIV %R10 |
0x43c972 MOVQ %RDX,%XMM2 |
0x43c977 PUNPCKLQDQ %XMM2,%XMM3 |
0x43c97b PADDQ -0xf0(%RBP),%XMM3 [10] |
0x43c983 PSLLQ $0x20,%XMM3 |
0x43c988 PXOR %XMM15,%XMM15 |
0x43c98d PCMPGTD %XMM1,%XMM15 |
0x43c992 MOVDQA %XMM1,%XMM2 |
0x43c996 PSHUFD $-0x13,%XMM3,%XMM13 |
0x43c99c PUNPCKLDQ %XMM15,%XMM2 |
0x43c9a1 MOVDQA %XMM4,%XMM3 |
0x43c9a5 PMULUDQ %XMM2,%XMM3 |
0x43c9a9 MOVDQA %XMM13,%XMM5 |
0x43c9ae MOVDQA %XMM11,%XMM7 |
0x43c9b3 PMULUDQ %XMM2,%XMM7 |
0x43c9b7 PUNPCKLDQ 0x4de30(%RIP),%XMM15 [7] |
0x43c9c0 PSRAD $0x1f,%XMM5 |
0x43c9c5 MOVDQA %XMM4,%XMM14 |
0x43c9ca PMULUDQ %XMM15,%XMM14 |
0x43c9cf PADDQ %XMM7,%XMM14 |
0x43c9d4 PUNPCKLDQ %XMM5,%XMM13 |
0x43c9d9 PSLLQ $0x20,%XMM14 |
0x43c9df PADDQ %XMM13,%XMM3 |
0x43c9e4 MOVDQA %XMM0,%XMM5 |
0x43c9e8 PADDQ %XMM14,%XMM3 |
0x43c9ed PMULUDQ %XMM2,%XMM5 |
0x43c9f1 MOVDQA %XMM0,%XMM7 |
0x43c9f5 PSRLQ $0x20,%XMM7 |
0x43c9fa PSLLQ $0x3,%XMM3 |
0x43c9ff PMULUDQ %XMM2,%XMM7 |
0x43ca03 MOVDQA %XMM0,%XMM14 |
0x43ca08 PMULUDQ %XMM15,%XMM14 |
0x43ca0d PADDQ -0xe0(%RBP),%XMM3 [10] |
0x43ca15 PADDQ %XMM7,%XMM14 |
0x43ca1a PSLLQ $0x20,%XMM14 |
0x43ca20 PADDQ %XMM13,%XMM5 |
0x43ca25 MOVQ %XMM3,%RAX |
0x43ca2a PADDQ %XMM14,%XMM5 |
0x43ca2f PSUBD 0x4ecf9(%RIP),%XMM1 [7] |
0x43ca37 PXOR %XMM14,%XMM14 |
0x43ca3c PSHUFD $-0x12,%XMM3,%XMM7 |
0x43ca41 PCMPGTD %XMM1,%XMM14 |
0x43ca46 PUNPCKLDQ %XMM14,%XMM1 |
0x43ca4b MOVDQA %XMM6,%XMM3 |
0x43ca4f MOVQ %XMM7,%RDX |
0x43ca54 PMULUDQ %XMM1,%XMM3 |
0x43ca58 MOVDQA %XMM6,%XMM7 |
0x43ca5c PSRLQ $0x20,%XMM7 |
0x43ca61 PMULUDQ %XMM7,%XMM1 |
0x43ca65 PSHUFD $-0x2c,%XMM14,%XMM14 |
0x43ca6b PMULUDQ %XMM6,%XMM14 |
0x43ca70 PADDQ %XMM1,%XMM14 |
0x43ca75 PSLLQ $0x20,%XMM14 |
0x43ca7b PADDQ %XMM13,%XMM3 |
0x43ca80 PADDQ %XMM14,%XMM3 |
0x43ca85 MOVDQA %XMM6,%XMM1 |
0x43ca89 PMULUDQ %XMM2,%XMM7 |
0x43ca8d PMULUDQ %XMM15,%XMM1 |
0x43ca92 PADDQ %XMM7,%XMM1 |
0x43ca96 MOVDQA %XMM6,%XMM7 |
0x43ca9a PMULUDQ %XMM2,%XMM7 |
0x43ca9e PSLLQ $0x20,%XMM1 |
0x43caa3 PADDQ %XMM13,%XMM7 |
0x43caa8 PADDQ %XMM1,%XMM7 |
0x43caac PSLLQ $0x3,%XMM5 |
0x43cab1 PADDQ -0xd0(%RBP),%XMM5 [10] |
0x43cab9 MOVQ %XMM5,%RSI |
0x43cabe PSHUFD $-0x12,%XMM5,%XMM1 |
0x43cac3 MOVQ %XMM1,%RDI |
0x43cac8 PSLLQ $0x3,%XMM3 |
0x43cacd PADDQ %XMM8,%XMM3 |
0x43cad2 MOVQ %XMM3,%R10 |
0x43cad7 PSHUFD $-0x12,%XMM3,%XMM1 |
0x43cadc PSLLQ $0x3,%XMM7 |
0x43cae1 PADDQ %XMM8,%XMM7 |
0x43cae6 MOVQ %XMM7,%R11 |
0x43caeb MOVQ %XMM1,%R14 |
0x43caf0 PSHUFD $-0x12,%XMM7,%XMM1 |
0x43caf5 MOVDQA %XMM9,%XMM3 |
0x43cafa PMULUDQ %XMM2,%XMM3 |
0x43cafe MOVQ %XMM1,%R8 |
0x43cb03 MOVDQA %XMM9,%XMM1 |
0x43cb08 PSRLQ $0x20,%XMM1 |
0x43cb0d PMULUDQ %XMM2,%XMM1 |
0x43cb11 PMULUDQ %XMM9,%XMM15 |
0x43cb16 PADDQ %XMM1,%XMM15 |
0x43cb1b MOVSD (%RAX),%XMM1 [12] |
0x43cb1f PADDQ %XMM13,%XMM3 |
0x43cb24 MOVSD (%RDX),%XMM2 [5] |
0x43cb28 MOVSD %XMM1,(%RSI) [9] |
0x43cb2c MOVSD %XMM2,(%RDI) [3] |
0x43cb30 UNPCKLPD %XMM2,%XMM1 |
0x43cb34 MOVSD (%R10),%XMM2 [4] |
0x43cb39 MOVHPD (%R14),%XMM2 [2] |
0x43cb3e ADDPD %XMM1,%XMM2 |
0x43cb42 MOVSD (%R11),%XMM1 [11] |
0x43cb47 MOVHPD (%R8),%XMM1 [1] |
0x43cb4c SUBPD %XMM1,%XMM2 |
0x43cb50 PSLLQ $0x20,%XMM15 |
0x43cb56 PADDQ %XMM15,%XMM3 |
0x43cb5b PSLLQ $0x3,%XMM3 |
0x43cb60 PADDQ -0xc0(%RBP),%XMM3 [10] |
0x43cb68 MOVQ %XMM3,%RAX |
0x43cb6d PSHUFD $-0x12,%XMM3,%XMM1 |
0x43cb72 MOVQ %XMM1,%RDX |
0x43cb77 MOVLPD %XMM2,(%RAX) [6] |
0x43cb7b MOVHPD %XMM2,(%RDX) [8] |
0x43cb7f PADDQ 0x4ea28(%RIP),%XMM10 [7] |
0x43cb88 ADD $0x2,%R9 |
0x43cb8c CMP %R15,%R9 |
0x43cb8f JB 43c910 |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 62 - 66 |
-------------------------------------------------------------------------------- |
62: #pragma omp parallel for simd collapse(2) |
63: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
64: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
65: post_vol(i, j) = volume(i, j); |
66: pre_vol(i, j) = post_vol(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.33 - 2.29 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.20 - 2.06 |
Bottlenecks | P8, P9, |
Function | _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.47 |
Source | advec_mom.cpp:62-66,context.h:69-69 |
Source loop unroll info | unrolled by 2 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 2 |
CQA cycles | 28.00 - 48.00 |
CQA cycles if no scalar integer | 21.00 |
CQA cycles if FP arith vectorized | 28.00 - 48.00 |
CQA cycles if fully vectorized | 7.00 - 12.00 |
Front-end cycles | 23.33 |
DIV/SQRT cycles | 8.00 |
P0 cycles | 0.50 |
P1 cycles | 0.25 |
P2 cycles | 0.25 |
P3 cycles | 1.00 |
P4 cycles | 6.33 |
P5 cycles | 6.33 |
P6 cycles | 6.33 |
P7 cycles | 20.25 |
P8 cycles | 20.25 |
P9 cycles | 20.25 |
P10 cycles | 20.25 |
P11 cycles | 9.00 |
P12 cycles | 9.00 |
P13 cycles | 28.00 - 48.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 134.00 |
Nb uops | 140.00 |
Nb loads | 15.00 |
Nb stores | 4.00 |
Nb stack references | 6.00 |
FLOP/cycle | 0.14 - 0.08 |
Nb FLOP add-sub | 4.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 4.67 - 8.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 192.00 |
Bytes stored | 32.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 8.00 |
Vectorization ratio all | 70.40 |
Vectorization ratio load | 60.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 66.67 |
Vector-efficiency ratio all | 20.90 |
Vector-efficiency ratio load | 19.17 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 25.00 |
Vector-efficiency ratio add_sub | 25.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 20.11 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.33 - 2.29 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.20 - 2.06 |
Bottlenecks | P8, P9, |
Function | _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.47 |
Source | advec_mom.cpp:62-66,context.h:69-69 |
Source loop unroll info | unrolled by 2 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 2 |
CQA cycles | 28.00 - 48.00 |
CQA cycles if no scalar integer | 21.00 |
CQA cycles if FP arith vectorized | 28.00 - 48.00 |
CQA cycles if fully vectorized | 7.00 - 12.00 |
Front-end cycles | 23.33 |
DIV/SQRT cycles | 8.00 |
P0 cycles | 0.50 |
P1 cycles | 0.25 |
P2 cycles | 0.25 |
P3 cycles | 1.00 |
P4 cycles | 6.33 |
P5 cycles | 6.33 |
P6 cycles | 6.33 |
P7 cycles | 20.25 |
P8 cycles | 20.25 |
P9 cycles | 20.25 |
P10 cycles | 20.25 |
P11 cycles | 9.00 |
P12 cycles | 9.00 |
P13 cycles | 28.00 - 48.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 134.00 |
Nb uops | 140.00 |
Nb loads | 15.00 |
Nb stores | 4.00 |
Nb stack references | 6.00 |
FLOP/cycle | 0.14 - 0.08 |
Nb FLOP add-sub | 4.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 4.67 - 8.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 192.00 |
Bytes stored | 32.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 8.00 |
Vectorization ratio all | 70.40 |
Vectorization ratio load | 60.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 66.67 |
Vector-efficiency ratio all | 20.90 |
Vector-efficiency ratio load | 19.17 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 25.00 |
Vector-efficiency ratio add_sub | 25.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 20.11 |
Path / |
Function | _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.47 |
Source file and lines | advec_mom.cpp:62-66 |
Module | exec |
nb instructions | 134 |
nb uops | 140 |
loop length | 645 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 23.33 cycles |
front end | 23.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 0.50 | 0.25 | 0.25 | 1.00 | 6.33 | 6.33 | 6.33 | 20.25 | 20.25 | 20.25 | 20.25 | 9.00 | 9.00 |
cycles | 8.00 | 0.50 | 0.25 | 0.25 | 1.00 | 6.33 | 6.33 | 6.33 | 20.25 | 20.25 | 20.25 | 20.25 | 9.00 | 9.00 |
Cycles executing div or sqrt instructions | 28.00-48.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 23.33 |
Dispatch | 20.25 |
DIV/SQRT | 28.00-48.00 |
Data deps. | 1.00 |
Overall L1 | 28.00-48.00 |
all | 76% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 63% |
all | 15% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 70% |
load | 60% |
store | 0% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 66% |
all | 21% |
load | 23% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 25% |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 19% |
all | 14% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 20% |
load | 19% |
store | 12% |
mul | 25% |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 20% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOVQ %XMM10,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVQ %XMM12,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %R11 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 |
MOVQ %RAX,%XMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
PSHUFD $-0x12,%XMM10,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVQ %XMM2,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVDQA -0xb0(%RBP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVQ %XMM2,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %R10 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 |
MOVQ %RAX,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
PUNPCKLQDQ %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
PSHUFD $-0x18,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PADDD -0x100(%RBP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CQTO | |||||||||||||||||
IDIV %R11 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
MOVQ %RDX,%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CQTO | |||||||||||||||||
IDIV %R10 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
MOVQ %RDX,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
PUNPCKLQDQ %XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
PADDQ -0xf0(%RBP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
PSLLQ $0x20,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PXOR %XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PCMPGTD %XMM1,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PSHUFD $-0x13,%XMM3,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PUNPCKLDQ %XMM15,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
MOVDQA %XMM4,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA %XMM13,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVDQA %XMM11,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PUNPCKLDQ 0x4de30(%RIP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.50 |
PSRAD $0x1f,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVDQA %XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM15,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ %XMM7,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PUNPCKLDQ %XMM5,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
PSLLQ $0x20,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA %XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PADDQ %XMM14,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PMULUDQ %XMM2,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA %XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PSRLQ $0x20,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PSLLQ $0x3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PMULUDQ %XMM2,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA %XMM0,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM15,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ -0xe0(%RBP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM7,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSLLQ $0x20,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVQ %XMM3,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PADDQ %XMM14,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSUBD 0x4ecf9(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
PXOR %XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PSHUFD $-0x12,%XMM3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PCMPGTD %XMM1,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PUNPCKLDQ %XMM14,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
MOVDQA %XMM6,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVQ %XMM7,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PMULUDQ %XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA %XMM6,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PSRLQ $0x20,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PMULUDQ %XMM7,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PSHUFD $-0x2c,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PMULUDQ %XMM6,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ %XMM1,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSLLQ $0x20,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PADDQ %XMM14,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA %XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PMULUDQ %XMM15,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ %XMM7,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA %XMM6,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PSLLQ $0x20,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PADDQ %XMM1,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSLLQ $0x3,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ -0xd0(%RBP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
MOVQ %XMM5,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSHUFD $-0x12,%XMM5,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVQ %XMM1,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSLLQ $0x3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM8,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVQ %XMM3,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSHUFD $-0x12,%XMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PSLLQ $0x3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVQ %XMM7,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVQ %XMM1,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSHUFD $-0x12,%XMM7,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVDQA %XMM9,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVQ %XMM1,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVDQA %XMM9,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PSRLQ $0x20,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PMULUDQ %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PMULUDQ %XMM9,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ %XMM1,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVSD (%RAX),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVSD (%RDX),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSD %XMM1,(%RSI) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVSD %XMM2,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
UNPCKLPD %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
MOVSD (%R10),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVHPD (%R14),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.50 |
ADDPD %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVSD (%R11),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVHPD (%R8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.50 |
SUBPD %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
PSLLQ $0x20,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM15,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSLLQ $0x3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ -0xc0(%RBP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
MOVQ %XMM3,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSHUFD $-0x12,%XMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVQ %XMM1,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVLPD %XMM2,(%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVHPD %XMM2,(%RDX) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 2 | 1 |
PADDQ 0x4ea28(%RIP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
ADD $0x2,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 43c910 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.47+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
Function | _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.47 |
Source file and lines | advec_mom.cpp:62-66 |
Module | exec |
nb instructions | 134 |
nb uops | 140 |
loop length | 645 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 23.33 cycles |
front end | 23.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 0.50 | 0.25 | 0.25 | 1.00 | 6.33 | 6.33 | 6.33 | 20.25 | 20.25 | 20.25 | 20.25 | 9.00 | 9.00 |
cycles | 8.00 | 0.50 | 0.25 | 0.25 | 1.00 | 6.33 | 6.33 | 6.33 | 20.25 | 20.25 | 20.25 | 20.25 | 9.00 | 9.00 |
Cycles executing div or sqrt instructions | 28.00-48.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 23.33 |
Dispatch | 20.25 |
DIV/SQRT | 28.00-48.00 |
Data deps. | 1.00 |
Overall L1 | 28.00-48.00 |
all | 76% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 63% |
all | 15% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 70% |
load | 60% |
store | 0% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 66% |
all | 21% |
load | 23% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 25% |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 19% |
all | 14% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 20% |
load | 19% |
store | 12% |
mul | 25% |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 20% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOVQ %XMM10,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVQ %XMM12,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %R11 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 |
MOVQ %RAX,%XMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
PSHUFD $-0x12,%XMM10,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVQ %XMM2,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVDQA -0xb0(%RBP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVQ %XMM2,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %R10 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 |
MOVQ %RAX,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
PUNPCKLQDQ %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
PSHUFD $-0x18,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PADDD -0x100(%RBP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CQTO | |||||||||||||||||
IDIV %R11 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
MOVQ %RDX,%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CQTO | |||||||||||||||||
IDIV %R10 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
MOVQ %RDX,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
PUNPCKLQDQ %XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
PADDQ -0xf0(%RBP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
PSLLQ $0x20,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PXOR %XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PCMPGTD %XMM1,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PSHUFD $-0x13,%XMM3,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PUNPCKLDQ %XMM15,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
MOVDQA %XMM4,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA %XMM13,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVDQA %XMM11,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PUNPCKLDQ 0x4de30(%RIP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.50 |
PSRAD $0x1f,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVDQA %XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM15,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ %XMM7,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PUNPCKLDQ %XMM5,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
PSLLQ $0x20,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA %XMM0,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PADDQ %XMM14,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PMULUDQ %XMM2,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA %XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PSRLQ $0x20,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PSLLQ $0x3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PMULUDQ %XMM2,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA %XMM0,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM15,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ -0xe0(%RBP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM7,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSLLQ $0x20,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVQ %XMM3,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PADDQ %XMM14,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSUBD 0x4ecf9(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
PXOR %XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PSHUFD $-0x12,%XMM3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PCMPGTD %XMM1,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PUNPCKLDQ %XMM14,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
MOVDQA %XMM6,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVQ %XMM7,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PMULUDQ %XMM1,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA %XMM6,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PSRLQ $0x20,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PMULUDQ %XMM7,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PSHUFD $-0x2c,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PMULUDQ %XMM6,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ %XMM1,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSLLQ $0x20,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PADDQ %XMM14,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA %XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PMULUDQ %XMM15,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ %XMM7,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA %XMM6,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PSLLQ $0x20,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PADDQ %XMM1,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSLLQ $0x3,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ -0xd0(%RBP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
MOVQ %XMM5,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSHUFD $-0x12,%XMM5,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVQ %XMM1,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSLLQ $0x3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM8,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVQ %XMM3,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSHUFD $-0x12,%XMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
PSLLQ $0x3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVQ %XMM7,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVQ %XMM1,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSHUFD $-0x12,%XMM7,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVDQA %XMM9,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PMULUDQ %XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVQ %XMM1,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVDQA %XMM9,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PSRLQ $0x20,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PMULUDQ %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PMULUDQ %XMM9,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PADDQ %XMM1,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVSD (%RAX),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM13,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVSD (%RDX),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSD %XMM1,(%RSI) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVSD %XMM2,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
UNPCKLPD %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.40 |
MOVSD (%R10),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVHPD (%R14),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.50 |
ADDPD %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
MOVSD (%R11),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVHPD (%R8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.50 |
SUBPD %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
PSLLQ $0x20,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ %XMM15,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
PSLLQ $0x3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
PADDQ -0xc0(%RBP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
MOVQ %XMM3,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
PSHUFD $-0x12,%XMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
MOVQ %XMM1,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVLPD %XMM2,(%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
MOVHPD %XMM2,(%RDX) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 2 | 1 |
PADDQ 0x4ea28(%RIP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
ADD $0x2,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 43c910 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.47+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |