Loop Id: 203 | Module: exec | Source: build_field.cpp:118-128 | Coverage: 0.01% |
---|
Loop Id: 203 | Module: exec | Source: build_field.cpp:118-128 | Coverage: 0.01% |
---|
0x425870 LEA (%RSI,%R12,1),%RAX |
0x425874 VPBROADCASTQ %RAX,%YMM0 |
0x42587a VPADDQ %YMM24,%YMM0,%YMM9 |
0x425880 VPADDQ %YMM25,%YMM0,%YMM10 |
0x425886 VPADDQ %YMM26,%YMM0,%YMM11 |
0x42588c VPADDQ %YMM27,%YMM0,%YMM12 |
0x425892 VMOVDQA %YMM12,%YMM0 |
0x425896 VMOVDQA %YMM8,%YMM1 |
0x42589a MOV %RBX,%R15 |
0x42589d MOV %RDI,%RBX |
0x4258a0 MOV %R10,%RDI |
0x4258a3 MOV $0x452610,%R13 |
0x4258aa CALL %R13 |
0x4258ad VMOVDQA %YMM0,%YMM13 |
0x4258b1 VMOVDQA %YMM11,%YMM0 |
0x4258b5 VMOVDQA %YMM8,%YMM1 |
0x4258b9 CALL %R13 |
0x4258bc VMOVDQA %YMM0,%YMM14 |
0x4258c0 VMOVDQA %YMM10,%YMM0 |
0x4258c4 VMOVDQA %YMM8,%YMM1 |
0x4258c8 CALL %R13 |
0x4258cb VMOVDQA %YMM0,%YMM15 |
0x4258cf VMOVDQA %YMM9,%YMM0 |
0x4258d3 VMOVDQA %YMM8,%YMM1 |
0x4258d7 CALL %R13 |
0x4258da MOV -0x68(%RBP),%RAX [31] |
0x4258de MOV %RDI,%R10 |
0x4258e1 MOV %RBX,%RDI |
0x4258e4 MOV %R15,%RBX |
0x4258e7 VPMULLQ %YMM8,%YMM0,%YMM7 |
0x4258ed VPMULLQ %YMM8,%YMM15,%YMM5 |
0x4258f3 VXORPS %XMM2,%XMM2,%XMM2 |
0x4258f7 VPMULLQ %YMM8,%YMM14,%YMM2 |
0x4258fd VXORPS %XMM1,%XMM1,%XMM1 |
0x425901 VPMULLQ %YMM8,%YMM13,%YMM1 |
0x425907 VPSUBQ %YMM1,%YMM12,%YMM1 |
0x42590b VPSUBQ %YMM2,%YMM11,%YMM2 |
0x42590f VPSLLQ $0x20,%YMM0,%YMM0 |
0x425914 VPSRAQ $0x20,%YMM0,%YMM0 |
0x42591b VPSLLQ $0x20,%YMM15,%YMM3 |
0x425921 VPSRAQ $0x20,%YMM3,%YMM3 |
0x425928 VPSLLQ $0x20,%YMM14,%YMM4 |
0x42592e VPSRAQ $0x20,%YMM4,%YMM4 |
0x425935 VPSLLQ $0x20,%YMM13,%YMM6 |
0x42593b VPSRAQ $0x20,%YMM6,%YMM6 |
0x425942 VXORPS %XMM11,%XMM11,%XMM11 |
0x425947 VPMULLQ %YMM6,%YMM16,%YMM11 |
0x42594d VXORPS %XMM12,%XMM12,%XMM12 |
0x425952 VPMULLQ %YMM4,%YMM16,%YMM12 |
0x425958 VXORPS %XMM13,%XMM13,%XMM13 |
0x42595d VPMULLQ %YMM3,%YMM16,%YMM13 |
0x425963 VXORPS %XMM14,%XMM14,%XMM14 |
0x425968 VPMULLQ %YMM0,%YMM16,%YMM14 |
0x42596e VPSUBQ %YMM5,%YMM10,%YMM5 |
0x425972 VPSUBQ %YMM7,%YMM9,%YMM7 |
0x425976 VPADDQ %YMM7,%YMM14,%YMM9 |
0x42597a VPADDQ %YMM5,%YMM13,%YMM10 |
0x42597e VPADDQ %YMM2,%YMM12,%YMM12 |
0x425982 VPADDQ %YMM1,%YMM11,%YMM11 |
0x425986 KXNORW %K0,%K0,%K1 |
0x42598a VSCATTERQPD %YMM28,(%R10,%YMM11,8){%K1} [22] |
0x425991 KXNORW %K0,%K0,%K1 |
0x425995 VSCATTERQPD %YMM28,(%R10,%YMM12,8){%K1} [13] |
0x42599c KXNORW %K0,%K0,%K1 |
0x4259a0 VSCATTERQPD %YMM28,(%R10,%YMM10,8){%K1} [2] |
0x4259a7 KXNORW %K0,%K0,%K1 |
0x4259ab VSCATTERQPD %YMM28,(%R10,%YMM9,8){%K1} [23] |
0x4259b2 VXORPS %XMM9,%XMM9,%XMM9 |
0x4259b7 VPMULLQ %YMM6,%YMM17,%YMM9 |
0x4259bd VXORPS %XMM10,%XMM10,%XMM10 |
0x4259c2 VPMULLQ %YMM4,%YMM17,%YMM10 |
0x4259c8 VXORPS %XMM11,%XMM11,%XMM11 |
0x4259cd VPMULLQ %YMM3,%YMM17,%YMM11 |
0x4259d3 VXORPS %XMM12,%XMM12,%XMM12 |
0x4259d8 VPMULLQ %YMM0,%YMM17,%YMM12 |
0x4259de VPADDQ %YMM7,%YMM12,%YMM12 |
0x4259e2 VPADDQ %YMM5,%YMM11,%YMM11 |
0x4259e6 VPADDQ %YMM2,%YMM10,%YMM10 |
0x4259ea VPADDQ %YMM1,%YMM9,%YMM9 |
0x4259ee KXNORW %K0,%K0,%K1 |
0x4259f2 VSCATTERQPD %YMM28,(%RDI,%YMM9,8){%K1} [33] |
0x4259f9 KXNORW %K0,%K0,%K1 |
0x4259fd VSCATTERQPD %YMM28,(%RDI,%YMM10,8){%K1} [25] |
0x425a04 KXNORW %K0,%K0,%K1 |
0x425a08 VSCATTERQPD %YMM28,(%RDI,%YMM11,8){%K1} [19] |
0x425a0f KXNORW %K0,%K0,%K1 |
0x425a13 VSCATTERQPD %YMM28,(%RDI,%YMM12,8){%K1} [6] |
0x425a1a VXORPS %XMM9,%XMM9,%XMM9 |
0x425a1f VPMULLQ %YMM6,%YMM18,%YMM9 |
0x425a25 VXORPS %XMM10,%XMM10,%XMM10 |
0x425a2a VPMULLQ %YMM4,%YMM18,%YMM10 |
0x425a30 VXORPS %XMM11,%XMM11,%XMM11 |
0x425a35 VPMULLQ %YMM3,%YMM18,%YMM11 |
0x425a3b VXORPS %XMM12,%XMM12,%XMM12 |
0x425a40 VPMULLQ %YMM0,%YMM18,%YMM12 |
0x425a46 VPADDQ %YMM7,%YMM12,%YMM12 |
0x425a4a VPADDQ %YMM5,%YMM11,%YMM11 |
0x425a4e VPADDQ %YMM2,%YMM10,%YMM10 |
0x425a52 VPADDQ %YMM1,%YMM9,%YMM9 |
0x425a56 KXNORW %K0,%K0,%K1 |
0x425a5a VSCATTERQPD %YMM28,(%R15,%YMM9,8){%K1} [17] |
0x425a61 KXNORW %K0,%K0,%K1 |
0x425a65 VSCATTERQPD %YMM28,(%R15,%YMM10,8){%K1} [11] |
0x425a6c KXNORW %K0,%K0,%K1 |
0x425a70 VSCATTERQPD %YMM28,(%R15,%YMM11,8){%K1} [1] |
0x425a77 KXNORW %K0,%K0,%K1 |
0x425a7b VSCATTERQPD %YMM28,(%R15,%YMM12,8){%K1} [26] |
0x425a82 VXORPS %XMM9,%XMM9,%XMM9 |
0x425a87 VPMULLQ %YMM6,%YMM19,%YMM9 |
0x425a8d VXORPS %XMM10,%XMM10,%XMM10 |
0x425a92 VPMULLQ %YMM4,%YMM19,%YMM10 |
0x425a98 VXORPS %XMM11,%XMM11,%XMM11 |
0x425a9d VPMULLQ %YMM3,%YMM19,%YMM11 |
0x425aa3 VXORPS %XMM12,%XMM12,%XMM12 |
0x425aa8 VPMULLQ %YMM0,%YMM19,%YMM12 |
0x425aae VPADDQ %YMM7,%YMM12,%YMM12 |
0x425ab2 VPADDQ %YMM5,%YMM11,%YMM11 |
0x425ab6 VPADDQ %YMM2,%YMM10,%YMM10 |
0x425aba VPADDQ %YMM1,%YMM9,%YMM9 |
0x425abe KXNORW %K0,%K0,%K1 |
0x425ac2 MOV -0x58(%RBP),%RCX [31] |
0x425ac6 VSCATTERQPD %YMM28,(%RCX,%YMM9,8){%K1} [20] |
0x425acd KXNORW %K0,%K0,%K1 |
0x425ad1 VSCATTERQPD %YMM28,(%RCX,%YMM10,8){%K1} [30] |
0x425ad8 KXNORW %K0,%K0,%K1 |
0x425adc VSCATTERQPD %YMM28,(%RCX,%YMM11,8){%K1} [14] |
0x425ae3 KXNORW %K0,%K0,%K1 |
0x425ae7 VSCATTERQPD %YMM28,(%RCX,%YMM12,8){%K1} [29] |
0x425aee VXORPS %XMM9,%XMM9,%XMM9 |
0x425af3 VPMULLQ %YMM6,%YMM20,%YMM9 |
0x425af9 VXORPS %XMM10,%XMM10,%XMM10 |
0x425afe VPMULLQ %YMM4,%YMM20,%YMM10 |
0x425b04 VXORPS %XMM11,%XMM11,%XMM11 |
0x425b09 VPMULLQ %YMM3,%YMM20,%YMM11 |
0x425b0f VXORPS %XMM12,%XMM12,%XMM12 |
0x425b14 VPMULLQ %YMM0,%YMM20,%YMM12 |
0x425b1a VPADDQ %YMM7,%YMM12,%YMM12 |
0x425b1e VPADDQ %YMM5,%YMM11,%YMM11 |
0x425b22 VPADDQ %YMM2,%YMM10,%YMM10 |
0x425b26 VPADDQ %YMM1,%YMM9,%YMM9 |
0x425b2a KXNORW %K0,%K0,%K1 |
0x425b2e VSCATTERQPD %YMM28,(%R14,%YMM9,8){%K1} [18] |
0x425b35 KXNORW %K0,%K0,%K1 |
0x425b39 VSCATTERQPD %YMM28,(%R14,%YMM10,8){%K1} [10] |
0x425b40 KXNORW %K0,%K0,%K1 |
0x425b44 VSCATTERQPD %YMM28,(%R14,%YMM11,8){%K1} [27] |
0x425b4b KXNORW %K0,%K0,%K1 |
0x425b4f VSCATTERQPD %YMM28,(%R14,%YMM12,8){%K1} [9] |
0x425b56 VXORPS %XMM9,%XMM9,%XMM9 |
0x425b5b VPMULLQ %YMM6,%YMM21,%YMM9 |
0x425b61 VXORPS %XMM10,%XMM10,%XMM10 |
0x425b66 VPMULLQ %YMM4,%YMM21,%YMM10 |
0x425b6c VXORPS %XMM11,%XMM11,%XMM11 |
0x425b71 VPMULLQ %YMM3,%YMM21,%YMM11 |
0x425b77 VXORPS %XMM12,%XMM12,%XMM12 |
0x425b7c VPMULLQ %YMM0,%YMM21,%YMM12 |
0x425b82 VPADDQ %YMM7,%YMM12,%YMM12 |
0x425b86 VPADDQ %YMM5,%YMM11,%YMM11 |
0x425b8a VPADDQ %YMM2,%YMM10,%YMM10 |
0x425b8e VPADDQ %YMM1,%YMM9,%YMM9 |
0x425b92 KXNORW %K0,%K0,%K1 |
0x425b96 MOV -0x70(%RBP),%RCX [31] |
0x425b9a VSCATTERQPD %YMM28,(%RCX,%YMM9,8){%K1} [12] |
0x425ba1 KXNORW %K0,%K0,%K1 |
0x425ba5 VSCATTERQPD %YMM28,(%RCX,%YMM10,8){%K1} [4] |
0x425bac KXNORW %K0,%K0,%K1 |
0x425bb0 VSCATTERQPD %YMM28,(%RCX,%YMM11,8){%K1} [21] |
0x425bb7 KXNORW %K0,%K0,%K1 |
0x425bbb VSCATTERQPD %YMM28,(%RCX,%YMM12,8){%K1} [3] |
0x425bc2 VXORPS %XMM9,%XMM9,%XMM9 |
0x425bc7 VPMULLQ %YMM6,%YMM22,%YMM9 |
0x425bcd VXORPS %XMM10,%XMM10,%XMM10 |
0x425bd2 VPMULLQ %YMM4,%YMM22,%YMM10 |
0x425bd8 VXORPS %XMM11,%XMM11,%XMM11 |
0x425bdd VPMULLQ %YMM3,%YMM22,%YMM11 |
0x425be3 VXORPS %XMM12,%XMM12,%XMM12 |
0x425be8 VPMULLQ %YMM0,%YMM22,%YMM12 |
0x425bee VPADDQ %YMM7,%YMM12,%YMM12 |
0x425bf2 VPADDQ %YMM5,%YMM11,%YMM11 |
0x425bf6 VPADDQ %YMM2,%YMM10,%YMM10 |
0x425bfa VPADDQ %YMM1,%YMM9,%YMM9 |
0x425bfe KXNORW %K0,%K0,%K1 |
0x425c02 VSCATTERQPD %YMM28,(%RAX,%YMM9,8){%K1} [28] |
0x425c09 KXNORW %K0,%K0,%K1 |
0x425c0d VSCATTERQPD %YMM28,(%RAX,%YMM10,8){%K1} [5] |
0x425c14 KXNORW %K0,%K0,%K1 |
0x425c18 VSCATTERQPD %YMM28,(%RAX,%YMM11,8){%K1} [32] |
0x425c1f KXNORW %K0,%K0,%K1 |
0x425c23 VSCATTERQPD %YMM28,(%RAX,%YMM12,8){%K1} [16] |
0x425c2a VPMULLQ %YMM6,%YMM23,%YMM6 |
0x425c30 VPMULLQ %YMM4,%YMM23,%YMM4 |
0x425c36 VPMULLQ %YMM3,%YMM23,%YMM3 |
0x425c3c VPMULLQ %YMM0,%YMM23,%YMM0 |
0x425c42 VPADDQ %YMM0,%YMM7,%YMM0 |
0x425c46 VPADDQ %YMM3,%YMM5,%YMM3 |
0x425c4a VPADDQ %YMM4,%YMM2,%YMM2 |
0x425c4e VPADDQ %YMM6,%YMM1,%YMM1 |
0x425c52 KXNORW %K0,%K0,%K1 |
0x425c56 MOV -0x60(%RBP),%RAX [31] |
0x425c5a VSCATTERQPD %YMM28,(%RAX,%YMM1,8){%K1} [8] |
0x425c61 KXNORW %K0,%K0,%K1 |
0x425c65 VSCATTERQPD %YMM28,(%RAX,%YMM2,8){%K1} [7] |
0x425c6c KXNORW %K0,%K0,%K1 |
0x425c70 VSCATTERQPD %YMM28,(%RAX,%YMM3,8){%K1} [24] |
0x425c77 KXNORW %K0,%K0,%K1 |
0x425c7b VSCATTERQPD %YMM28,(%RAX,%YMM0,8){%K1} [15] |
0x425c82 ADD $0x10,%R12 |
0x425c86 CMP -0xc0(%RBP),%R12 [31] |
0x425c8d JBE 425870 |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/build_field.cpp: 118 - 128 |
-------------------------------------------------------------------------------- |
118: #pragma omp parallel for simd collapse(2) |
119: for (int j = (0); j < (yrange); j++) { |
120: for (int i = (0); i < (xrange); i++) { |
121: field.density0(i, j) = 0.0; |
122: field.density1(i, j) = 0.0; |
123: field.energy0(i, j) = 0.0; |
124: field.energy1(i, j) = 0.0; |
125: field.pressure(i, j) = 0.0; |
126: field.viscosity(i, j) = 0.0; |
127: field.soundspeed(i, j) = 0.0; |
128: field.volume(i, j) = 0.0; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.08 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 2.57 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.11 |
Bottlenecks | micro-operation queue |
Function | _Z11build_fieldR16global_variables.extracted.27 |
Source | build_field.cpp:118-128 |
Source loop unroll info | unrolled by 16 |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | main |
Unroll factor | 16 |
CQA cycles | 118.00 |
CQA cycles if no scalar integer | 109.33 |
CQA cycles if FP arith vectorized | 118.00 |
CQA cycles if fully vectorized | 46.00 |
Front-end cycles | 118.00 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 106.00 |
P1 cycles | 106.00 |
P2 cycles | 1.67 |
P3 cycles | 1.67 |
P4 cycles | 66.00 |
P5 cycles | 41.00 |
P6 cycles | 19.00 |
P7 cycles | 66.00 |
P8 cycles | 66.00 |
P9 cycles | 66.00 |
P10 cycles | 19.00 |
P11 cycles | 1.67 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | 106.50 - 106.51 |
Stall cycles (UFS) | 21.10 - 21.12 |
Nb insns | 209.00 |
Nb uops | 708.00 |
Nb loads | 5.00 |
Nb stores | 32.00 |
Nb stack references | 5.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 9.02 |
Bytes prefetched | 0.00 |
Bytes loaded | 40.00 |
Bytes stored | 1024.00 |
Stride 0 | 1.00 |
Stride 1 | 0.00 |
Stride n | 8.00 |
Stride unknown | 0.00 |
Stride indirect | 20.00 |
Vectorization ratio all | 97.52 |
Vectorization ratio load | NA |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 92.45 |
Vector-efficiency ratio all | 44.37 |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 50.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 32.90 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.08 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 2.57 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.11 |
Bottlenecks | micro-operation queue |
Function | _Z11build_fieldR16global_variables.extracted.27 |
Source | build_field.cpp:118-128 |
Source loop unroll info | unrolled by 16 |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | main |
Unroll factor | 16 |
CQA cycles | 118.00 |
CQA cycles if no scalar integer | 109.33 |
CQA cycles if FP arith vectorized | 118.00 |
CQA cycles if fully vectorized | 46.00 |
Front-end cycles | 118.00 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 106.00 |
P1 cycles | 106.00 |
P2 cycles | 1.67 |
P3 cycles | 1.67 |
P4 cycles | 66.00 |
P5 cycles | 41.00 |
P6 cycles | 19.00 |
P7 cycles | 66.00 |
P8 cycles | 66.00 |
P9 cycles | 66.00 |
P10 cycles | 19.00 |
P11 cycles | 1.67 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | 106.50 - 106.51 |
Stall cycles (UFS) | 21.10 - 21.12 |
Nb insns | 209.00 |
Nb uops | 708.00 |
Nb loads | 5.00 |
Nb stores | 32.00 |
Nb stack references | 5.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 9.02 |
Bytes prefetched | 0.00 |
Bytes loaded | 40.00 |
Bytes stored | 1024.00 |
Stride 0 | 1.00 |
Stride 1 | 0.00 |
Stride n | 8.00 |
Stride unknown | 0.00 |
Stride indirect | 20.00 |
Vectorization ratio all | 97.52 |
Vectorization ratio load | NA |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 92.45 |
Vector-efficiency ratio all | 44.37 |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 50.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 32.90 |
Path / |
Function | _Z11build_fieldR16global_variables.extracted.27 |
Source file and lines | build_field.cpp:118-128 |
Module | exec |
nb instructions | 209 |
nb uops | 708 |
loop length | 1059 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 29 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 118.00 cycles |
front end | 118.00 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 106.00 | 106.00 | 1.67 | 1.67 | 66.00 | 41.00 | 19.00 | 66.00 | 66.00 | 66.00 | 19.00 | 1.67 |
cycles | 106.00 | 106.00 | 1.67 | 1.67 | 66.00 | 41.00 | 19.00 | 66.00 | 66.00 | 66.00 | 19.00 | 1.67 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 2.00 |
FE+BE cycles | 106.50-106.51 |
Stall cycles | 21.10-21.12 |
RS full (events) | 67.72-60.05 |
Front-end | 118.00 |
Dispatch | 106.00 |
Data deps. | 2.00 |
Overall L1 | 118.00 |
Vectorization ratios (INT) |
all | 95% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 82% |
Vectorization ratios (FP) |
all | 100% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
Vectorization ratios (INT+FP) |
all | 97% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 92% |
Vector-efficiency ratios (INT) |
all | 48% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 43% |
Vector-efficiency ratios (FP) |
all | 37% |
load | NA (no load vectorizable/vectorized instructions) |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector-efficiency ratios (INT+FP) |
all | 44% |
load | NA (no load vectorizable/vectorized instructions) |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 32% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LEA (%RSI,%R12,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %YMM24,%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM25,%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM26,%YMM0,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM27,%YMM0,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVDQA %YMM12,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %RBX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x452610,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL %R13 | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VMOVDQA %YMM0,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALL %R13 | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VMOVDQA %YMM0,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALL %R13 | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VMOVDQA %YMM0,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM9,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALL %R13 | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPMULLQ %YMM8,%YMM0,%YMM7 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM8,%YMM15,%YMM5 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM8,%YMM14,%YMM2 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM8,%YMM13,%YMM1 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %YMM1,%YMM12,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPSUBQ %YMM2,%YMM11,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPSLLQ $0x20,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSRAQ $0x20,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSLLQ $0x20,%YMM15,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSRAQ $0x20,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSLLQ $0x20,%YMM14,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSRAQ $0x20,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSLLQ $0x20,%YMM13,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSRAQ $0x20,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM16,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM16,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM16,%YMM13 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM16,%YMM14 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %YMM5,%YMM10,%YMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPSUBQ %YMM7,%YMM9,%YMM7 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPADDQ %YMM7,%YMM14,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM13,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R10,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R10,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R10,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R10,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM17,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM17,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM17,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM17,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RDI,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RDI,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RDI,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RDI,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM18,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM18,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM18,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM18,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R15,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R15,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R15,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R15,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM19,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM19,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM19,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM19,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %YMM28,(%RCX,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM20,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM20,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM20,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM20,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R14,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R14,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R14,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R14,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM21,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM21,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM21,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM21,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x70(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %YMM28,(%RCX,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM22,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM22,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM22,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM22,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VPMULLQ %YMM6,%YMM23,%YMM6 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM4,%YMM23,%YMM4 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM3,%YMM23,%YMM3 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM0,%YMM23,%YMM0 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM0,%YMM7,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM3,%YMM5,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM4,%YMM2,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM6,%YMM1,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %YMM28,(%RAX,%YMM1,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM2,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM3,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM0,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
ADD $0x10,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0xc0(%RBP),%R12 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JBE 425870 <_Z11build_fieldR16global_variables.extracted.27+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | _Z11build_fieldR16global_variables.extracted.27 |
Source file and lines | build_field.cpp:118-128 |
Module | exec |
nb instructions | 209 |
nb uops | 708 |
loop length | 1059 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 29 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 118.00 cycles |
front end | 118.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 106.00 | 106.00 | 1.67 | 1.67 | 66.00 | 41.00 | 19.00 | 66.00 | 66.00 | 66.00 | 19.00 | 1.67 |
cycles | 106.00 | 106.00 | 1.67 | 1.67 | 66.00 | 41.00 | 19.00 | 66.00 | 66.00 | 66.00 | 19.00 | 1.67 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 2.00 |
FE+BE cycles | 106.50-106.51 |
Stall cycles | 21.10-21.12 |
RS full (events) | 67.72-60.05 |
Front-end | 118.00 |
Dispatch | 106.00 |
Data deps. | 2.00 |
Overall L1 | 118.00 |
all | 95% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 82% |
all | 100% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 97% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 92% |
all | 48% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 43% |
all | 37% |
load | NA (no load vectorizable/vectorized instructions) |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 44% |
load | NA (no load vectorizable/vectorized instructions) |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 32% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LEA (%RSI,%R12,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %YMM24,%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM25,%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM26,%YMM0,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM27,%YMM0,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVDQA %YMM12,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %RBX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x452610,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL %R13 | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VMOVDQA %YMM0,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALL %R13 | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VMOVDQA %YMM0,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALL %R13 | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VMOVDQA %YMM0,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM9,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA %YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALL %R13 | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPMULLQ %YMM8,%YMM0,%YMM7 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM8,%YMM15,%YMM5 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM8,%YMM14,%YMM2 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM8,%YMM13,%YMM1 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %YMM1,%YMM12,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPSUBQ %YMM2,%YMM11,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPSLLQ $0x20,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSRAQ $0x20,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSLLQ $0x20,%YMM15,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSRAQ $0x20,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSLLQ $0x20,%YMM14,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSRAQ $0x20,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSLLQ $0x20,%YMM13,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPSRAQ $0x20,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM16,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM16,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM16,%YMM13 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM16,%YMM14 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %YMM5,%YMM10,%YMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPSUBQ %YMM7,%YMM9,%YMM7 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPADDQ %YMM7,%YMM14,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM13,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R10,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R10,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R10,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R10,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM17,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM17,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM17,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM17,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RDI,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RDI,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RDI,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RDI,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM18,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM18,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM18,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM18,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R15,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R15,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R15,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R15,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM19,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM19,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM19,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM19,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %YMM28,(%RCX,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM20,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM20,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM20,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM20,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R14,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R14,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R14,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%R14,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM21,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM21,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM21,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM21,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x70(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %YMM28,(%RCX,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RCX,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VXORPS %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM6,%YMM22,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM4,%YMM22,%YMM10 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM3,%YMM22,%YMM11 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM0,%YMM22,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM7,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM5,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM2,%YMM10,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM1,%YMM9,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM9,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM10,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM11,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM12,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
VPMULLQ %YMM6,%YMM23,%YMM6 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM4,%YMM23,%YMM4 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM3,%YMM23,%YMM3 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM0,%YMM23,%YMM0 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM0,%YMM7,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM3,%YMM5,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM4,%YMM2,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM6,%YMM1,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %YMM28,(%RAX,%YMM1,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM2,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM3,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %YMM28,(%RAX,%YMM0,8){%K1} | 12 | 1.70 | 0.70 | 0 | 0 | 2 | 0.20 | 0.20 | 2 | 2 | 2 | 0.20 | 0 | 2-12 | 5 |
ADD $0x10,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0xc0(%RBP),%R12 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JBE 425870 <_Z11build_fieldR16global_variables.extracted.27+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |