| Loop Id: 35 \| Module: attention-aocc-znver5-256 \| Source: attention_v2.cpp:52-53 \| Coverage: 0.51% |
|---|
| Loop Id: 35 \| Module: attention-aocc-znver5-256 \| Source: attention_v2.cpp:52-53 \| Coverage: 0.51% |
|---|
0x5a90 VMOVAPS %YMM1,0x360(%RSP) [1] |
0x5a99 VMOVUPS -0x60(%RSI,%R12,4),%YMM0 [2] |
0x5aa0 VMOVAPS 0x340(%RSP),%YMM4 [1] |
0x5aa9 VMOVUPS -0x40(%RSI,%R12,4),%YMM1 [2] |
0x5ab0 VMOVUPS -0x20(%RSI,%R12,4),%YMM2 [2] |
0x5ab7 VMOVUPS (%RSI,%R12,4),%YMM3 [2] |
0x5abd VSUBPS %YMM4,%YMM0,%YMM5 |
0x5ac1 VSUBPS %YMM4,%YMM1,%YMM0 |
0x5ac5 VMOVAPS %YMM0,0xa0(%RSP) [1] |
0x5ace VSUBPS %YMM4,%YMM2,%YMM0 |
0x5ad2 VMOVAPS %YMM5,0x80(%RSP) [1] |
0x5adb VMOVAPS %YMM0,0x120(%RSP) [1] |
0x5ae4 VSUBPS %YMM4,%YMM3,%YMM0 |
0x5ae8 VMOVAPS %YMM0,0xe0(%RSP) [1] |
0x5af1 VEXTRACTF128 $0x1,%YMM5,%XMM0 |
0x5af7 VMOVAPS %XMM0,0x40(%RSP) [1] |
0x5afd VZEROUPPER |
0x5b00 CALL 6fd0 <@plt_start@+0x20> |
0x5b05 VMOVAPS %XMM0,0x180(%RSP) [1] |
0x5b0e VMOVSHDUP 0x40(%RSP),%XMM0 [1] |
0x5b14 CALL 6fd0 <@plt_start@+0x20> |
0x5b19 VMOVAPS 0x180(%RSP),%XMM1 [1] |
0x5b22 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
0x5b28 VMOVAPS %XMM0,0x180(%RSP) [1] |
0x5b31 VPERMILPD $0x1,0x40(%RSP),%XMM0 [1] |
0x5b39 CALL 6fd0 <@plt_start@+0x20> |
0x5b3e VMOVAPS 0x180(%RSP),%XMM1 [1] |
0x5b47 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
0x5b4d VMOVAPS %XMM0,0x180(%RSP) [1] |
0x5b56 VPERMILPS $-0x1,0x40(%RSP),%XMM0 [1] |
0x5b5e CALL 6fd0 <@plt_start@+0x20> |
0x5b63 VMOVAPS 0x180(%RSP),%XMM1 [1] |
0x5b6c VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
0x5b72 VMOVAPS %XMM0,0x40(%RSP) [1] |
0x5b78 VMOVAPS 0x80(%RSP),%YMM0 [1] |
0x5b81 VZEROUPPER |
0x5b84 CALL 6fd0 <@plt_start@+0x20> |
0x5b89 VMOVAPS %XMM0,0x180(%RSP) [1] |
0x5b92 VMOVSHDUP 0x80(%RSP),%XMM0 [1] |
0x5b9b CALL 6fd0 <@plt_start@+0x20> |
0x5ba0 VMOVAPS 0x180(%RSP),%XMM1 [1] |
0x5ba9 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
0x5baf VMOVAPS %XMM0,0x180(%RSP) [1] |
0x5bb8 VPERMILPD $0x1,0x80(%RSP),%XMM0 [1] |
0x5bc3 CALL 6fd0 <@plt_start@+0x20> |
0x5bc8 VMOVAPS 0x180(%RSP),%XMM1 [1] |
0x5bd1 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
0x5bd7 VMOVAPS %XMM0,0x180(%RSP) [1] |
0x5be0 VPERMILPS $-0x1,0x80(%RSP),%XMM0 [1] |
0x5beb CALL 6fd0 <@plt_start@+0x20> |
0x5bf0 VMOVAPS 0x180(%RSP),%XMM1 [1] |
0x5bf9 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
0x5bff VINSERTF128 $0x1,0x40(%RSP),%YMM0,%YMM0 [1] |
0x5c07 VMOVAPS 0x300(%RSP),%YMM1 [1] |
0x5c10 VADDPS %YMM1,%YMM0,%YMM1 |
0x5c14 VMOVAPS 0xa0(%RSP),%YMM0 [1] |
0x5c1d VMOVAPS %YMM1,0x300(%RSP) [1] |
0x5c26 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
0x5c2c VMOVAPS %XMM0,0x80(%RSP) [1] |
0x5c35 VZEROUPPER |
0x5c38 CALL 6fd0 <@plt_start@+0x20> |
0x5c3d VMOVAPS %XMM0,0x40(%RSP) [1] |
0x5c43 VMOVSHDUP 0x80(%RSP),%XMM0 [1] |
0x5c4c CALL 6fd0 <@plt_start@+0x20> |
0x5c51 VMOVAPS 0x40(%RSP),%XMM1 [1] |
0x5c57 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
0x5c5d VMOVAPS %XMM0,0x40(%RSP) [1] |
0x5c63 VPERMILPD $0x1,0x80(%RSP),%XMM0 [1] |
0x5c6e CALL 6fd0 <@plt_start@+0x20> |
0x5c73 VMOVAPS 0x40(%RSP),%XMM1 [1] |
0x5c79 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
0x5c7f VMOVAPS %XMM0,0x40(%RSP) [1] |
0x5c85 VPERMILPS $-0x1,0x80(%RSP),%XMM0 [1] |
0x5c90 CALL 6fd0 <@plt_start@+0x20> |
0x5c95 VMOVAPS 0x40(%RSP),%XMM1 [1] |
0x5c9b VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
0x5ca1 VMOVAPS %XMM0,0x80(%RSP) [1] |
0x5caa VMOVAPS 0xa0(%RSP),%YMM0 [1] |
0x5cb3 VZEROUPPER |
0x5cb6 CALL 6fd0 <@plt_start@+0x20> |
0x5cbb VMOVAPS %XMM0,0x40(%RSP) [1] |
0x5cc1 VMOVSHDUP 0xa0(%RSP),%XMM0 [1] |
0x5cca CALL 6fd0 <@plt_start@+0x20> |
0x5ccf VMOVAPS 0x40(%RSP),%XMM1 [1] |
0x5cd5 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
0x5cdb VMOVAPS %XMM0,0x40(%RSP) [1] |
0x5ce1 VPERMILPD $0x1,0xa0(%RSP),%XMM0 [1] |
0x5cec CALL 6fd0 <@plt_start@+0x20> |
0x5cf1 VMOVAPS 0x40(%RSP),%XMM1 [1] |
0x5cf7 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
0x5cfd VMOVAPS %XMM0,0x40(%RSP) [1] |
0x5d03 VPERMILPS $-0x1,0xa0(%RSP),%XMM0 [1] |
0x5d0e CALL 6fd0 <@plt_start@+0x20> |
0x5d13 VMOVAPS 0x40(%RSP),%XMM1 [1] |
0x5d19 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
0x5d1f VINSERTF128 $0x1,0x80(%RSP),%YMM0,%YMM0 [1] |
0x5d2a VMOVAPS 0x2e0(%RSP),%YMM1 [1] |
0x5d33 VADDPS %YMM1,%YMM0,%YMM1 |
0x5d37 VMOVAPS 0x120(%RSP),%YMM0 [1] |
0x5d40 VMOVAPS %YMM1,0x2e0(%RSP) [1] |
0x5d49 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
0x5d4f VMOVAPS %XMM0,0xa0(%RSP) [1] |
0x5d58 VZEROUPPER |
0x5d5b CALL 6fd0 <@plt_start@+0x20> |
0x5d60 VMOVAPS %XMM0,0x80(%RSP) [1] |
0x5d69 VMOVSHDUP 0xa0(%RSP),%XMM0 [1] |
0x5d72 CALL 6fd0 <@plt_start@+0x20> |
0x5d77 VMOVAPS 0x80(%RSP),%XMM1 [1] |
0x5d80 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
0x5d86 VMOVAPS %XMM0,0x80(%RSP) [1] |
0x5d8f VPERMILPD $0x1,0xa0(%RSP),%XMM0 [1] |
0x5d9a CALL 6fd0 <@plt_start@+0x20> |
0x5d9f VMOVAPS 0x80(%RSP),%XMM1 [1] |
0x5da8 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
0x5dae VMOVAPS %XMM0,0x80(%RSP) [1] |
0x5db7 VPERMILPS $-0x1,0xa0(%RSP),%XMM0 [1] |
0x5dc2 CALL 6fd0 <@plt_start@+0x20> |
0x5dc7 VMOVAPS 0x80(%RSP),%XMM1 [1] |
0x5dd0 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
0x5dd6 VMOVAPS %XMM0,0xa0(%RSP) [1] |
0x5ddf VMOVAPS 0x120(%RSP),%YMM0 [1] |
0x5de8 VZEROUPPER |
0x5deb CALL 6fd0 <@plt_start@+0x20> |
0x5df0 VMOVAPS %XMM0,0x80(%RSP) [1] |
0x5df9 VMOVSHDUP 0x120(%RSP),%XMM0 [1] |
0x5e02 CALL 6fd0 <@plt_start@+0x20> |
0x5e07 VMOVAPS 0x80(%RSP),%XMM1 [1] |
0x5e10 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
0x5e16 VMOVAPS %XMM0,0x80(%RSP) [1] |
0x5e1f VPERMILPD $0x1,0x120(%RSP),%XMM0 [1] |
0x5e2a CALL 6fd0 <@plt_start@+0x20> |
0x5e2f VMOVAPS 0x80(%RSP),%XMM1 [1] |
0x5e38 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
0x5e3e VMOVAPS %XMM0,0x80(%RSP) [1] |
0x5e47 VPERMILPS $-0x1,0x120(%RSP),%XMM0 [1] |
0x5e52 CALL 6fd0 <@plt_start@+0x20> |
0x5e57 VMOVAPS 0x80(%RSP),%XMM1 [1] |
0x5e60 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
0x5e66 VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 [1] |
0x5e71 VMOVAPS 0x320(%RSP),%YMM1 [1] |
0x5e7a VADDPS %YMM1,%YMM0,%YMM1 |
0x5e7e VMOVAPS 0xe0(%RSP),%YMM0 [1] |
0x5e87 VMOVAPS %YMM1,0x320(%RSP) [1] |
0x5e90 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
0x5e96 VMOVAPS %XMM0,0x120(%RSP) [1] |
0x5e9f VZEROUPPER |
0x5ea2 CALL 6fd0 <@plt_start@+0x20> |
0x5ea7 VMOVAPS %XMM0,0xa0(%RSP) [1] |
0x5eb0 VMOVSHDUP 0x120(%RSP),%XMM0 [1] |
0x5eb9 CALL 6fd0 <@plt_start@+0x20> |
0x5ebe VMOVAPS 0xa0(%RSP),%XMM1 [1] |
0x5ec7 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
0x5ecd VMOVAPS %XMM0,0xa0(%RSP) [1] |
0x5ed6 VPERMILPD $0x1,0x120(%RSP),%XMM0 [1] |
0x5ee1 CALL 6fd0 <@plt_start@+0x20> |
0x5ee6 VMOVAPS 0xa0(%RSP),%XMM1 [1] |
0x5eef VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
0x5ef5 VMOVAPS %XMM0,0xa0(%RSP) [1] |
0x5efe VPERMILPS $-0x1,0x120(%RSP),%XMM0 [1] |
0x5f09 CALL 6fd0 <@plt_start@+0x20> |
0x5f0e VMOVAPS 0xa0(%RSP),%XMM1 [1] |
0x5f17 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
0x5f1d VMOVAPS %XMM0,0x120(%RSP) [1] |
0x5f26 VMOVAPS 0xe0(%RSP),%YMM0 [1] |
0x5f2f VZEROUPPER |
0x5f32 CALL 6fd0 <@plt_start@+0x20> |
0x5f37 VMOVAPS %XMM0,0xa0(%RSP) [1] |
0x5f40 VMOVSHDUP 0xe0(%RSP),%XMM0 [1] |
0x5f49 CALL 6fd0 <@plt_start@+0x20> |
0x5f4e VMOVAPS 0xa0(%RSP),%XMM1 [1] |
0x5f57 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
0x5f5d VMOVAPS %XMM0,0xa0(%RSP) [1] |
0x5f66 VPERMILPD $0x1,0xe0(%RSP),%XMM0 [1] |
0x5f71 CALL 6fd0 <@plt_start@+0x20> |
0x5f76 VMOVAPS 0xa0(%RSP),%XMM1 [1] |
0x5f7f VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
0x5f85 VMOVAPS %XMM0,0xa0(%RSP) [1] |
0x5f8e VPERMILPS $-0x1,0xe0(%RSP),%XMM0 [1] |
0x5f99 CALL 6fd0 <@plt_start@+0x20> |
0x5f9e VMOVAPS 0xa0(%RSP),%XMM2 [1] |
0x5fa7 VMOVAPS 0x360(%RSP),%YMM1 [1] |
0x5fb0 MOV 0x2d0(%RSP),%R11 [1] |
0x5fb8 MOV 0x270(%RSP),%RSI [1] |
0x5fc0 ADD $0x20,%R12 |
0x5fc4 VINSERTPS $0x30,%XMM0,%XMM2,%XMM0 |
0x5fca VINSERTF128 $0x1,0x120(%RSP),%YMM0,%YMM0 [1] |
0x5fd5 VADDPS %YMM1,%YMM0,%YMM1 |
0x5fd9 CMP %R12,%R11 |
0x5fdc JNE 5a90 |
/home/eoseret/llm-attention/attention_v2.cpp: 52 - 53 |
-------------------------------------------------------------------------------- |
52: for (int idx = 0; idx <= row; ++idx) // vectorised |
53: sum += expf(S_row[idx] - max_val); |
| Coverage (%) | Name | Source Location | Module |
|---|
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.01 |
| CQA speedup if FP arith vectorized | 1.01 |
| CQA speedup if fully vectorized | 3.42 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.38 |
| Bottlenecks | micro-operation queue, P6, P7, P8, P9 |
| Function | main |
| Source | attention_v2.cpp:52-53 |
| Source loop unroll info | unrolled by 32 |
| Source loop unroll confidence level | max |
| Unroll/vectorization loop type | main |
| Unroll factor | 32 |
| CQA cycles | 27.50 |
| CQA cycles if no scalar integer | 27.25 |
| CQA cycles if FP arith vectorized | 27.25 |
| CQA cycles if fully vectorized | 8.03 |
| Front-end cycles | 27.50 |
| P0 cycles | 0.33 |
| P1 cycles | 0.33 |
| P2 cycles | 0.33 |
| P3 cycles | 0.33 |
| P4 cycles | 0.33 |
| P5 cycles | 0.33 |
| P6 cycles | 27.50 |
| P7 cycles | 27.50 |
| P8 cycles | 27.50 |
| P9 cycles | 27.50 |
| P10 cycles | 14.00 |
| P11 cycles | 14.00 |
| P12 cycles | 14.00 |
| P13 cycles | 14.00 |
| P14 cycles | 20.00 |
| P15 cycles | 20.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | 1 |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 189.00 |
| Nb uops | 220.00 |
| Nb loads | 70.00 |
| Nb stores | 40.00 |
| Nb stack references | 13.00 |
| FLOP/cycle | 2.33 |
| Nb FLOP add-sub | 64.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 77.38 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 1360.00 |
| Bytes stored | 768.00 |
| Stride 0 | 1.00 |
| Stride 1 | 1.00 |
| Stride n | 0.00 |
| Stride unknown | 0.00 |
| Stride indirect | 0.00 |
| Vectorization ratio all | 84.21 |
| Vectorization ratio load | 100.00 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 100.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 57.14 |
| Vector-efficiency ratio all | 26.64 |
| Vector-efficiency ratio load | 29.41 |
| Vector-efficiency ratio store | 30.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 50.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 16.96 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.01 |
| CQA speedup if FP arith vectorized | 1.01 |
| CQA speedup if fully vectorized | 3.42 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.38 |
| Bottlenecks | micro-operation queue, P6, P7, P8, P9 |
| Function | main |
| Source | attention_v2.cpp:52-53 |
| Source loop unroll info | unrolled by 32 |
| Source loop unroll confidence level | max |
| Unroll/vectorization loop type | main |
| Unroll factor | 32 |
| CQA cycles | 27.50 |
| CQA cycles if no scalar integer | 27.25 |
| CQA cycles if FP arith vectorized | 27.25 |
| CQA cycles if fully vectorized | 8.03 |
| Front-end cycles | 27.50 |
| P0 cycles | 0.33 |
| P1 cycles | 0.33 |
| P2 cycles | 0.33 |
| P3 cycles | 0.33 |
| P4 cycles | 0.33 |
| P5 cycles | 0.33 |
| P6 cycles | 27.50 |
| P7 cycles | 27.50 |
| P8 cycles | 27.50 |
| P9 cycles | 27.50 |
| P10 cycles | 14.00 |
| P11 cycles | 14.00 |
| P12 cycles | 14.00 |
| P13 cycles | 14.00 |
| P14 cycles | 20.00 |
| P15 cycles | 20.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | 1 |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 189.00 |
| Nb uops | 220.00 |
| Nb loads | 70.00 |
| Nb stores | 40.00 |
| Nb stack references | 13.00 |
| FLOP/cycle | 2.33 |
| Nb FLOP add-sub | 64.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 77.38 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 1360.00 |
| Bytes stored | 768.00 |
| Stride 0 | 1.00 |
| Stride 1 | 1.00 |
| Stride n | 0.00 |
| Stride unknown | 0.00 |
| Stride indirect | 0.00 |
| Vectorization ratio all | 84.21 |
| Vectorization ratio load | 100.00 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 100.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 57.14 |
| Vector-efficiency ratio all | 26.64 |
| Vector-efficiency ratio load | 29.41 |
| Vector-efficiency ratio store | 30.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 50.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 16.96 |
| Path / |
| Function | main |
| Source file and lines | attention_v2.cpp:52-53 |
| Module | attention-aocc-znver5-256 |
| nb instructions | 189 |
| nb uops | 220 |
| loop length | 1362 |
| used x86 registers | 4 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 6 |
| used zmm registers | 0 |
| nb stack references | 13 |
| micro-operation queue | 27.50 cycles |
| front end | 27.50 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | 27.50 | 27.50 | 27.50 | 27.50 | 14.00 | 14.00 | 14.00 | 14.00 | 20.00 | 20.00 |
| cycles | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | 27.50 | 27.50 | 27.50 | 27.50 | 14.00 | 14.00 | 14.00 | 14.00 | 20.00 | 20.00 |
| Cycles executing div or sqrt instructions | NA |
| Longest recurrence chain latency (RecMII) | 1.00 |
| Front-end | 27.50 |
| Dispatch | 27.50 |
| Data deps. | 1.00 |
| Overall L1 | 27.50 |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 100% |
| all | 83% |
| load | 100% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 84% |
| load | 100% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 57% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 25% |
| all | 26% |
| load | 29% |
| store | 30% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 15% |
| all | 26% |
| load | 29% |
| store | 30% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 16% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VMOVAPS %YMM1,0x360(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVUPS -0x60(%RSI,%R12,4),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVAPS 0x340(%RSP),%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVUPS -0x40(%RSI,%R12,4),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVUPS -0x20(%RSI,%R12,4),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVUPS (%RSI,%R12,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VSUBPS %YMM4,%YMM0,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VSUBPS %YMM4,%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VSUBPS %YMM4,%YMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM5,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VSUBPS %YMM4,%YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x40(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x40(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x40(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS 0x80(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VINSERTF128 $0x1,0x40(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVAPS 0x300(%RSP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VADDPS %YMM1,%YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS 0xa0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS 0xa0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VINSERTF128 $0x1,0x80(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVAPS 0x2e0(%RSP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VADDPS %YMM1,%YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS 0x120(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS 0x120(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVAPS 0x320(%RSP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VADDPS %YMM1,%YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS 0xe0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMOVAPS %XMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS 0xe0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0xe0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0xe0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0xe0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVAPS 0x360(%RSP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| MOV 0x2d0(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x270(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| ADD $0x20,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VINSERTPS $0x30,%XMM0,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VINSERTF128 $0x1,0x120(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VADDPS %YMM1,%YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| CMP %R12,%R11 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JNE 5a90 <main+0x1e80> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| Function | main |
| Source file and lines | attention_v2.cpp:52-53 |
| Module | attention-aocc-znver5-256 |
| nb instructions | 189 |
| nb uops | 220 |
| loop length | 1362 |
| used x86 registers | 4 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 6 |
| used zmm registers | 0 |
| nb stack references | 13 |
| micro-operation queue | 27.50 cycles |
| front end | 27.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | 27.50 | 27.50 | 27.50 | 27.50 | 14.00 | 14.00 | 14.00 | 14.00 | 20.00 | 20.00 |
| cycles | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | 27.50 | 27.50 | 27.50 | 27.50 | 14.00 | 14.00 | 14.00 | 14.00 | 20.00 | 20.00 |
| Cycles executing div or sqrt instructions | NA |
| Longest recurrence chain latency (RecMII) | 1.00 |
| Front-end | 27.50 |
| Dispatch | 27.50 |
| Data deps. | 1.00 |
| Overall L1 | 27.50 |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 100% |
| all | 83% |
| load | 100% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 84% |
| load | 100% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 57% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 25% |
| all | 26% |
| load | 29% |
| store | 30% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 15% |
| all | 26% |
| load | 29% |
| store | 30% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 16% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VMOVAPS %YMM1,0x360(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVUPS -0x60(%RSI,%R12,4),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVAPS 0x340(%RSP),%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVUPS -0x40(%RSI,%R12,4),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVUPS -0x20(%RSI,%R12,4),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVUPS (%RSI,%R12,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VSUBPS %YMM4,%YMM0,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VSUBPS %YMM4,%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VSUBPS %YMM4,%YMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM5,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VSUBPS %YMM4,%YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x40(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x40(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x40(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS 0x80(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x180(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VINSERTF128 $0x1,0x40(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVAPS 0x300(%RSP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VADDPS %YMM1,%YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS 0xa0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x80(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS 0xa0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x40(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VINSERTF128 $0x1,0x80(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVAPS 0x2e0(%RSP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VADDPS %YMM1,%YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS 0x120(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0xa0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS 0x120(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x80(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVAPS 0x320(%RSP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VADDPS %YMM1,%YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMOVAPS 0xe0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMOVAPS %XMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0x120(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS 0xe0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP 0xe0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPD $0x1,0xe0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VMOVAPS %XMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VPERMILPS $-0x1,0xe0(%RSP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.50 | vect (25.0%) |
| CALL 6fd0 <@plt_start@+0x20> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xa0(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVAPS 0x360(%RSP),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| MOV 0x2d0(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x270(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| ADD $0x20,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VINSERTPS $0x30,%XMM0,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | scal (6.3%) |
| VINSERTF128 $0x1,0x120(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VADDPS %YMM1,%YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| CMP %R12,%R11 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JNE 5a90 <main+0x1e80> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
