Loop Id: 105 | Module: exec | Source: accelerate.cpp:41-54 [...] | Coverage: 3.43% |
---|
Loop Id: 105 | Module: exec | Source: accelerate.cpp:41-54 [...] | Coverage: 3.43% |
---|
0x42a420 VMOVDQA64 %ZMM17,%ZMM0 |
0x42a426 VMOVDQA64 %ZMM16,%ZMM1 |
0x42a42c MOV %R12,%RBX |
0x42a42f MOV %R15,%R12 |
0x42a432 MOV %R9,%R15 |
0x42a435 MOV $0x466620,%RAX |
0x42a43c CALL %RAX |
0x42a43e VPMOVQD %ZMM0,%YMM0 |
0x42a444 VPADDD 0x140(%RSP),%YMM0,%YMM31 [19] |
0x42a44c VMOVDQA64 %ZMM17,%ZMM0 |
0x42a452 VMOVDQA64 %ZMM16,%ZMM1 |
0x42a458 CALL 4664a0 <__svml_i64rem8_z0> |
0x42a45e MOV %R15,%R9 |
0x42a461 MOV %R12,%R15 |
0x42a464 MOV %RBX,%R12 |
0x42a467 VPMOVQD %ZMM0,%YMM0 |
0x42a46d VPADDD 0x120(%RSP),%YMM0,%YMM2 [19] |
0x42a476 VPCMPEQD %YMM1,%YMM1,%YMM1 |
0x42a47a VPADDD %YMM1,%YMM2,%YMM0 |
0x42a47e VPADDD %YMM1,%YMM31,%YMM1 |
0x42a484 VPMOVSXDQ %YMM1,%ZMM1 |
0x42a48a VPXOR %XMM5,%XMM5,%XMM5 |
0x42a48e VPMULLQ %ZMM1,%ZMM20,%ZMM5 |
0x42a494 VPMOVSXDQ %YMM0,%ZMM0 |
0x42a49a KXNORW %K0,%K0,%K1 |
0x42a49e VXORPD %XMM4,%XMM4,%XMM4 |
0x42a4a2 VPMULLQ %ZMM1,%ZMM21,%ZMM9 |
0x42a4a8 VPADDQ %ZMM0,%ZMM5,%ZMM7 |
0x42a4ae KXNORW %K0,%K0,%K2 |
0x42a4b2 VPMOVSXDQ %YMM31,%ZMM3 |
0x42a4b8 VPMULLQ %ZMM3,%ZMM20,%ZMM10 |
0x42a4be VPADDQ %ZMM0,%ZMM9,%ZMM8 |
0x42a4c4 VXORPD %XMM6,%XMM6,%XMM6 |
0x42a4c8 VGATHERQPD (%R15,%ZMM7,8),%ZMM4{%K1} [26] |
0x42a4cf VPADDQ %ZMM0,%ZMM10,%ZMM11 |
0x42a4d5 VGATHERQPD (%RBX,%ZMM8,8),%ZMM6{%K2} [18] |
0x42a4dc KXNORW %K0,%K0,%K1 |
0x42a4e0 VXORPD %XMM7,%XMM7,%XMM7 |
0x42a4e4 VPMULLQ %ZMM3,%ZMM21,%ZMM12 |
0x42a4ea VPADDQ %ZMM0,%ZMM12,%ZMM13 |
0x42a4f0 KXNORW %K0,%K0,%K2 |
0x42a4f4 VGATHERQPD (%R15,%ZMM11,8),%ZMM7{%K1} [20] |
0x42a4fb VXORPD %XMM8,%XMM8,%XMM8 |
0x42a500 VGATHERQPD (%RBX,%ZMM13,8),%ZMM8{%K2} [12] |
0x42a507 VPMOVSXDQ %YMM2,%ZMM2 |
0x42a50d VPADDQ %ZMM2,%ZMM10,%ZMM10 |
0x42a513 KXNORW %K0,%K0,%K1 |
0x42a517 VXORPD %XMM13,%XMM13,%XMM13 |
0x42a51c VPADDQ %ZMM2,%ZMM12,%ZMM11 |
0x42a522 KXNORW %K0,%K0,%K2 |
0x42a526 VGATHERQPD (%R15,%ZMM10,8),%ZMM13{%K1} [23] |
0x42a52d VXORPD %XMM10,%XMM10,%XMM10 |
0x42a532 VGATHERQPD (%RBX,%ZMM11,8),%ZMM10{%K2} [24] |
0x42a539 VPADDQ %ZMM2,%ZMM5,%ZMM5 |
0x42a53f KXNORW %K0,%K0,%K1 |
0x42a543 VXORPD %XMM14,%XMM14,%XMM14 |
0x42a548 VPADDQ %ZMM2,%ZMM9,%ZMM9 |
0x42a54e KXNORW %K0,%K0,%K2 |
0x42a552 VGATHERQPD (%R15,%ZMM5,8),%ZMM14{%K1} [2] |
0x42a559 VXORPD %XMM15,%XMM15,%XMM15 |
0x42a55e VGATHERQPD (%RBX,%ZMM9,8),%ZMM15{%K2} [25] |
0x42a565 VPXOR %XMM5,%XMM5,%XMM5 |
0x42a569 VPMULLQ %ZMM3,%ZMM24,%ZMM5 |
0x42a56f KXNORW %K0,%K0,%K1 |
0x42a573 VPXORD %XMM31,%XMM31,%XMM31 |
0x42a579 VPADDQ %ZMM2,%ZMM5,%ZMM5 |
0x42a57f VPXOR %XMM9,%XMM9,%XMM9 |
0x42a584 VPMULLQ %ZMM3,%ZMM25,%ZMM9 |
0x42a58a KXNORW %K0,%K0,%K2 |
0x42a58e VXORPD %XMM18,%XMM18,%XMM18 |
0x42a594 VPADDQ %ZMM2,%ZMM9,%ZMM11 |
0x42a59a VPADDQ %ZMM0,%ZMM9,%ZMM9 |
0x42a5a0 VGATHERQPD (%R14,%ZMM5,8),%ZMM31{%K1} [13] |
0x42a5a7 KXNORW %K0,%K0,%K1 |
0x42a5ab VGATHERQPD (%RSI,%ZMM11,8),%ZMM18{%K2} [4] |
0x42a5b2 VXORPD %XMM19,%XMM19,%XMM19 |
0x42a5b8 VGATHERQPD (%RSI,%ZMM9,8),%ZMM19{%K1} [5] |
0x42a5bf VPXOR %XMM12,%XMM12,%XMM12 |
0x42a5c4 VPMULLQ %ZMM1,%ZMM25,%ZMM12 |
0x42a5ca KXNORW %K0,%K0,%K1 |
0x42a5ce VPADDQ %ZMM2,%ZMM12,%ZMM22 |
0x42a5d4 VXORPD %XMM23,%XMM23,%XMM23 |
0x42a5da VGATHERQPD (%RSI,%ZMM22,8),%ZMM23{%K1} [14] |
0x42a5e1 VPADDQ %ZMM0,%ZMM12,%ZMM12 |
0x42a5e7 KXNORW %K0,%K0,%K1 |
0x42a5eb VMULPD %ZMM4,%ZMM6,%ZMM4 |
0x42a5f1 VXORPD %XMM26,%XMM26,%XMM26 |
0x42a5f7 VGATHERQPD (%RSI,%ZMM12,8),%ZMM26{%K1} [6] |
0x42a5fe VFMADD213PD %ZMM4,%ZMM7,%ZMM8 |
0x42a604 VMOVDQU64 0x1c0(%RSP),%ZMM4 [19] |
0x42a60c VPMULLQ %ZMM3,%ZMM4,%ZMM4 |
0x42a612 KXNORW %K0,%K0,%K1 |
0x42a616 VFMADD213PD %ZMM8,%ZMM13,%ZMM10 |
0x42a61c VXORPD %XMM8,%XMM8,%XMM8 |
0x42a621 VPXOR %XMM6,%XMM6,%XMM6 |
0x42a625 VPMULLQ %ZMM1,%ZMM24,%ZMM6 |
0x42a62b VPADDQ %ZMM2,%ZMM6,%ZMM6 |
0x42a631 VPADDQ %ZMM2,%ZMM4,%ZMM4 |
0x42a637 KXNORW %K0,%K0,%K2 |
0x42a63b MOV 0xb8(%RSP),%RAX [19] |
0x42a643 VGATHERQPD (%RAX,%ZMM4,8),%ZMM8{%K1} [17] |
0x42a64a VXORPD %XMM7,%XMM7,%XMM7 |
0x42a64e VGATHERQPD (%R14,%ZMM6,8),%ZMM7{%K2} [22] |
0x42a655 VFMADD213PD %ZMM10,%ZMM14,%ZMM15 |
0x42a65b VSUBPD %ZMM18,%ZMM19,%ZMM10 |
0x42a661 VMULPD 0x4e105(%RIP){1to8},%ZMM15,%ZMM4 [16] |
0x42a66b VMOVUPD 0x200(%RSP),%ZMM13 [19] |
0x42a673 VDIVPD %ZMM4,%ZMM13,%ZMM4 |
0x42a679 VMULPD %ZMM31,%ZMM10,%ZMM10 |
0x42a67f VSUBPD %ZMM23,%ZMM26,%ZMM13 |
0x42a685 VMOVDQU64 0x180(%RSP),%ZMM14 [19] |
0x42a68d VPMULLQ %ZMM3,%ZMM14,%ZMM14 |
0x42a693 VFMADD213PD %ZMM10,%ZMM7,%ZMM13 |
0x42a699 VPADDQ %ZMM2,%ZMM14,%ZMM7 |
0x42a69f KXNORW %K0,%K0,%K1 |
0x42a6a3 VPXOR %XMM10,%XMM10,%XMM10 |
0x42a6a8 VPMULLQ %ZMM3,%ZMM27,%ZMM10 |
0x42a6ae VFMADD213PD %ZMM8,%ZMM4,%ZMM13 |
0x42a6b4 VPXOR %XMM14,%XMM14,%XMM14 |
0x42a6b9 VPMULLQ %ZMM3,%ZMM28,%ZMM14 |
0x42a6bf KXNORW %K0,%K0,%K2 |
0x42a6c3 VXORPD %XMM15,%XMM15,%XMM15 |
0x42a6c8 MOV 0xb0(%RSP),%RCX [19] |
0x42a6d0 VSCATTERQPD %ZMM13,(%RCX,%ZMM7,8){%K1} [8] |
0x42a6d7 KXNORW %K0,%K0,%K1 |
0x42a6db VGATHERQPD (%RSI,%ZMM11,8),%ZMM15{%K2} [4] |
0x42a6e2 VXORPD %XMM11,%XMM11,%XMM11 |
0x42a6e7 VGATHERQPD (%RSI,%ZMM22,8),%ZMM11{%K1} [14] |
0x42a6ee VPADDQ %ZMM2,%ZMM14,%ZMM8 |
0x42a6f4 KXNORW %K0,%K0,%K1 |
0x42a6f8 VXORPD %XMM13,%XMM13,%XMM13 |
0x42a6fd KXNORW %K0,%K0,%K2 |
0x42a701 VXORPD %XMM18,%XMM18,%XMM18 |
0x42a707 MOV 0xa0(%RSP),%RAX [19] |
0x42a70f VGATHERQPD (%RAX,%ZMM8,8),%ZMM13{%K1} [21] |
0x42a716 KXNORW %K0,%K0,%K1 |
0x42a71a VGATHERQPD (%RSI,%ZMM9,8),%ZMM18{%K2} [5] |
0x42a721 VXORPD %XMM19,%XMM19,%XMM19 |
0x42a727 VGATHERQPD (%RSI,%ZMM12,8),%ZMM19{%K1} [6] |
0x42a72e KXNORW %K0,%K0,%K1 |
0x42a732 VXORPD %XMM12,%XMM12,%XMM12 |
0x42a737 VPADDQ %ZMM0,%ZMM14,%ZMM9 |
0x42a73d VPADDQ %ZMM2,%ZMM10,%ZMM10 |
0x42a743 KXNORW %K0,%K0,%K2 |
0x42a747 MOV 0xa8(%RSP),%RDX [19] |
0x42a74f VGATHERQPD (%RDX,%ZMM10,8),%ZMM12{%K1} [7] |
0x42a756 VXORPD %XMM10,%XMM10,%XMM10 |
0x42a75b VGATHERQPD (%RAX,%ZMM9,8),%ZMM10{%K2} [3] |
0x42a762 VSUBPD %ZMM15,%ZMM11,%ZMM11 |
0x42a768 VMULPD %ZMM13,%ZMM11,%ZMM11 |
0x42a76e VSUBPD %ZMM18,%ZMM19,%ZMM13 |
0x42a774 VPXOR %XMM14,%XMM14,%XMM14 |
0x42a779 VPMULLQ %ZMM3,%ZMM29,%ZMM14 |
0x42a77f VFMADD213PD %ZMM11,%ZMM10,%ZMM13 |
0x42a785 VPADDQ %ZMM2,%ZMM14,%ZMM10 |
0x42a78b VPMULLQ %ZMM3,%ZMM30,%ZMM3 |
0x42a791 VFMADD213PD %ZMM12,%ZMM4,%ZMM13 |
0x42a797 KXNORW %K0,%K0,%K1 |
0x42a79b VPADDQ %ZMM2,%ZMM3,%ZMM11 |
0x42a7a1 KXNORW %K0,%K0,%K2 |
0x42a7a5 VXORPD %XMM12,%XMM12,%XMM12 |
0x42a7aa VPADDQ %ZMM0,%ZMM3,%ZMM3 |
0x42a7b0 MOV 0x58(%RSP),%RDX [19] |
0x42a7b5 VSCATTERQPD %ZMM13,(%RDX,%ZMM10,8){%K1} [1] |
0x42a7bc KXNORW %K0,%K0,%K1 |
0x42a7c0 VGATHERQPD (%RDI,%ZMM11,8),%ZMM12{%K2} [15] |
0x42a7c7 VXORPD %XMM13,%XMM13,%XMM13 |
0x42a7cc VGATHERQPD (%RDI,%ZMM3,8),%ZMM13{%K1} [9] |
0x42a7d3 VPMULLQ %ZMM1,%ZMM30,%ZMM1 |
0x42a7d9 KXNORW %K0,%K0,%K1 |
0x42a7dd VPXOR %XMM14,%XMM14,%XMM14 |
0x42a7e2 VGATHERQPD (%R14,%ZMM5,8),%ZMM14{%K1} [13] |
0x42a7e9 KXNORW %K0,%K0,%K1 |
0x42a7ed VPADDQ %ZMM2,%ZMM1,%ZMM2 |
0x42a7f3 KXNORW %K0,%K0,%K2 |
0x42a7f7 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
0x42a7fd VPXOR %XMM1,%XMM1,%XMM1 |
0x42a801 VXORPD %XMM5,%XMM5,%XMM5 |
0x42a805 VGATHERQPD (%R14,%ZMM6,8),%ZMM1{%K1} [22] |
0x42a80c KXNORW %K0,%K0,%K1 |
0x42a810 VGATHERQPD (%RDI,%ZMM2,8),%ZMM5{%K2} [10] |
0x42a817 VXORPD %XMM6,%XMM6,%XMM6 |
0x42a81b VGATHERQPD (%RDI,%ZMM0,8),%ZMM6{%K1} [11] |
0x42a822 KXNORW %K0,%K0,%K1 |
0x42a826 VSUBPD %ZMM12,%ZMM13,%ZMM12 |
0x42a82c VXORPD %XMM13,%XMM13,%XMM13 |
0x42a831 VGATHERQPD (%RCX,%ZMM7,8),%ZMM13{%K1} [8] |
0x42a838 VMULPD %ZMM14,%ZMM12,%ZMM12 |
0x42a83e VSUBPD %ZMM5,%ZMM6,%ZMM5 |
0x42a844 VFMADD213PD %ZMM12,%ZMM1,%ZMM5 |
0x42a84a VFMADD213PD %ZMM13,%ZMM4,%ZMM5 |
0x42a850 KXNORW %K0,%K0,%K1 |
0x42a854 VSCATTERQPD %ZMM5,(%RCX,%ZMM7,8){%K1} [8] |
0x42a85b KXNORW %K0,%K0,%K1 |
0x42a85f VXORPD %XMM1,%XMM1,%XMM1 |
0x42a863 KXNORW %K0,%K0,%K2 |
0x42a867 VGATHERQPD (%RDI,%ZMM11,8),%ZMM1{%K1} [15] |
0x42a86e VXORPD %XMM5,%XMM5,%XMM5 |
0x42a872 VGATHERQPD (%RDI,%ZMM2,8),%ZMM5{%K2} [10] |
0x42a879 KXNORW %K0,%K0,%K1 |
0x42a87d VXORPD %XMM2,%XMM2,%XMM2 |
0x42a881 VGATHERQPD (%RAX,%ZMM8,8),%ZMM2{%K1} [21] |
0x42a888 KXNORW %K0,%K0,%K1 |
0x42a88c VXORPD %XMM6,%XMM6,%XMM6 |
0x42a890 KXNORW %K0,%K0,%K2 |
0x42a894 VGATHERQPD (%RDI,%ZMM3,8),%ZMM6{%K1} [9] |
0x42a89b VXORPD %XMM3,%XMM3,%XMM3 |
0x42a89f VGATHERQPD (%RDI,%ZMM0,8),%ZMM3{%K2} [11] |
0x42a8a6 KXNORW %K0,%K0,%K1 |
0x42a8aa VXORPD %XMM0,%XMM0,%XMM0 |
0x42a8ae VGATHERQPD (%RAX,%ZMM9,8),%ZMM0{%K1} [3] |
0x42a8b5 KXNORW %K0,%K0,%K1 |
0x42a8b9 VSUBPD %ZMM1,%ZMM5,%ZMM1 |
0x42a8bf VXORPD %XMM5,%XMM5,%XMM5 |
0x42a8c3 VGATHERQPD (%RDX,%ZMM10,8),%ZMM5{%K1} [1] |
0x42a8ca VMULPD %ZMM2,%ZMM1,%ZMM1 |
0x42a8d0 VSUBPD %ZMM6,%ZMM3,%ZMM2 |
0x42a8d6 VFMADD213PD %ZMM1,%ZMM0,%ZMM2 |
0x42a8dc VFMADD213PD %ZMM5,%ZMM4,%ZMM2 |
0x42a8e2 KXNORW %K0,%K0,%K1 |
0x42a8e6 VSCATTERQPD %ZMM2,(%RDX,%ZMM10,8){%K1} [1] |
0x42a8ed VPADDQ 0x4de81(%RIP){1to8},%ZMM17,%ZMM17 [16] |
0x42a8f7 ADD $0x8,%R13 |
0x42a8fb CMP %R9,%R13 |
0x42a8fe JB 42a420 |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/accelerate.cpp: 41 - 54 |
-------------------------------------------------------------------------------- |
41: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
42: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
43: double stepbymass_s = halfdt / ((density0(i - 1, j - 1) * volume(i - 1, j - 1) + density0(i - 1, j + 0) * volume(i - 1, j + 0) + |
44: density0(i, j) * volume(i, j) + density0(i + 0, j - 1) * volume(i + 0, j - 1)) * |
45: 0.25); |
46: xvel1(i, j) = xvel0(i, j) - stepbymass_s * (xarea(i, j) * (pressure(i, j) - pressure(i - 1, j + 0)) + |
47: xarea(i + 0, j - 1) * (pressure(i + 0, j - 1) - pressure(i - 1, j - 1))); |
48: yvel1(i, j) = yvel0(i, j) - stepbymass_s * (yarea(i, j) * (pressure(i, j) - pressure(i + 0, j - 1)) + |
49: yarea(i - 1, j + 0) * (pressure(i - 1, j + 0) - pressure(i - 1, j - 1))); |
50: xvel1(i, j) = xvel1(i, j) - stepbymass_s * (xarea(i, j) * (viscosity(i, j) - viscosity(i - 1, j + 0)) + |
51: xarea(i + 0, j - 1) * (viscosity(i + 0, j - 1) - viscosity(i - 1, j - 1))); |
52: yvel1(i, j) = yvel1(i, j) - stepbymass_s * (yarea(i, j) * (viscosity(i, j) - viscosity(i + 0, j - 1)) + |
53: yarea(i - 1, j + 0) * (viscosity(i - 1, j + 0) - viscosity(i - 1, j - 1))); |
54: } |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.18 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.30 |
Bottlenecks | P0, P5, |
Function | accelerate_kernel(int, int, int, int, double, clover::Buffer2D |
Source | accelerate.cpp:41-54,context.h:69-69 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 130.00 |
CQA cycles if no scalar integer | 110.00 |
CQA cycles if FP arith vectorized | 130.00 |
CQA cycles if fully vectorized | 130.00 |
Front-end cycles | 84.67 |
DIV/SQRT cycles | 130.00 |
P0 cycles | 12.50 |
P1 cycles | 100.00 |
P2 cycles | 100.00 |
P3 cycles | 17.00 |
P4 cycles | 130.00 |
P5 cycles | 3.40 |
P6 cycles | 17.00 |
P7 cycles | 17.00 |
P8 cycles | 17.00 |
P9 cycles | 3.60 |
P10 cycles | 100.00 |
P11 cycles | 16.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | 137.62 - 320.14 |
Stall cycles (UFS) | 74.89 - 257.13 |
Nb insns | 225.00 |
Nb uops | 508.00 |
Nb loads | 48.00 |
Nb stores | 4.00 |
Nb stack references | 10.00 |
FLOP/cycle | 2.28 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 48.00 |
Nb FLOP fma | 88.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 22.09 |
Bytes prefetched | 0.00 |
Bytes loaded | 2616.00 |
Bytes stored | 256.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 23.00 |
Vectorization ratio all | 98.82 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 97.85 |
Vector-efficiency ratio all | 76.91 |
Vector-efficiency ratio load | 97.67 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 94.59 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 59.95 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.18 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.30 |
Bottlenecks | P0, P5, |
Function | accelerate_kernel(int, int, int, int, double, clover::Buffer2D |
Source | accelerate.cpp:41-54,context.h:69-69 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 130.00 |
CQA cycles if no scalar integer | 110.00 |
CQA cycles if FP arith vectorized | 130.00 |
CQA cycles if fully vectorized | 130.00 |
Front-end cycles | 84.67 |
DIV/SQRT cycles | 130.00 |
P0 cycles | 12.50 |
P1 cycles | 100.00 |
P2 cycles | 100.00 |
P3 cycles | 17.00 |
P4 cycles | 130.00 |
P5 cycles | 3.40 |
P6 cycles | 17.00 |
P7 cycles | 17.00 |
P8 cycles | 17.00 |
P9 cycles | 3.60 |
P10 cycles | 100.00 |
P11 cycles | 16.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | 137.62 - 320.14 |
Stall cycles (UFS) | 74.89 - 257.13 |
Nb insns | 225.00 |
Nb uops | 508.00 |
Nb loads | 48.00 |
Nb stores | 4.00 |
Nb stack references | 10.00 |
FLOP/cycle | 2.28 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 48.00 |
Nb FLOP fma | 88.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 22.09 |
Bytes prefetched | 0.00 |
Bytes loaded | 2616.00 |
Bytes stored | 256.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 23.00 |
Vectorization ratio all | 98.82 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 97.85 |
Vector-efficiency ratio all | 76.91 |
Vector-efficiency ratio load | 97.67 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 94.59 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 59.95 |
Path / |
nb instructions | 225 |
nb uops | 508 |
loop length | 1252 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 21 |
used ymm registers | 4 |
used zmm registers | 32 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 84.67 cycles |
front end | 84.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 130.00 | 6.00 | 100.00 | 100.00 | 17.00 | 130.00 | 3.40 | 17.00 | 17.00 | 17.00 | 3.60 | 100.00 |
cycles | 130.00 | 12.50 | 100.00 | 100.00 | 17.00 | 130.00 | 3.40 | 17.00 | 17.00 | 17.00 | 3.60 | 100.00 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 2.00 |
FE+BE cycles | 137.62-320.14 |
Stall cycles | 74.89-257.13 |
RS full (events) | 130.75-32.58 |
Front-end | 84.67 |
Dispatch | 130.00 |
DIV/SQRT | 16.00 |
Data deps. | 2.00 |
Overall L1 | 130.00 |
all | 97% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 91% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 98% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 97% |
all | 79% |
load | 80% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 93% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 47% |
all | 75% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 64% |
all | 76% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 94% |
fma | 100% |
div/sqrt | 100% |
other | 59% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R12,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x466620,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL %RAX | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VPMOVQD %ZMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x140(%RSP),%YMM0,%YMM31 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALL 4664a0 <__svml_i64rem8_z0> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPMOVQD %ZMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x120(%RSP),%YMM0,%YMM2 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPCMPEQD %YMM1,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDD %YMM1,%YMM2,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDD %YMM1,%YMM31,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPMOVSXDQ %YMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM20,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMOVSXDQ %YMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM21,%ZMM9 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM5,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPMOVSXDQ %YMM31,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %ZMM3,%ZMM20,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM9,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R15,%ZMM7,8),%ZMM4{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM0,%ZMM10,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%RBX,%ZMM8,8),%ZMM6{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM21,%ZMM12 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM12,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM11,8),%ZMM7{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM13,8),%ZMM8{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMOVSXDQ %YMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %ZMM2,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM12,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM10,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM11,8),%ZMM10{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM2,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM5,8),%ZMM14{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM9,8),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM24,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXORD %XMM31,%XMM31,%XMM31 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPADDQ %ZMM2,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM25,%ZMM9 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM9,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM0,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R14,%ZMM5,8),%ZMM31{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM11,8),%ZMM18{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM9,8),%ZMM19{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPXOR %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM25,%ZMM12 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM12,%ZMM22 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM22,8),%ZMM23{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM0,%ZMM12,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM4,%ZMM6,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM26,%XMM26,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM12,8),%ZMM26{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VFMADD213PD %ZMM4,%ZMM7,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU64 0x1c0(%RSP),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPMULLQ %ZMM3,%ZMM4,%ZMM4 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VFMADD213PD %ZMM8,%ZMM13,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM24,%ZMM6 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM2,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0xb8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RAX,%ZMM4,8),%ZMM8{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM6,8),%ZMM7{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VFMADD213PD %ZMM10,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM18,%ZMM19,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD 0x4e105(%RIP){1to8},%ZMM15,%ZMM4 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0x200(%RSP),%ZMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VDIVPD %ZMM4,%ZMM13,%ZMM4 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 22-24 | 16 |
VMULPD %ZMM31,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM23,%ZMM26,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVDQU64 0x180(%RSP),%ZMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPMULLQ %ZMM3,%ZMM14,%ZMM14 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM10,%ZMM7,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM2,%ZMM14,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM27,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM8,%ZMM4,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPXOR %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM28,%ZMM14 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xb0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %ZMM13,(%RCX,%ZMM7,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM11,8),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM22,8),%ZMM11{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM2,%ZMM14,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RAX,%ZMM8,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM9,8),%ZMM18{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM12,8),%ZMM19{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM0,%ZMM14,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0xa8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RDX,%ZMM10,8),%ZMM12{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM9,8),%ZMM10{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VSUBPD %ZMM15,%ZMM11,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %ZMM13,%ZMM11,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM18,%ZMM19,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPXOR %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM29,%ZMM14 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM11,%ZMM10,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM2,%ZMM14,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM3,%ZMM30,%ZMM3 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM12,%ZMM4,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM3,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM0,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %ZMM13,(%RDX,%ZMM10,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RDI,%ZMM11,8),%ZMM12{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM3,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMULLQ %ZMM1,%ZMM30,%ZMM1 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM5,8),%ZMM14{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM6,8),%ZMM1{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RDI,%ZMM2,8),%ZMM5{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM0,8),%ZMM6{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM12,%ZMM13,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RCX,%ZMM7,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM14,%ZMM12,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM5,%ZMM6,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM12,%ZMM1,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %ZMM13,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %ZMM5,(%RCX,%ZMM7,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RDI,%ZMM11,8),%ZMM1{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM2,8),%ZMM5{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM8,8),%ZMM2{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RDI,%ZMM3,8),%ZMM6{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM0,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM9,8),%ZMM0{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM1,%ZMM5,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDX,%ZMM10,8),%ZMM5{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM6,%ZMM3,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM1,%ZMM0,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %ZMM5,%ZMM4,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %ZMM2,(%RDX,%ZMM10,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPADDQ 0x4de81(%RIP){1to8},%ZMM17,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
ADD $0x8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R9,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42a420 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x320> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
nb instructions | 225 |
nb uops | 508 |
loop length | 1252 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 21 |
used ymm registers | 4 |
used zmm registers | 32 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 84.67 cycles |
front end | 84.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 130.00 | 6.00 | 100.00 | 100.00 | 17.00 | 130.00 | 3.40 | 17.00 | 17.00 | 17.00 | 3.60 | 100.00 |
cycles | 130.00 | 12.50 | 100.00 | 100.00 | 17.00 | 130.00 | 3.40 | 17.00 | 17.00 | 17.00 | 3.60 | 100.00 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 2.00 |
FE+BE cycles | 137.62-320.14 |
Stall cycles | 74.89-257.13 |
RS full (events) | 130.75-32.58 |
Front-end | 84.67 |
Dispatch | 130.00 |
DIV/SQRT | 16.00 |
Data deps. | 2.00 |
Overall L1 | 130.00 |
all | 97% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 91% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 98% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 97% |
all | 79% |
load | 80% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 93% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 47% |
all | 75% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 64% |
all | 76% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 94% |
fma | 100% |
div/sqrt | 100% |
other | 59% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R12,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x466620,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL %RAX | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VPMOVQD %ZMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x140(%RSP),%YMM0,%YMM31 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALL 4664a0 <__svml_i64rem8_z0> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPMOVQD %ZMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x120(%RSP),%YMM0,%YMM2 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPCMPEQD %YMM1,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDD %YMM1,%YMM2,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDD %YMM1,%YMM31,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPMOVSXDQ %YMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM20,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMOVSXDQ %YMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM21,%ZMM9 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM5,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPMOVSXDQ %YMM31,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %ZMM3,%ZMM20,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM9,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R15,%ZMM7,8),%ZMM4{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM0,%ZMM10,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%RBX,%ZMM8,8),%ZMM6{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM21,%ZMM12 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM12,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM11,8),%ZMM7{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM13,8),%ZMM8{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMOVSXDQ %YMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %ZMM2,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM12,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM10,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM11,8),%ZMM10{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM2,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM5,8),%ZMM14{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM9,8),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM24,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXORD %XMM31,%XMM31,%XMM31 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPADDQ %ZMM2,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM25,%ZMM9 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM9,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM0,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R14,%ZMM5,8),%ZMM31{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM11,8),%ZMM18{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM9,8),%ZMM19{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPXOR %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM25,%ZMM12 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM12,%ZMM22 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM22,8),%ZMM23{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM0,%ZMM12,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM4,%ZMM6,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM26,%XMM26,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM12,8),%ZMM26{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VFMADD213PD %ZMM4,%ZMM7,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU64 0x1c0(%RSP),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPMULLQ %ZMM3,%ZMM4,%ZMM4 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VFMADD213PD %ZMM8,%ZMM13,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM24,%ZMM6 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM2,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0xb8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RAX,%ZMM4,8),%ZMM8{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM6,8),%ZMM7{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VFMADD213PD %ZMM10,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM18,%ZMM19,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD 0x4e105(%RIP){1to8},%ZMM15,%ZMM4 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0x200(%RSP),%ZMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VDIVPD %ZMM4,%ZMM13,%ZMM4 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 22-24 | 16 |
VMULPD %ZMM31,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM23,%ZMM26,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVDQU64 0x180(%RSP),%ZMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPMULLQ %ZMM3,%ZMM14,%ZMM14 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM10,%ZMM7,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM2,%ZMM14,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM27,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM8,%ZMM4,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPXOR %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM28,%ZMM14 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xb0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %ZMM13,(%RCX,%ZMM7,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM11,8),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM22,8),%ZMM11{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM2,%ZMM14,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RAX,%ZMM8,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM9,8),%ZMM18{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM12,8),%ZMM19{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM0,%ZMM14,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0xa8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RDX,%ZMM10,8),%ZMM12{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM9,8),%ZMM10{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VSUBPD %ZMM15,%ZMM11,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %ZMM13,%ZMM11,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM18,%ZMM19,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPXOR %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM29,%ZMM14 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM11,%ZMM10,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM2,%ZMM14,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM3,%ZMM30,%ZMM3 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM12,%ZMM4,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM3,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM0,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %ZMM13,(%RDX,%ZMM10,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RDI,%ZMM11,8),%ZMM12{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM3,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMULLQ %ZMM1,%ZMM30,%ZMM1 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM5,8),%ZMM14{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM6,8),%ZMM1{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RDI,%ZMM2,8),%ZMM5{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM0,8),%ZMM6{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM12,%ZMM13,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RCX,%ZMM7,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM14,%ZMM12,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM5,%ZMM6,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM12,%ZMM1,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %ZMM13,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %ZMM5,(%RCX,%ZMM7,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RDI,%ZMM11,8),%ZMM1{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM2,8),%ZMM5{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM8,8),%ZMM2{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RDI,%ZMM3,8),%ZMM6{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM0,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM9,8),%ZMM0{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM1,%ZMM5,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDX,%ZMM10,8),%ZMM5{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM6,%ZMM3,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM1,%ZMM0,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %ZMM5,%ZMM4,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %ZMM2,(%RDX,%ZMM10,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPADDQ 0x4de81(%RIP){1to8},%ZMM17,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
ADD $0x8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R9,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42a420 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x320> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |