Loop Id: 340 | Module: exec | Source: advec_mom.cpp:219-221 [...] | Coverage: 4.24% |
---|
Loop Id: 340 | Module: exec | Source: advec_mom.cpp:219-221 [...] | Coverage: 4.24% |
---|
0x23c500 VEXTRACTI128 $0x1,%YMM0,%XMM11 |
0x23c506 VPEXTRQ $0x1,%XMM9,%R14 |
0x23c50c VPEXTRQ $0x1,%XMM11,%RAX |
0x23c512 CQTO |
0x23c514 IDIV %R14 |
0x23c517 VMOVQ %XMM9,%R14 |
0x23c51c VMOVQ %RAX,%XMM12 |
0x23c521 VMOVQ %XMM11,%RAX |
0x23c526 CQTO |
0x23c528 IDIV %R14 |
0x23c52b VPEXTRQ $0x1,%XMM1,%R14 |
0x23c531 VMOVQ %RAX,%XMM11 |
0x23c536 VPEXTRQ $0x1,%XMM0,%RAX |
0x23c53c CQTO |
0x23c53e VPUNPCKLQDQ %XMM12,%XMM11,%XMM11 |
0x23c543 IDIV %R14 |
0x23c546 VMOVQ %XMM1,%R14 |
0x23c54b VMOVQ %RAX,%XMM12 |
0x23c550 VMOVQ %XMM0,%RAX |
0x23c555 CQTO |
0x23c557 IDIV %R14 |
0x23c55a ADD $-0x4,%RSI |
0x23c55e VMOVQ %RAX,%XMM13 |
0x23c563 VPUNPCKLQDQ %XMM12,%XMM13,%XMM12 |
0x23c568 VINSERTI128 $0x1,%XMM11,%YMM12,%YMM11 |
0x23c56e VPMOVQD %YMM11,%XMM12 |
0x23c574 VPMULLQ %YMM1,%YMM11,%YMM11 |
0x23c57a VPSUBQ %YMM11,%YMM0,%YMM11 |
0x23c57f VPADDQ %YMM0,%YMM10,%YMM0 |
0x23c583 VPMOVQD %YMM11,%XMM11 |
0x23c589 VPADDD %XMM2,%XMM12,%XMM13 |
0x23c58d VPADDD %XMM6,%XMM12,%XMM12 |
0x23c591 VPMOVSXDQ %XMM13,%YMM13 |
0x23c596 VPMOVSXDQ %XMM12,%YMM12 |
0x23c59b VPMULLQ %YMM13,%YMM4,%YMM14 |
0x23c5a1 VPMULLQ %YMM12,%YMM7,%YMM12 |
0x23c5a7 VPADDD %XMM3,%XMM11,%XMM11 |
0x23c5ab VPMOVSXDQ %XMM11,%YMM11 |
0x23c5b0 VPADDQ %YMM11,%YMM14,%YMM14 |
0x23c5b5 VPADDQ %YMM11,%YMM12,%YMM12 |
0x23c5ba VPEXTRQ $0x1,%XMM14,%RAX |
0x23c5c0 VMOVQ %XMM14,%R12 |
0x23c5c5 VEXTRACTI128 $0x1,%YMM14,%XMM14 |
0x23c5cb VMOVQ %XMM14,%RDX |
0x23c5d0 VMOVSD (%RDI,%R12,8),%XMM15 [5] |
0x23c5d6 VPEXTRQ $0x1,%XMM14,%R14 |
0x23c5dc MOV %RAX,-0xa0(%RBP) [22] |
0x23c5e3 VMOVSD (%RDI,%RDX,8),%XMM14 [4] |
0x23c5e8 VMOVHPD (%RDI,%RAX,8),%XMM15,%XMM15 [21] |
0x23c5ed VMOVHPD (%RDI,%R14,8),%XMM14,%XMM14 [16] |
0x23c5f3 VINSERTF128 $0x1,%XMM14,%YMM15,%YMM14 |
0x23c5f9 VPMULLQ %YMM13,%YMM5,%YMM15 |
0x23c5ff VPADDQ %YMM11,%YMM15,%YMM15 |
0x23c604 VMOVQ %XMM15,%R10 |
0x23c609 VPEXTRQ $0x1,%XMM15,%R13 |
0x23c60f VEXTRACTI128 $0x1,%YMM15,%XMM15 |
0x23c615 VMOVQ %XMM15,%RAX |
0x23c61a VMOVSD (%R9,%R10,8),%XMM16 [7] |
0x23c621 VPEXTRQ $0x1,%XMM15,%RBX |
0x23c627 VMOVQ %XMM12,%R10 |
0x23c62c VMOVSD (%R9,%RAX,8),%XMM15 [6] |
0x23c632 VMOVHPD (%R9,%R13,8),%XMM16,%XMM16 [2] |
0x23c639 VPEXTRQ $0x1,%XMM12,%RAX |
0x23c63f VEXTRACTI128 $0x1,%YMM12,%XMM12 |
0x23c645 VMOVHPD (%R9,%RBX,8),%XMM15,%XMM15 [13] |
0x23c64b VMOVQ %XMM12,%RBX |
0x23c650 VPEXTRQ $0x1,%XMM12,%R13 |
0x23c656 VMOVSD (%R11,%RBX,8),%XMM12 [8] |
0x23c65c VMOVHPD (%R11,%R13,8),%XMM12,%XMM12 [10] |
0x23c662 VINSERTF32X4 $0x1,%XMM15,%YMM16,%YMM15 |
0x23c669 VMOVSD (%R11,%R10,8),%XMM16 [3] |
0x23c670 VMOVHPD (%R11,%RAX,8),%XMM16,%XMM16 [18] |
0x23c677 VINSERTF32X4 $0x1,%XMM12,%YMM16,%YMM12 |
0x23c67e VFMADD231PD %YMM15,%YMM14,%YMM12 |
0x23c683 VPMULLQ %YMM13,%YMM7,%YMM14 |
0x23c689 VPMULLQ %YMM13,%YMM8,%YMM13 |
0x23c68f VPADDQ %YMM11,%YMM14,%YMM14 |
0x23c694 VPADDQ %YMM11,%YMM13,%YMM11 |
0x23c699 VMOVQ %XMM14,%RAX |
0x23c69e VPEXTRQ $0x1,%XMM14,%R10 |
0x23c6a4 VEXTRACTI128 $0x1,%YMM14,%XMM14 |
0x23c6aa VMOVQ %XMM14,%RBX |
0x23c6af VMOVSD (%R11,%RAX,8),%XMM15 [12] |
0x23c6b5 VPEXTRQ $0x1,%XMM14,%R13 |
0x23c6bb VPEXTRQ $0x1,%XMM11,%RAX |
0x23c6c1 VMOVSD (%R11,%RBX,8),%XMM14 [11] |
0x23c6c7 VMOVHPD (%R11,%R10,8),%XMM15,%XMM15 [17] |
0x23c6cd VMOVQ %XMM11,%R10 |
0x23c6d2 VEXTRACTI128 $0x1,%YMM11,%XMM11 |
0x23c6d8 VMOVHPD (%R11,%R13,8),%XMM14,%XMM14 [9] |
0x23c6de VMOVQ %XMM11,%RBX |
0x23c6e3 VMOVSD (%R8,%R10,8),%XMM13 [15] |
0x23c6e9 VPEXTRQ $0x1,%XMM11,%R13 |
0x23c6ef VMOVSD (%R8,%RBX,8),%XMM11 [14] |
0x23c6f5 VMOVHPD (%R8,%RAX,8),%XMM13,%XMM13 [19] |
0x23c6fb MOV -0xa0(%RBP),%RAX [22] |
0x23c702 VMOVHPD (%R8,%R13,8),%XMM11,%XMM11 [20] |
0x23c708 VINSERTF128 $0x1,%XMM14,%YMM15,%YMM14 |
0x23c70e VSUBPD %YMM14,%YMM12,%YMM12 |
0x23c713 VINSERTF128 $0x1,%XMM11,%YMM13,%YMM11 |
0x23c719 VDIVPD %YMM11,%YMM12,%YMM11 |
0x23c71e VMOVLPD %XMM11,(%RDI,%R12,8) [5] |
0x23c724 VMOVHPD %XMM11,(%RDI,%RAX,8) [1] |
0x23c729 VEXTRACTF128 $0x1,%YMM11,%XMM11 |
0x23c72f VMOVLPD %XMM11,(%RDI,%RDX,8) [4] |
0x23c734 VMOVHPD %XMM11,(%RDI,%R14,8) [16] |
0x23c73a JNE 23c500 |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 219 - 221 |
-------------------------------------------------------------------------------- |
219: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
220: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
221: vel1(i, j) = (vel1(i, j) * node_mass_pre(i, j) + mom_flux(i + 0, j - 1) - mom_flux(i, j)) / node_mass_post(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.75 - 2.81 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 2.75 - 3.12 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.61 - 2.59 |
Bottlenecks | P8, P9, |
Function | .omp_outlined..22 |
Source | advec_mom.cpp:219-221,context.h:69-69 |
Source loop unroll info | unrolled by 4 |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | main |
Unroll factor | 4 |
CQA cycles | 33.00 - 53.00 |
CQA cycles if no scalar integer | 18.83 |
CQA cycles if FP arith vectorized | 33.00 - 53.00 |
CQA cycles if fully vectorized | 12.00 - 17.00 |
Front-end cycles | 20.50 |
DIV/SQRT cycles | 8.00 |
P0 cycles | 0.50 |
P1 cycles | 4.00 |
P2 cycles | 0.50 |
P3 cycles | 1.00 |
P4 cycles | 8.67 |
P5 cycles | 8.67 |
P6 cycles | 8.67 |
P7 cycles | 16.25 |
P8 cycles | 16.33 |
P9 cycles | 16.17 |
P10 cycles | 16.25 |
P11 cycles | 16.00 |
P12 cycles | 16.00 |
P13 cycles | 33.00 - 53.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 107.00 |
Nb uops | 123.00 |
Nb loads | 25.00 |
Nb stores | 5.00 |
Nb stack references | 1.00 |
FLOP/cycle | 0.48 - 0.30 |
Nb FLOP add-sub | 4.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 4.00 |
Nb FLOP div | 4.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 3.92 - 6.30 |
Bytes prefetched | 0.00 |
Bytes loaded | 168.00 |
Bytes stored | 40.00 |
Stride 0 | 1.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 16.00 |
Vectorization ratio all | 35.58 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 20.00 |
Vectorization ratio other | 32.14 |
Vector-efficiency ratio all | 21.27 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 43.18 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 20.00 |
Vector-efficiency ratio other | 17.41 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.75 - 2.81 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 2.75 - 3.12 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.61 - 2.59 |
Bottlenecks | P8, P9, |
Function | .omp_outlined..22 |
Source | advec_mom.cpp:219-221,context.h:69-69 |
Source loop unroll info | unrolled by 4 |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | main |
Unroll factor | 4 |
CQA cycles | 33.00 - 53.00 |
CQA cycles if no scalar integer | 18.83 |
CQA cycles if FP arith vectorized | 33.00 - 53.00 |
CQA cycles if fully vectorized | 12.00 - 17.00 |
Front-end cycles | 20.50 |
DIV/SQRT cycles | 8.00 |
P0 cycles | 0.50 |
P1 cycles | 4.00 |
P2 cycles | 0.50 |
P3 cycles | 1.00 |
P4 cycles | 8.67 |
P5 cycles | 8.67 |
P6 cycles | 8.67 |
P7 cycles | 16.25 |
P8 cycles | 16.33 |
P9 cycles | 16.17 |
P10 cycles | 16.25 |
P11 cycles | 16.00 |
P12 cycles | 16.00 |
P13 cycles | 33.00 - 53.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 107.00 |
Nb uops | 123.00 |
Nb loads | 25.00 |
Nb stores | 5.00 |
Nb stack references | 1.00 |
FLOP/cycle | 0.48 - 0.30 |
Nb FLOP add-sub | 4.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 4.00 |
Nb FLOP div | 4.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 3.92 - 6.30 |
Bytes prefetched | 0.00 |
Bytes loaded | 168.00 |
Bytes stored | 40.00 |
Stride 0 | 1.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 16.00 |
Vectorization ratio all | 35.58 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 20.00 |
Vectorization ratio other | 32.14 |
Vector-efficiency ratio all | 21.27 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 43.18 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 20.00 |
Vector-efficiency ratio other | 17.41 |
Path / |
Function | .omp_outlined..22 |
Source file and lines | advec_mom.cpp:219-221 |
Module | exec |
nb instructions | 107 |
nb uops | 123 |
loop length | 576 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 12 |
used ymm registers | 13 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 20.50 cycles |
front end | 20.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 0.50 | 4.00 | 0.50 | 1.00 | 8.67 | 8.67 | 8.67 | 16.25 | 16.33 | 16.17 | 16.25 | 16.00 | 16.00 |
cycles | 8.00 | 0.50 | 4.00 | 0.50 | 1.00 | 8.67 | 8.67 | 8.67 | 16.25 | 16.33 | 16.17 | 16.25 | 16.00 | 16.00 |
Cycles executing div or sqrt instructions | 33.00-53.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 20.50 |
Dispatch | 16.33 |
DIV/SQRT | 33.00-53.00 |
Data deps. | 1.00 |
Overall L1 | 33.00-53.00 |
all | 39% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 22% |
all | 27% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 35% |
load | 0% |
store | 0% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 20% |
other | 32% |
all | 22% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | 50% |
add-sub | 42% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 16% |
all | 18% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 25% |
all | 21% |
load | 12% |
store | 12% |
mul | 50% |
add-sub | 43% |
fma | 50% |
div/sqrt | 20% |
other | 17% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VEXTRACTI128 $0x1,%YMM0,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPEXTRQ $0x1,%XMM9,%R14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VPEXTRQ $0x1,%XMM11,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
CQTO | |||||||||||||||||
IDIV %R14 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
VMOVQ %XMM9,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVQ %RAX,%XMM12 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
VMOVQ %XMM11,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
CQTO | |||||||||||||||||
IDIV %R14 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
VPEXTRQ $0x1,%XMM1,%R14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVQ %RAX,%XMM11 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
VPEXTRQ $0x1,%XMM0,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
CQTO | |||||||||||||||||
VPUNPCKLQDQ %XMM12,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
IDIV %R14 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
VMOVQ %XMM1,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVQ %RAX,%XMM12 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
VMOVQ %XMM0,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
CQTO | |||||||||||||||||
IDIV %R14 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
ADD $-0x4,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVQ %RAX,%XMM13 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
VPUNPCKLQDQ %XMM12,%XMM13,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VINSERTI128 $0x1,%XMM11,%YMM12,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPMOVQD %YMM11,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VPMULLQ %YMM1,%YMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBQ %YMM11,%YMM0,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDQ %YMM0,%YMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVQD %YMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD %XMM2,%XMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDD %XMM6,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVSXDQ %XMM13,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPMOVSXDQ %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPMULLQ %YMM13,%YMM4,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPMULLQ %YMM12,%YMM7,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDD %XMM3,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVSXDQ %XMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPADDQ %YMM11,%YMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDQ %YMM11,%YMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPEXTRQ $0x1,%XMM14,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVQ %XMM14,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VEXTRACTI128 $0x1,%YMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%RDI,%R12,8),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPEXTRQ $0x1,%XMM14,%R14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD (%RDI,%RDX,8),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%RDI,%RAX,8),%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VMOVHPD (%RDI,%R14,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VINSERTF128 $0x1,%XMM14,%YMM15,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %YMM13,%YMM5,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDQ %YMM11,%YMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM15,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPEXTRQ $0x1,%XMM15,%R13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VEXTRACTI128 $0x1,%YMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%R9,%R10,8),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPEXTRQ $0x1,%XMM15,%RBX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVQ %XMM12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%R9,%RAX,8),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R9,%R13,8),%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VPEXTRQ $0x1,%XMM12,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VEXTRACTI128 $0x1,%YMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVHPD (%R9,%RBX,8),%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VMOVQ %XMM12,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPEXTRQ $0x1,%XMM12,%R13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVSD (%R11,%RBX,8),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R11,%R13,8),%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VINSERTF32X4 $0x1,%XMM15,%YMM16,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD (%R11,%R10,8),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R11,%RAX,8),%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VINSERTF32X4 $0x1,%XMM12,%YMM16,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VFMADD231PD %YMM15,%YMM14,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPMULLQ %YMM13,%YMM7,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPMULLQ %YMM13,%YMM8,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDQ %YMM11,%YMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDQ %YMM11,%YMM13,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPEXTRQ $0x1,%XMM14,%R10 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VEXTRACTI128 $0x1,%YMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM14,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%R11,%RAX,8),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPEXTRQ $0x1,%XMM14,%R13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VPEXTRQ $0x1,%XMM11,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVSD (%R11,%RBX,8),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R11,%R10,8),%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VMOVQ %XMM11,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VEXTRACTI128 $0x1,%YMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVHPD (%R11,%R13,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VMOVQ %XMM11,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%R8,%R10,8),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPEXTRQ $0x1,%XMM11,%R13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVSD (%R8,%RBX,8),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R8,%RAX,8),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVHPD (%R8,%R13,8),%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VINSERTF128 $0x1,%XMM14,%YMM15,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VSUBPD %YMM14,%YMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VINSERTF128 $0x1,%XMM11,%YMM13,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VDIVPD %YMM11,%YMM12,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VMOVLPD %XMM11,(%RDI,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVHPD %XMM11,(%RDI,%RAX,8) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 9-11 | 1 |
VEXTRACTF128 $0x1,%YMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVLPD %XMM11,(%RDI,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVHPD %XMM11,(%RDI,%R14,8) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 9-11 | 1 |
JNE 23c500 <.omp_outlined..22+0x1a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
Function | .omp_outlined..22 |
Source file and lines | advec_mom.cpp:219-221 |
Module | exec |
nb instructions | 107 |
nb uops | 123 |
loop length | 576 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 12 |
used ymm registers | 13 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 20.50 cycles |
front end | 20.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 0.50 | 4.00 | 0.50 | 1.00 | 8.67 | 8.67 | 8.67 | 16.25 | 16.33 | 16.17 | 16.25 | 16.00 | 16.00 |
cycles | 8.00 | 0.50 | 4.00 | 0.50 | 1.00 | 8.67 | 8.67 | 8.67 | 16.25 | 16.33 | 16.17 | 16.25 | 16.00 | 16.00 |
Cycles executing div or sqrt instructions | 33.00-53.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 20.50 |
Dispatch | 16.33 |
DIV/SQRT | 33.00-53.00 |
Data deps. | 1.00 |
Overall L1 | 33.00-53.00 |
all | 39% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 22% |
all | 27% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 35% |
load | 0% |
store | 0% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 20% |
other | 32% |
all | 22% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | 50% |
add-sub | 42% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 16% |
all | 18% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 25% |
all | 21% |
load | 12% |
store | 12% |
mul | 50% |
add-sub | 43% |
fma | 50% |
div/sqrt | 20% |
other | 17% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VEXTRACTI128 $0x1,%YMM0,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPEXTRQ $0x1,%XMM9,%R14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VPEXTRQ $0x1,%XMM11,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
CQTO | |||||||||||||||||
IDIV %R14 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
VMOVQ %XMM9,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVQ %RAX,%XMM12 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
VMOVQ %XMM11,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
CQTO | |||||||||||||||||
IDIV %R14 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
VPEXTRQ $0x1,%XMM1,%R14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVQ %RAX,%XMM11 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
VPEXTRQ $0x1,%XMM0,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
CQTO | |||||||||||||||||
VPUNPCKLQDQ %XMM12,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
IDIV %R14 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
VMOVQ %XMM1,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVQ %RAX,%XMM12 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
VMOVQ %XMM0,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
CQTO | |||||||||||||||||
IDIV %R14 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
ADD $-0x4,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVQ %RAX,%XMM13 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
VPUNPCKLQDQ %XMM12,%XMM13,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VINSERTI128 $0x1,%XMM11,%YMM12,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPMOVQD %YMM11,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VPMULLQ %YMM1,%YMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBQ %YMM11,%YMM0,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDQ %YMM0,%YMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVQD %YMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD %XMM2,%XMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDD %XMM6,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVSXDQ %XMM13,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPMOVSXDQ %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPMULLQ %YMM13,%YMM4,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPMULLQ %YMM12,%YMM7,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDD %XMM3,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVSXDQ %XMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPADDQ %YMM11,%YMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDQ %YMM11,%YMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPEXTRQ $0x1,%XMM14,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVQ %XMM14,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VEXTRACTI128 $0x1,%YMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%RDI,%R12,8),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPEXTRQ $0x1,%XMM14,%R14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD (%RDI,%RDX,8),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%RDI,%RAX,8),%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VMOVHPD (%RDI,%R14,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VINSERTF128 $0x1,%XMM14,%YMM15,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %YMM13,%YMM5,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDQ %YMM11,%YMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM15,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPEXTRQ $0x1,%XMM15,%R13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VEXTRACTI128 $0x1,%YMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%R9,%R10,8),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPEXTRQ $0x1,%XMM15,%RBX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVQ %XMM12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%R9,%RAX,8),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R9,%R13,8),%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VPEXTRQ $0x1,%XMM12,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VEXTRACTI128 $0x1,%YMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVHPD (%R9,%RBX,8),%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VMOVQ %XMM12,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPEXTRQ $0x1,%XMM12,%R13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVSD (%R11,%RBX,8),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R11,%R13,8),%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VINSERTF32X4 $0x1,%XMM15,%YMM16,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD (%R11,%R10,8),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R11,%RAX,8),%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VINSERTF32X4 $0x1,%XMM12,%YMM16,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VFMADD231PD %YMM15,%YMM14,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPMULLQ %YMM13,%YMM7,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPMULLQ %YMM13,%YMM8,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDQ %YMM11,%YMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDQ %YMM11,%YMM13,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VPEXTRQ $0x1,%XMM14,%R10 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VEXTRACTI128 $0x1,%YMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVQ %XMM14,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%R11,%RAX,8),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPEXTRQ $0x1,%XMM14,%R13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VPEXTRQ $0x1,%XMM11,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVSD (%R11,%RBX,8),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R11,%R10,8),%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VMOVQ %XMM11,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VEXTRACTI128 $0x1,%YMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVHPD (%R11,%R13,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VMOVQ %XMM11,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVSD (%R8,%R10,8),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPEXTRQ $0x1,%XMM11,%R13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
VMOVSD (%R8,%RBX,8),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVHPD (%R8,%RAX,8),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVHPD (%R8,%R13,8),%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 9 | 0.50 |
VINSERTF128 $0x1,%XMM14,%YMM15,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VSUBPD %YMM14,%YMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VINSERTF128 $0x1,%XMM11,%YMM13,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VDIVPD %YMM11,%YMM12,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VMOVLPD %XMM11,(%RDI,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVHPD %XMM11,(%RDI,%RAX,8) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 9-11 | 1 |
VEXTRACTF128 $0x1,%YMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVLPD %XMM11,(%RDI,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVHPD %XMM11,(%RDI,%R14,8) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 9-11 | 1 |
JNE 23c500 <.omp_outlined..22+0x1a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |