Loop Id: 127 | Module: exec | Source: advec_cell_kernel.f90:83-157 [...] | Coverage: 3.41% |
---|
Loop Id: 127 | Module: exec | Source: advec_cell_kernel.f90:83-157 [...] | Coverage: 3.41% |
---|
0x428d10 MOV 0x58(%RSP),%R13 [6] |
0x428d15 VMOVUPD (%R13,%RAX,8),%YMM2 [12] |
0x428d1c VCMPPD $0x1,%YMM2,%YMM12,%K1 |
0x428d23 LEA (%RDX,%RAX,1),%R13D |
0x428d27 VPBROADCASTD %R13D,%XMM5 |
0x428d2d VPBROADCASTQ %RAX,%YMM3 |
0x428d33 VPADDQ %YMM3,%YMM4,%YMM3 |
0x428d37 VPMOVQD %YMM3,%XMM3 |
0x428d3d VPADDD %XMM20,%XMM5,%XMM7 |
0x428d43 VPBLENDMD %XMM3,%XMM7,%XMM8{%K1} |
0x428d49 VMOVDQA32 %XMM7,%XMM3{%K1} |
0x428d4f VPMOVSXDQ %XMM3,%YMM3 |
0x428d54 VPSUBQ %YMM6,%YMM3,%YMM9 |
0x428d58 LEA (%R12,%RBX,1),%R13 |
0x428d5c VPXOR %XMM3,%XMM3,%XMM3 |
0x428d60 KXNORW %K0,%K0,%K2 |
0x428d64 VGATHERQPD (%R13,%YMM9,8),%YMM3{%K2} [5] |
0x428d6c VPADDD %XMM19,%XMM5,%XMM10 |
0x428d72 VPMINSD %XMM0,%XMM10,%XMM10 |
0x428d77 VMOVDQA64 %XMM10,%XMM22 |
0x428d7d VMOVDQA32 %XMM7,%XMM10{%K1} |
0x428d83 VPMOVSXDQ %XMM10,%YMM7 |
0x428d88 VPSUBQ %YMM6,%YMM7,%YMM7 |
0x428d8c VPXOR %XMM10,%XMM10,%XMM10 |
0x428d91 KXNORW %K0,%K0,%K2 |
0x428d95 VGATHERQPD (%RSI,%YMM7,8),%YMM10{%K2} [9] |
0x428d9c VPADDD %XMM21,%XMM5,%XMM22{%K1} |
0x428da2 LEA (%R8,%R15,1),%R13 |
0x428da6 VPXOR %XMM5,%XMM5,%XMM5 |
0x428daa KXNORW %K0,%K0,%K1 |
0x428dae VGATHERQPD (%R13,%YMM9,8),%YMM5{%K1} [2] |
0x428db6 VANDPD %YMM2,%YMM14,%YMM7 |
0x428dba VMOVUPD (%RDI,%RAX,8),%YMM23 [11] |
0x428dc1 VPMOVSXDQ %XMM22,%YMM22 |
0x428dc7 VPSUBQ %YMM6,%YMM22,%YMM22 |
0x428dcd VXORPD %XMM25,%XMM25,%XMM25 |
0x428dd3 KXNORW %K0,%K0,%K1 |
0x428dd7 VGATHERQPD (%R13,%YMM22,8),%YMM25{%K1} [7] |
0x428ddf VDIVPD %YMM3,%YMM7,%YMM7 |
0x428de3 VFMADD213PD %YMM23,%YMM7,%YMM23 |
0x428de9 VDIVPD %YMM10,%YMM23,%YMM10 |
0x428def VPMOVSXDQ %XMM8,%YMM8 |
0x428df4 VPSUBQ %YMM6,%YMM8,%YMM8 |
0x428df8 VXORPD %XMM23,%XMM23,%XMM23 |
0x428dfe KXNORW %K0,%K0,%K1 |
0x428e02 VGATHERQPD (%R13,%YMM8,8),%YMM23{%K1} [13] |
0x428e0a VSUBPD %YMM7,%YMM16,%YMM26 |
0x428e10 VSUBPD %YMM25,%YMM5,%YMM25 |
0x428e16 VSUBPD %YMM5,%YMM23,%YMM23 |
0x428e1c VMULPD %YMM25,%YMM23,%YMM28 |
0x428e22 VCMPPD $0x1,%YMM28,%YMM12,%K1 |
0x428e29 VSUBPD %YMM7,%YMM15,%YMM7 |
0x428e2d VCMPPD $0x2,%YMM12,%YMM23,%K2 |
0x428e34 VXORPD %YMM17,%YMM7,%YMM7{%K2} |
0x428e3a VANDPD %YMM14,%YMM25,%YMM25 |
0x428e40 VANDPD %YMM14,%YMM23,%YMM23 |
0x428e46 VMULPD %YMM10,%YMM25,%YMM28 |
0x428e4c VFMADD231PD %YMM26,%YMM23,%YMM28 |
0x428e52 VMULPD %YMM18,%YMM28,%YMM28 |
0x428e58 VCMPPD $0x2,%YMM28,%YMM23,%K2 |
0x428e5f VMOVAPD %YMM23,%YMM28{%K2} |
0x428e65 VCMPPD $0x2,%YMM28,%YMM25,%K2 |
0x428e6c VMOVAPD %YMM25,%YMM28{%K2} |
0x428e72 VMOVAPD %YMM5,%YMM23 |
0x428e78 VFMADD231PD %YMM7,%YMM28,%YMM23{%K1} |
0x428e7e VMULPD %YMM2,%YMM23,%YMM2 |
0x428e84 VMOVUPD %YMM2,(%R11,%RAX,8) [4] |
0x428e8a LEA (%R10,%R14,1),%R13 |
0x428e8e VXORPD %XMM7,%XMM7,%XMM7 |
0x428e92 KXNORW %K0,%K0,%K1 |
0x428e96 VGATHERQPD (%R13,%YMM9,8),%YMM7{%K1} [8] |
0x428e9e VXORPD %XMM9,%XMM9,%XMM9 |
0x428ea3 KXNORW %K0,%K0,%K1 |
0x428ea7 VGATHERQPD (%R13,%YMM22,8),%YMM9{%K1} [3] |
0x428eaf VXORPD %XMM22,%XMM22,%XMM22 |
0x428eb5 KXNORW %K0,%K0,%K1 |
0x428eb9 VGATHERQPD (%R13,%YMM8,8),%YMM22{%K1} [1] |
0x428ec1 VMULPD %YMM3,%YMM5,%YMM3 |
0x428ec5 VSUBPD %YMM9,%YMM7,%YMM5 |
0x428eca VSUBPD %YMM7,%YMM22,%YMM8 |
0x428ed0 VMULPD %YMM5,%YMM8,%YMM9 |
0x428ed4 VCMPPD $0x1,%YMM9,%YMM12,%K1 |
0x428edb VANDPD %YMM2,%YMM14,%YMM9 |
0x428edf VDIVPD %YMM3,%YMM9,%YMM3 |
0x428ee3 VSUBPD %YMM3,%YMM15,%YMM3 |
0x428ee7 VCMPPD $0x2,%YMM12,%YMM8,%K2 |
0x428eee VXORPD %YMM17,%YMM3,%YMM3{%K2} |
0x428ef4 VANDPD %YMM5,%YMM14,%YMM5 |
0x428ef8 VANDPD %YMM14,%YMM8,%YMM8 |
0x428efd VMULPD %YMM5,%YMM10,%YMM9 |
0x428f01 VFMADD231PD %YMM26,%YMM8,%YMM9 |
0x428f07 VMULPD %YMM18,%YMM9,%YMM9 |
0x428f0d VCMPPD $0x2,%YMM9,%YMM8,%K2 |
0x428f14 VMOVAPD %YMM8,%YMM9{%K2} |
0x428f1a VCMPPD $0x2,%YMM9,%YMM5,%K2 |
0x428f21 VMOVAPD %YMM5,%YMM9{%K2} |
0x428f27 VFMADD231PD %YMM3,%YMM9,%YMM7{%K1} |
0x428f2d VMULPD %YMM2,%YMM7,%YMM2 |
0x428f31 VMOVUPD %YMM2,(%R9,%RAX,8) [10] |
0x428f37 ADD $0x4,%RAX |
0x428f3b CMP %RCX,%RAX |
0x428f3e JBE 428d10 |
/beegfs/hackathon/users/eoseret/qaas_runs/170-861-0321/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/advec_cell_kernel.f90: 83 - 157 |
-------------------------------------------------------------------------------- |
83: IF(dir.EQ.g_xdir) THEN |
[...] |
112: IF(vol_flux_x(j,k).GT.0.0)THEN |
[...] |
118: upwind =MIN(j+1,x_max+2) |
[...] |
124: sigmat=ABS(vol_flux_x(j,k))/pre_vol(donor,k) |
125: sigma3=(1.0_8+sigmat)*(vertexdx(j)/vertexdx(dif)) |
126: sigma4=2.0_8-sigmat |
127: |
128: sigma=sigmat |
129: sigmav=sigmat |
130: |
131: diffuw=density1(donor,k)-density1(upwind,k) |
132: diffdw=density1(downwind,k)-density1(donor,k) |
133: wind=1.0_8 |
134: IF(diffdw.LE.0.0) wind=-1.0_8 |
135: IF(diffuw*diffdw.GT.0.0)THEN |
136: limiter=(1.0_8-sigmav)*wind*MIN(ABS(diffuw),ABS(diffdw)& |
137: ,one_by_six*(sigma3*ABS(diffuw)+sigma4*ABS(diffdw))) |
138: ELSE |
139: limiter=0.0 |
140: ENDIF |
141: mass_flux_x(j,k)=vol_flux_x(j,k)*(density1(donor,k)+limiter) |
142: |
143: sigmam=ABS(mass_flux_x(j,k))/(density1(donor,k)*pre_vol(donor,k)) |
144: diffuw=energy1(donor,k)-energy1(upwind,k) |
145: diffdw=energy1(downwind,k)-energy1(donor,k) |
146: wind=1.0_8 |
147: IF(diffdw.LE.0.0) wind=-1.0_8 |
148: IF(diffuw*diffdw.GT.0.0)THEN |
149: limiter=(1.0_8-sigmam)*wind*MIN(ABS(diffuw),ABS(diffdw)& |
150: ,one_by_six*(sigma3*ABS(diffuw)+sigma4*ABS(diffdw))) |
151: ELSE |
152: limiter=0.0 |
153: ENDIF |
154: |
155: ener_flux(j,k)=mass_flux_x(j,k)*(energy1(donor,k)+limiter) |
156: |
157: ENDDO |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.05 |
CQA speedup if FP arith vectorized | 1.51 |
CQA speedup if fully vectorized | 3.19 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.80 |
Bottlenecks | micro-operation queue |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source | advec_cell_kernel.f90:83-83,advec_cell_kernel.f90:112-112,advec_cell_kernel.f90:118-118,advec_cell_kernel.f90:124-157 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 47.83 |
CQA cycles if no scalar integer | 45.67 |
CQA cycles if FP arith vectorized | 31.65 |
CQA cycles if fully vectorized | 15.00 |
Front-end cycles | 47.83 |
DIV/SQRT cycles | 15.00 |
P0 cycles | 1.50 |
P1 cycles | 1.50 |
P2 cycles | 1.25 |
P3 cycles | 1.25 |
P4 cycles | 0.50 |
P5 cycles | 1.67 |
P6 cycles | 1.67 |
P7 cycles | 1.67 |
P8 cycles | 26.50 |
P9 cycles | 26.58 |
P10 cycles | 26.58 |
P11 cycles | 26.33 |
P12 cycles | 25.00 |
P13 cycles | 25.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 102.00 |
Nb uops | 287.00 |
Nb loads | 11.00 |
Nb stores | 2.00 |
Nb stack references | 1.00 |
FLOP/cycle | 2.43 |
Nb FLOP add-sub | 28.00 |
Nb FLOP mul | 36.00 |
Nb FLOP fma | 20.00 |
Nb FLOP div | 12.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 8.20 |
Bytes prefetched | 0.00 |
Bytes loaded | 328.00 |
Bytes stored | 64.00 |
Stride 0 | 1.00 |
Stride 1 | 4.00 |
Stride n | 0.00 |
Stride unknown | 3.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 97.67 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.00 |
Vector-efficiency ratio all | 43.24 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 45.00 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 39.88 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.05 |
CQA speedup if FP arith vectorized | 1.51 |
CQA speedup if fully vectorized | 3.19 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.80 |
Bottlenecks | micro-operation queue |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source | advec_cell_kernel.f90:83-83,advec_cell_kernel.f90:112-112,advec_cell_kernel.f90:118-118,advec_cell_kernel.f90:124-157 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 47.83 |
CQA cycles if no scalar integer | 45.67 |
CQA cycles if FP arith vectorized | 31.65 |
CQA cycles if fully vectorized | 15.00 |
Front-end cycles | 47.83 |
DIV/SQRT cycles | 15.00 |
P0 cycles | 1.50 |
P1 cycles | 1.50 |
P2 cycles | 1.25 |
P3 cycles | 1.25 |
P4 cycles | 0.50 |
P5 cycles | 1.67 |
P6 cycles | 1.67 |
P7 cycles | 1.67 |
P8 cycles | 26.50 |
P9 cycles | 26.58 |
P10 cycles | 26.58 |
P11 cycles | 26.33 |
P12 cycles | 25.00 |
P13 cycles | 25.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 102.00 |
Nb uops | 287.00 |
Nb loads | 11.00 |
Nb stores | 2.00 |
Nb stack references | 1.00 |
FLOP/cycle | 2.43 |
Nb FLOP add-sub | 28.00 |
Nb FLOP mul | 36.00 |
Nb FLOP fma | 20.00 |
Nb FLOP div | 12.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 8.20 |
Bytes prefetched | 0.00 |
Bytes loaded | 328.00 |
Bytes stored | 64.00 |
Stride 0 | 1.00 |
Stride 1 | 4.00 |
Stride n | 0.00 |
Stride unknown | 3.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 97.67 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.00 |
Vector-efficiency ratio all | 43.24 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 45.00 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 39.88 |
Path / |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source file and lines | advec_cell_kernel.f90:83-157 |
Module | exec |
nb instructions | 102 |
nb uops | 287 |
loop length | 564 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 13 |
used ymm registers | 20 |
used zmm registers | 0 |
nb stack references | 1 |
ADD-SUB / MUL ratio | 0.78 |
micro-operation queue | 47.83 cycles |
front end | 47.83 cycles |
 | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 1.25 | 1.25 | 0.50 | 1.67 | 1.67 | 1.67 | 26.50 | 26.58 | 26.58 | 26.33 | 25.00 | 25.00 |
cycles | 1.50 | 1.50 | 1.25 | 1.25 | 0.50 | 1.67 | 1.67 | 1.67 | 26.50 | 26.58 | 26.58 | 26.33 | 25.00 | 25.00 |
Cycles executing div or sqrt instructions | 15.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 47.83 |
Dispatch | 26.58 |
DIV/SQRT | 15.00 |
Data deps. | 1.00 |
Overall L1 | 47.83 |
Vectorization ratios (INT) |
all | 91% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 86% |
Vectorization ratios (FP) |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
Vectorization ratios (INT+FP) |
all | 97% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
Vector-efficiency ratios (INT) |
all | 30% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 24% |
Vector-efficiency ratios (FP) |
all | 48% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 46% |
Vector-efficiency ratios (INT+FP) |
all | 43% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 45% |
fma | 50% |
div/sqrt | 50% |
other | 39% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x58(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVUPD (%R13,%RAX,8),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM2,%YMM12,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RDX,%RAX,1),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTD %R13D,%XMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RAX,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %YMM3,%YMM4,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVQD %YMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD %XMM20,%XMM5,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPBLENDMD %XMM3,%XMM7,%XMM8{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVDQA32 %XMM7,%XMM3{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMOVSXDQ %XMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM3,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
LEA (%R12,%RBX,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPXOR %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KXNORW %K0,%K0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM9,8),%YMM3{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VPADDD %XMM19,%XMM5,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMINSD %XMM0,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVDQA64 %XMM10,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA32 %XMM7,%XMM10{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMOVSXDQ %XMM10,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KXNORW %K0,%K0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%RSI,%YMM7,8),%YMM10{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VPADDD %XMM21,%XMM5,%XMM22{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
LEA (%R8,%R15,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM9,8),%YMM5{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VANDPD %YMM2,%YMM14,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVUPD (%RDI,%RAX,8),%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPMOVSXDQ %XMM22,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM22,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM25,%XMM25,%XMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM22,8),%YMM25{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VDIVPD %YMM3,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VFMADD213PD %YMM23,%YMM7,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM10,%YMM23,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VPMOVSXDQ %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM8,8),%YMM23{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VSUBPD %YMM7,%YMM16,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM25,%YMM5,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM5,%YMM23,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM25,%YMM23,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM28,%YMM12,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %YMM7,%YMM15,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM12,%YMM23,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %YMM17,%YMM7,%YMM7{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM14,%YMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM14,%YMM23,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM10,%YMM25,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM26,%YMM23,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM28,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM28,%YMM23,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM23,%YMM28{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x2,%YMM28,%YMM25,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM25,%YMM28{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM5,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD %YMM7,%YMM28,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM2,%YMM23,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %YMM2,(%R11,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
LEA (%R10,%R14,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM9,8),%YMM7{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM22,8),%YMM9{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VXORPD %XMM22,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM8,8),%YMM22{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMULPD %YMM3,%YMM5,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM9,%YMM7,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM7,%YMM22,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM5,%YMM8,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM9,%YMM12,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %YMM2,%YMM14,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VDIVPD %YMM3,%YMM9,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VSUBPD %YMM3,%YMM15,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM12,%YMM8,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %YMM17,%YMM3,%YMM3{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM5,%YMM14,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM14,%YMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM5,%YMM10,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM26,%YMM8,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM9,%YMM8,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM9{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x2,%YMM9,%YMM5,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM5,%YMM9{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD %YMM3,%YMM9,%YMM7{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM2,%YMM7,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %YMM2,(%R9,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
ADD $0x4,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JBE 428d10 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x3040> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source file and lines | advec_cell_kernel.f90:83-157 |
Module | exec |
nb instructions | 102 |
nb uops | 287 |
loop length | 564 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 13 |
used ymm registers | 20 |
used zmm registers | 0 |
nb stack references | 1 |
ADD-SUB / MUL ratio | 0.78 |
micro-operation queue | 47.83 cycles |
front end | 47.83 cycles |
 | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 1.25 | 1.25 | 0.50 | 1.67 | 1.67 | 1.67 | 26.50 | 26.58 | 26.58 | 26.33 | 25.00 | 25.00 |
cycles | 1.50 | 1.50 | 1.25 | 1.25 | 0.50 | 1.67 | 1.67 | 1.67 | 26.50 | 26.58 | 26.58 | 26.33 | 25.00 | 25.00 |
Cycles executing div or sqrt instructions | 15.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 47.83 |
Dispatch | 26.58 |
DIV/SQRT | 15.00 |
Data deps. | 1.00 |
Overall L1 | 47.83 |
Vectorization ratios (INT) |
all | 91% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 86% |
Vectorization ratios (FP) |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
Vectorization ratios (INT+FP) |
all | 97% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
Vector-efficiency ratios (INT) |
all | 30% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 24% |
Vector-efficiency ratios (FP) |
all | 48% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 46% |
Vector-efficiency ratios (INT+FP) |
all | 43% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 45% |
fma | 50% |
div/sqrt | 50% |
other | 39% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x58(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVUPD (%R13,%RAX,8),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM2,%YMM12,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RDX,%RAX,1),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTD %R13D,%XMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RAX,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %YMM3,%YMM4,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVQD %YMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD %XMM20,%XMM5,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPBLENDMD %XMM3,%XMM7,%XMM8{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVDQA32 %XMM7,%XMM3{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMOVSXDQ %XMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM3,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
LEA (%R12,%RBX,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPXOR %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KXNORW %K0,%K0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM9,8),%YMM3{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VPADDD %XMM19,%XMM5,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMINSD %XMM0,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVDQA64 %XMM10,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA32 %XMM7,%XMM10{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMOVSXDQ %XMM10,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KXNORW %K0,%K0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%RSI,%YMM7,8),%YMM10{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VPADDD %XMM21,%XMM5,%XMM22{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
LEA (%R8,%R15,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM9,8),%YMM5{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VANDPD %YMM2,%YMM14,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVUPD (%RDI,%RAX,8),%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPMOVSXDQ %XMM22,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM22,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM25,%XMM25,%XMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM22,8),%YMM25{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VDIVPD %YMM3,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VFMADD213PD %YMM23,%YMM7,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM10,%YMM23,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VPMOVSXDQ %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM8,8),%YMM23{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VSUBPD %YMM7,%YMM16,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM25,%YMM5,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM5,%YMM23,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM25,%YMM23,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM28,%YMM12,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %YMM7,%YMM15,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM12,%YMM23,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %YMM17,%YMM7,%YMM7{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM14,%YMM25,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM14,%YMM23,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM10,%YMM25,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM26,%YMM23,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM28,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM28,%YMM23,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM23,%YMM28{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x2,%YMM28,%YMM25,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM25,%YMM28{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM5,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD %YMM7,%YMM28,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM2,%YMM23,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %YMM2,(%R11,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
LEA (%R10,%R14,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM9,8),%YMM7{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM22,8),%YMM9{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VXORPD %XMM22,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R13,%YMM8,8),%YMM22{%K1} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMULPD %YMM3,%YMM5,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM9,%YMM7,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM7,%YMM22,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM5,%YMM8,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM9,%YMM12,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %YMM2,%YMM14,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VDIVPD %YMM3,%YMM9,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VSUBPD %YMM3,%YMM15,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM12,%YMM8,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %YMM17,%YMM3,%YMM3{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM5,%YMM14,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM14,%YMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM5,%YMM10,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM26,%YMM8,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM9,%YMM8,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM9{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x2,%YMM9,%YMM5,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM5,%YMM9{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD %YMM3,%YMM9,%YMM7{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM2,%YMM7,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %YMM2,(%R9,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
ADD $0x4,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JBE 428d10 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x3040> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |