Loop Id: 506 | Module: exec | Source: viscosity.cpp:39-64 [...] | Coverage: 7.03% |
---|
Loop Id: 506 | Module: exec | Source: viscosity.cpp:39-64 [...] | Coverage: 7.03% |
---|
0x45b660 VCOMISD %XMM24,%XMM5 |
0x45b666 VANDPD %XMM8,%XMM24,%XMM15 |
0x45b66c VMOVSD %XMM10,%XMM10,%XMM2 |
0x45b670 VMAXSD %XMM7,%XMM15,%XMM14 |
0x45b674 JA 45b888 |
0x45b67a VANDPD %XMM8,%XMM0,%XMM0 |
0x45b67f MOV -0x50(%RBP),%R13 [14] |
0x45b683 MOV 0x10(%R14),%RCX [12] |
0x45b687 VMAXSD %XMM7,%XMM0,%XMM15 |
0x45b68b IMUL (%R14),%R13 [12] |
0x45b68f VMULSD %XMM2,%XMM15,%XMM4 |
0x45b693 ADD %RAX,%R13 |
0x45b696 VMULSD %XMM4,%XMM4,%XMM0 |
0x45b69a VFMADD231SD %XMM14,%XMM14,%XMM0 |
0x45b69f VSQRTSD %XMM0,%XMM0,%XMM0 |
0x45b6a3 VMULSD %XMM3,%XMM0,%XMM3 |
0x45b6a7 VMULSD %XMM13,%XMM0,%XMM13 |
0x45b6ac VDIVSD %XMM4,%XMM3,%XMM2 |
0x45b6b0 VMOVSD (%RCX,%R13,8),%XMM3 [11] |
0x45b6b6 VDIVSD %XMM14,%XMM13,%XMM14 |
0x45b6bb VANDPD %XMM8,%XMM2,%XMM15 |
0x45b6c0 VANDPD %XMM8,%XMM14,%XMM4 |
0x45b6c5 VMINSD %XMM4,%XMM15,%XMM0 |
0x45b6c9 VADDSD %XMM3,%XMM3,%XMM15 |
0x45b6cd VMULSD %XMM1,%XMM0,%XMM1 |
0x45b6d1 VMULSD %XMM1,%XMM1,%XMM2 |
0x45b6d5 VMULSD %XMM2,%XMM15,%XMM13 |
0x45b6d9 VMOVSD %XMM13,(%R15,%RAX,8) [5] |
0x45b6df MOV %RDX,%RAX |
0x45b6e2 CMP %RDX,-0x38(%RBP) [14] |
0x45b6e6 JE 45b83c |
0x45b6ec INC %RDX |
0x45b6ef VMOVSD 0x8(%R9,%RAX,8),%XMM2 [7] |
0x45b6f6 VMOVSD (%R9,%RAX,8),%XMM1 [7] |
0x45b6fc VMOVSD 0x8(%R10,%RAX,8),%XMM18 [10] |
0x45b704 VMOVSD (%R10,%RAX,8),%XMM14 [10] |
0x45b70a VADDSD %XMM2,%XMM1,%XMM0 |
0x45b70e VMOVSD 0x8(%R8,%RAX,8),%XMM13 [3] |
0x45b715 VMOVSD 0x8(%RDI,%RAX,8),%XMM15 [2] |
0x45b71b VADDSD %XMM14,%XMM1,%XMM4 |
0x45b720 VADDSD %XMM18,%XMM2,%XMM3 |
0x45b726 VMOVSD (%R8,%RAX,8),%XMM17 [3] |
0x45b72d MOV -0x40(%RBP),%RCX [14] |
0x45b731 VADDSD %XMM13,%XMM15,%XMM16 |
0x45b737 VADDSD %XMM18,%XMM14,%XMM14 |
0x45b73d VMOVSD 0x8(%RSI,%RAX,8),%XMM22 [15] |
0x45b745 MOV -0x48(%RBP),%R13 [14] |
0x45b749 VADDSD %XMM17,%XMM13,%XMM1 |
0x45b74f VMOVSD (%RBX,%RAX,8),%XMM13 [4] |
0x45b754 VSUBSD %XMM4,%XMM3,%XMM2 |
0x45b758 VMOVSD (%RDI,%RAX,8),%XMM4 [2] |
0x45b75d VSUBSD -0x8(%RSI,%RAX,8),%XMM22,%XMM23 [15] |
0x45b765 VSUBSD %XMM14,%XMM0,%XMM0 |
0x45b76a VADDSD %XMM4,%XMM15,%XMM3 |
0x45b76e VADDSD %XMM17,%XMM4,%XMM4 |
0x45b774 VSUBSD %XMM4,%XMM16,%XMM20 |
0x45b77a VSUBSD %XMM3,%XMM1,%XMM1 |
0x45b77e VMOVSD (%RCX),%XMM3 [9] |
0x45b782 VUNPCKLPD %XMM13,%XMM3,%XMM19 |
0x45b788 VMULSD %XMM1,%XMM3,%XMM15 |
0x45b78c VUNPCKLPD %XMM20,%XMM0,%XMM14 |
0x45b792 VMULPD %XMM9,%XMM14,%XMM0 |
0x45b797 VMULSD %XMM6,%XMM1,%XMM1 |
0x45b79b VFMADD231SD %XMM2,%XMM13,%XMM15 |
0x45b7a0 VDIVPD %XMM19,%XMM0,%XMM14 |
0x45b7a6 VADDSD 0x8(%RBX,%RAX,8),%XMM13,%XMM0 [4] |
0x45b7ac VMULSD %XMM6,%XMM2,%XMM2 |
0x45b7b0 VCOMISD %XMM5,%XMM15 |
0x45b7b4 VDIVSD %XMM0,%XMM23,%XMM24 |
0x45b7ba VUNPCKHPD %XMM14,%XMM14,%XMM4 |
0x45b7bf VADDPD %XMM14,%XMM4,%XMM4 |
0x45b7c4 VMOVSD (%R12,%RAX,8),%XMM14 [6] |
0x45b7ca VSUBSD (%R13,%RAX,8),%XMM14,%XMM0 [13] |
0x45b7d1 VMOVSD %XMM4,%XMM4,%XMM21 |
0x45b7d7 VADDSD (%R11),%XMM3,%XMM4 [1] |
0x45b7dc VDIVSD %XMM4,%XMM0,%XMM0 |
0x45b7e0 VMULSD %XMM24,%XMM24,%XMM25 |
0x45b7e6 VDIVSD %XMM13,%XMM2,%XMM2 |
0x45b7eb VMULSD %XMM24,%XMM0,%XMM14 |
0x45b7f1 VMULSD %XMM0,%XMM0,%XMM4 |
0x45b7f5 VMULSD %XMM21,%XMM14,%XMM14 |
0x45b7fb VDIVSD %XMM3,%XMM1,%XMM1 |
0x45b7ff VFMADD132SD %XMM25,%XMM14,%XMM2 |
0x45b805 VFMADD132SD %XMM4,%XMM2,%XMM1 |
0x45b80a VADDSD %XMM25,%XMM4,%XMM4 |
0x45b810 VMAXSD %XMM7,%XMM4,%XMM2 |
0x45b814 VDIVSD %XMM2,%XMM1,%XMM1 |
0x45b818 JAE 45b824 |
0x45b81a VCOMISD %XMM5,%XMM1 |
0x45b81e JBE 45b660 |
0x45b824 VXORPD %XMM13,%XMM13,%XMM13 |
0x45b829 VMOVSD %XMM13,(%R15,%RAX,8) [5] |
0x45b82f MOV %RDX,%RAX |
0x45b832 CMP %RDX,-0x38(%RBP) [14] |
0x45b836 JNE 45b6ec |
0x45b888 VCOMISD %XMM5,%XMM14 |
0x45b88c VXORPD %XMM12,%XMM14,%XMM4 |
0x45b891 VMOVSD %XMM4,%XMM4,%XMM14 |
0x45b895 JBE 45b8b7 |
0x45b897 VMOVSD %XMM11,%XMM11,%XMM2 |
0x45b89b JMP 45b67a |
0x45b8b7 VMOVSD 0x7d19(%RIP),%XMM2 [8] |
0x45b8bf JMP 45b67a |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/viscosity.cpp: 39 - 64 |
-------------------------------------------------------------------------------- |
39: double ugrad = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) - (xvel0(i, j) + xvel0(i + 0, j + 1)); |
40: double vgrad = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) - (yvel0(i, j) + yvel0(i + 1, j + 0)); |
41: double div = (celldx[i] * (ugrad) + celldy[j] * (vgrad)); |
42: double strain2 = 0.5 * (xvel0(i + 0, j + 1) + xvel0(i + 1, j + 1) - xvel0(i, j) - xvel0(i + 1, j + 0)) / celldy[j] + |
43: 0.5 * (yvel0(i + 1, j + 0) + yvel0(i + 1, j + 1) - yvel0(i, j) - yvel0(i + 0, j + 1)) / celldx[i]; |
44: double pgradx = (pressure(i + 1, j + 0) - pressure(i - 1, j + 0)) / (celldx[i] + celldx[i + 1]); |
45: double pgrady = (pressure(i + 0, j + 1) - pressure(i + 0, j - 1)) / (celldy[j] + celldy[j + 2]); |
46: double pgradx2 = pgradx * pgradx; |
47: double pgrady2 = pgrady * pgrady; |
48: double limiter = ((0.5 * (ugrad) / celldx[i]) * pgradx2 + (0.5 * (vgrad) / celldy[j]) * pgrady2 + strain2 * pgradx * pgrady) / |
49: std::fmax(pgradx2 + pgrady2, g_small); |
50: if ((limiter > 0.0) || (div >= 0.0)) { |
51: viscosity(i, j) = 0.0; |
52: } else { |
53: double dirx = 1.0; |
54: if (pgradx < 0.0) dirx = -1.0; |
55: pgradx = dirx * std::fmax(g_small, std::fabs(pgradx)); |
56: double diry = 1.0; |
57: if (pgradx < 0.0) diry = -1.0; |
58: pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); |
59: double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); |
60: double xgrad = std::fabs(celldx[i] * pgrad / pgradx); |
61: double ygrad = std::fabs(celldy[j] * pgrad / pgrady); |
62: double grad = std::fmin(xgrad, ygrad); |
63: double grad2 = grad * grad; |
64: viscosity(i, j) = 2.0 * density0(i, j) * grad2 * limiter * limiter; |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.21 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.79 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.80 |
CQA speedup if fully vectorized | 1.80 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.60 |
Bottlenecks | P0, |
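Function | viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&) |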
Source | viscosity.cpp:39-64,context.h:69-69 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 36.50 |
CQA cycles if no scalar integer | 36.50 |
CQA cycles if FP arith vectorized | 20.25 |
CQA cycles if fully vectorized | 20.25 |
Front-end cycles | 17.67 |
DIV/SQRT cycles | 36.50 |
P0 cycles | 22.83 |
P1 cycles | 22.50 |
P2 cycles | 8.33 |
P3 cycles | 8.33 |
P4 cycles | 1.00 |
P5 cycles | 22.67 |
P6 cycles | 6.00 |
P7 cycles | 1.00 |
P8 cycles | 1.00 |
P9 cycles | 1.00 |
P10 cycles | 3.00 |
P11 cycles | 8.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 39.17 - 38.37 |
Stall cycles (UFS) | 20.77 - 19.99 |
Nb insns | 103.00 |
Nb uops | 103.00 |
Nb loads | 25.00 |
Nb stores | 2.00 |
Nb stack references | 4.00 |
FLOP/cycle | 1.48 |
Nb FLOP add-sub | 20.00 |
Nb FLOP mul | 16.00 |
Nb FLOP fma | 4.00 |
Nb FLOP div | 9.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 5.92 |
Bytes prefetched | 0.00 |
Bytes loaded | 200.00 |
Bytes stored | 16.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 10.59 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 6.67 |
Vectorization ratio add_sub | 5.26 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 11.11 |
Vectorization ratio other | 27.27 |
Vector-efficiency ratio all | 13.82 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 13.33 |
Vector-efficiency ratio add_sub | 13.16 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 13.89 |
Vector-efficiency ratio other | 15.91 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.80 |
CQA speedup if fully vectorized | 1.80 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.60 |
Bottlenecks | P0, |
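Function | viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&) |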
Source | viscosity.cpp:39-64,context.h:69-69 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 36.50 |
CQA cycles if no scalar integer | 36.50 |
CQA cycles if FP arith vectorized | 20.25 |
CQA cycles if fully vectorized | 20.25 |
Front-end cycles | 17.67 |
DIV/SQRT cycles | 36.50 |
P0 cycles | 22.83 |
P1 cycles | 22.50 |
P2 cycles | 8.33 |
P3 cycles | 8.33 |
P4 cycles | 1.00 |
P5 cycles | 22.67 |
P6 cycles | 6.00 |
P7 cycles | 1.00 |
P8 cycles | 1.00 |
P9 cycles | 1.00 |
P10 cycles | 3.00 |
P11 cycles | 8.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 39.17 - 38.37 |
Stall cycles (UFS) | 20.77 - 19.99 |
Nb insns | 103.00 |
Nb uops | 103.00 |
Nb loads | 25.00 |
Nb stores | 2.00 |
Nb stack references | 4.00 |
FLOP/cycle | 1.48 |
Nb FLOP add-sub | 20.00 |
Nb FLOP mul | 16.00 |
Nb FLOP fma | 4.00 |
Nb FLOP div | 9.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 5.92 |
Bytes prefetched | 0.00 |
Bytes loaded | 200.00 |
Bytes stored | 16.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 10.59 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 6.67 |
Vectorization ratio add_sub | 5.26 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 11.11 |
Vectorization ratio other | 27.27 |
Vector-efficiency ratio all | 13.82 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 13.33 |
Vector-efficiency ratio add_sub | 13.16 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 13.89 |
Vector-efficiency ratio other | 15.91 |
Path / |
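Function | viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&) |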
Source file and lines | viscosity.cpp:39-64 |
Module | exec |
nb instructions | 103 |
nb uops | 103 |
loop length | 513 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 26 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
ADD-SUB / MUL ratio | 1.27 |
micro-operation queue | 17.67 cycles |
front end | 17.67 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 22.83 | 22.50 | 8.33 | 8.33 | 1.00 | 22.67 | 6.00 | 1.00 | 1.00 | 1.00 | 3.00 | 8.33 |
cycles | 22.83 | 22.50 | 8.33 | 8.33 | 1.00 | 22.67 | 6.00 | 1.00 | 1.00 | 1.00 | 3.00 | 8.33 |
Cycles executing div or sqrt instructions | 36.50 |
FE+BE cycles | 39.17-38.37 |
Stall cycles | 20.77-19.99 |
PRF_FLOAT full (events) | 23.25-22.03 |
Front-end | 17.67 |
Dispatch | 22.83 |
DIV/SQRT | 36.50 |
Overall L1 | 36.50 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | 0% |
store | 0% |
mul | 6% |
add-sub | 5% |
fma | 0% |
div/sqrt | 11% |
other | 28% |
all | 10% |
load | 0% |
store | 0% |
mul | 6% |
add-sub | 5% |
fma | 0% |
div/sqrt | 11% |
other | 27% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | 12% |
mul | 13% |
add-sub | 13% |
fma | 12% |
div/sqrt | 13% |
other | 16% |
all | 13% |
load | 12% |
store | 12% |
mul | 13% |
add-sub | 13% |
fma | 12% |
div/sqrt | 13% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VCOMISD %XMM24,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VANDPD %XMM8,%XMM24,%XMM15 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVSD %XMM10,%XMM10,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMAXSD %XMM7,%XMM15,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JA 45b888 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x498> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VANDPD %XMM8,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV -0x50(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMAXSD %XMM7,%XMM0,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL (%R14),%R13 | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
VMULSD %XMM2,%XMM15,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %RAX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMULSD %XMM4,%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM14,%XMM14,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTSD %XMM0,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50 |
VMULSD %XMM3,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM13,%XMM0,%XMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM4,%XMM3,%XMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VMOVSD (%RCX,%R13,8),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VDIVSD %XMM14,%XMM13,%XMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VANDPD %XMM8,%XMM2,%XMM15 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VANDPD %XMM8,%XMM14,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMINSD %XMM4,%XMM15,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM3,%XMM3,%XMM15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM1,%XMM1,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM2,%XMM15,%XMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM13,(%R15,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,-0x38(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 45b83c <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x44c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD 0x8(%R9,%RAX,8),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R9,%RAX,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%R10,%RAX,8),%XMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R10,%RAX,8),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD %XMM2,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x8(%R8,%RAX,8),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%RDI,%RAX,8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD %XMM14,%XMM1,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM18,%XMM2,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%R8,%RAX,8),%XMM17 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD %XMM13,%XMM15,%XMM16 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM18,%XMM14,%XMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x8(%RSI,%RAX,8),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD %XMM17,%XMM13,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%RBX,%RAX,8),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM4,%XMM3,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%RDI,%RAX,8),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD -0x8(%RSI,%RAX,8),%XMM22,%XMM23 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VSUBSD %XMM14,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM4,%XMM15,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM17,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBSD %XMM4,%XMM16,%XMM20 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBSD %XMM3,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%RCX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUNPCKLPD %XMM13,%XMM3,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULSD %XMM1,%XMM3,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VUNPCKLPD %XMM20,%XMM0,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %XMM9,%XMM14,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM6,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM2,%XMM13,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %XMM19,%XMM0,%XMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VADDSD 0x8(%RBX,%RAX,8),%XMM13,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMULSD %XMM6,%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM5,%XMM15 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VDIVSD %XMM0,%XMM23,%XMM24 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VUNPCKHPD %XMM14,%XMM14,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM14,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%R12,%RAX,8),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD (%R13,%RAX,8),%XMM14,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM4,%XMM4,%XMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VADDSD (%R11),%XMM3,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VDIVSD %XMM4,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VMULSD %XMM24,%XMM24,%XMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM13,%XMM2,%XMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VMULSD %XMM24,%XMM0,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM0,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM21,%XMM14,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM3,%XMM1,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VFMADD132SD %XMM25,%XMM14,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM25,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMAXSD %XMM7,%XMM4,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM2,%XMM1,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
JAE 45b824 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x434> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCOMISD %XMM5,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 45b660 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x270> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD %XMM13,(%R15,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,-0x38(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 45b6ec <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x2fc> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCOMISD %XMM5,%XMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM12,%XMM14,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VMOVSD %XMM4,%XMM4,%XMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JBE 45b8b7 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x4c7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD %XMM11,%XMM11,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 45b67a <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x28a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD 0x7d19(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 45b67a <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x28a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
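Function | viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&) |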
Source file and lines | viscosity.cpp:39-64 |
Module | exec |
nb instructions | 103 |
nb uops | 103 |
loop length | 513 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 26 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
ADD-SUB / MUL ratio | 1.27 |
micro-operation queue | 17.67 cycles |
front end | 17.67 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 22.83 | 22.50 | 8.33 | 8.33 | 1.00 | 22.67 | 6.00 | 1.00 | 1.00 | 1.00 | 3.00 | 8.33 |
cycles | 22.83 | 22.50 | 8.33 | 8.33 | 1.00 | 22.67 | 6.00 | 1.00 | 1.00 | 1.00 | 3.00 | 8.33 |
Cycles executing div or sqrt instructions | 36.50 |
FE+BE cycles | 39.17-38.37 |
Stall cycles | 20.77-19.99 |
PRF_FLOAT full (events) | 23.25-22.03 |
Front-end | 17.67 |
Dispatch | 22.83 |
DIV/SQRT | 36.50 |
Overall L1 | 36.50 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | 0% |
store | 0% |
mul | 6% |
add-sub | 5% |
fma | 0% |
div/sqrt | 11% |
other | 28% |
all | 10% |
load | 0% |
store | 0% |
mul | 6% |
add-sub | 5% |
fma | 0% |
div/sqrt | 11% |
other | 27% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | 12% |
mul | 13% |
add-sub | 13% |
fma | 12% |
div/sqrt | 13% |
other | 16% |
all | 13% |
load | 12% |
store | 12% |
mul | 13% |
add-sub | 13% |
fma | 12% |
div/sqrt | 13% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VCOMISD %XMM24,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VANDPD %XMM8,%XMM24,%XMM15 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVSD %XMM10,%XMM10,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMAXSD %XMM7,%XMM15,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JA 45b888 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x498> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VANDPD %XMM8,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV -0x50(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMAXSD %XMM7,%XMM0,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL (%R14),%R13 | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
VMULSD %XMM2,%XMM15,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %RAX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMULSD %XMM4,%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM14,%XMM14,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTSD %XMM0,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50 |
VMULSD %XMM3,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM13,%XMM0,%XMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM4,%XMM3,%XMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VMOVSD (%RCX,%R13,8),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VDIVSD %XMM14,%XMM13,%XMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VANDPD %XMM8,%XMM2,%XMM15 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VANDPD %XMM8,%XMM14,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMINSD %XMM4,%XMM15,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM3,%XMM3,%XMM15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULSD %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM1,%XMM1,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM2,%XMM15,%XMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM13,(%R15,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,-0x38(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 45b83c <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x44c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD 0x8(%R9,%RAX,8),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R9,%RAX,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%R10,%RAX,8),%XMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R10,%RAX,8),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD %XMM2,%XMM1,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x8(%R8,%RAX,8),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%RDI,%RAX,8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD %XMM14,%XMM1,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM18,%XMM2,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%R8,%RAX,8),%XMM17 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD %XMM13,%XMM15,%XMM16 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM18,%XMM14,%XMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x8(%RSI,%RAX,8),%XMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD %XMM17,%XMM13,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%RBX,%RAX,8),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM4,%XMM3,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%RDI,%RAX,8),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD -0x8(%RSI,%RAX,8),%XMM22,%XMM23 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VSUBSD %XMM14,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM4,%XMM15,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM17,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBSD %XMM4,%XMM16,%XMM20 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBSD %XMM3,%XMM1,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%RCX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUNPCKLPD %XMM13,%XMM3,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULSD %XMM1,%XMM3,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VUNPCKLPD %XMM20,%XMM0,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %XMM9,%XMM14,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM6,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM2,%XMM13,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %XMM19,%XMM0,%XMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VADDSD 0x8(%RBX,%RAX,8),%XMM13,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMULSD %XMM6,%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM5,%XMM15 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VDIVSD %XMM0,%XMM23,%XMM24 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VUNPCKHPD %XMM14,%XMM14,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM14,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD (%R12,%RAX,8),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD (%R13,%RAX,8),%XMM14,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM4,%XMM4,%XMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VADDSD (%R11),%XMM3,%XMM4 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VDIVSD %XMM4,%XMM0,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VMULSD %XMM24,%XMM24,%XMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM13,%XMM2,%XMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VMULSD %XMM24,%XMM0,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM0,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM21,%XMM14,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM3,%XMM1,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
VFMADD132SD %XMM25,%XMM14,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM25,%XMM4,%XMM4 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMAXSD %XMM7,%XMM4,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVSD %XMM2,%XMM1,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
JAE 45b824 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x434> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCOMISD %XMM5,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JBE 45b660 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x270> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD %XMM13,(%R15,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,-0x38(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 45b6ec <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x2fc> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCOMISD %XMM5,%XMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM12,%XMM14,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VMOVSD %XMM4,%XMM4,%XMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JBE 45b8b7 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x4c7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD %XMM11,%XMM11,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 45b67a <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x28a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD 0x7d19(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 45b67a <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x28a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |