Loop Id: 933 | Module: exec | Source: par_multi_interp.c:1747-1837 [...] | Coverage: 0.74% |
---|
Loop Id: 933 | Module: exec | Source: par_multi_interp.c:1747-1837 [...] | Coverage: 0.74% |
---|
0x442840 MOV -0x48(%RBP),%R11 |
0x442844 INC %RDX |
0x442847 CMP %RCX,%RDX |
0x44284a MOV -0x50(%RBP),%R9 |
0x44284e JE 442be0 |
0x442854 MOV -0x170(%RBP),%RSI |
0x44285b MOV (%RSI,%RDX,8),%RSI |
0x44285f MOV -0x98(%RBP),%RDI |
0x442866 MOV -0x30(%RBP),%R8 |
0x44286a CMP %R8,(%RDI,%RSI,8) |
0x44286e JNE 4428b0 |
0x442870 MOV -0x70(%RBP),%R8 |
0x442874 MOV (%R8,%RSI,8),%RDI |
0x442878 MOV 0x8(%R8,%RSI,8),%R8 |
0x44287d MOV %R8,%R11 |
0x442880 SUB %RDI,%R11 |
0x442883 JLE 442a47 |
0x442889 MOV -0x90(%RBP),%R9 |
0x442890 MOV (%R9),%R9 |
0x442893 MOV -0x80(%RBP),%R10 |
0x442897 MOV (%R10),%R10 |
0x44289a CMP $0x4,%R11 |
0x44289e JAE 4428f5 |
0x4428a0 JMP 4429d4 |
0x4428b0 MOV -0x158(%RBP),%RDI |
0x4428b7 CMPQ $-0x3,(%RDI,%RSI,8) |
0x4428bc JE 442844 |
0x4428be CMPQ $0x1,-0xe0(%RBP) |
0x4428c6 JE 4428e1 |
0x4428c8 MOV -0xc8(%RBP),%R8 |
0x4428cf MOV -0x30(%RBP),%RDI |
0x4428d3 MOV (%R8,%RDI,8),%RDI |
0x4428d7 CMP (%R8,%RSI,8),%RDI |
0x4428db JNE 442844 |
0x4428e1 MOV -0x90(%RBP),%RSI |
0x4428e8 MOV (%RSI),%RSI |
0x4428eb VADDSD (%RSI,%RDX,8),%XMM0,%XMM0 |
0x4428f0 JMP 442844 |
0x4428f5 MOV %R11,%RBX |
0x4428f8 SHR $0x2,%RBX |
0x4428fc LEA 0x18(,%RDI,8),%R14 |
0x442904 NOPW %CS:(%RAX,%RAX,1) |
(936) 0x442910 MOV -0x18(%RAX,%R14,1),%R13 |
(936) 0x442915 VMOVSD -0x18(%R10,%R14,1),%XMM2 |
(936) 0x44291c VMOVSD (%R9,%RDX,8),%XMM3 |
(936) 0x442922 MOV (%R12,%R13,8),%R13 |
(936) 0x442926 VMOVSD (%R10,%R13,8),%XMM4 |
(936) 0x44292c VFMADD231SD %XMM2,%XMM3,%XMM4 |
(936) 0x442931 VMOVSD %XMM4,(%R10,%R13,8) |
(936) 0x442937 MOV -0x10(%RAX,%R14,1),%R13 |
(936) 0x44293c VMOVSD -0x10(%R10,%R14,1),%XMM4 |
(936) 0x442943 VMOVSD (%R9,%RDX,8),%XMM5 |
(936) 0x442949 MOV (%R12,%R13,8),%R13 |
(936) 0x44294d VMOVSD (%R10,%R13,8),%XMM6 |
(936) 0x442953 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(936) 0x442958 VMOVSD %XMM6,(%R10,%R13,8) |
(936) 0x44295e VMOVSD -0x8(%R10,%R14,1),%XMM6 |
(936) 0x442965 VMULSD (%R9,%RDX,8),%XMM6,%XMM6 |
(936) 0x44296b MOV -0x8(%RAX,%R14,1),%R13 |
(936) 0x442970 MOV (%R12,%R13,8),%R13 |
(936) 0x442974 VADDSD (%R10,%R13,8),%XMM6,%XMM7 |
(936) 0x44297a VMOVSD %XMM7,(%R10,%R13,8) |
(936) 0x442980 VFMADD213SD %XMM6,%XMM5,%XMM4 |
(936) 0x442985 MOV (%RAX,%R14,1),%R13 |
(936) 0x442989 VMOVSD (%R10,%R14,1),%XMM5 |
(936) 0x44298f VMULSD (%R9,%RDX,8),%XMM5,%XMM18 |
(936) 0x442996 MOV (%R12,%R13,8),%R13 |
(936) 0x44299a VADDSD (%R10,%R13,8),%XMM18,%XMM5 |
(936) 0x4429a1 VMOVSD %XMM5,(%R10,%R13,8) |
(936) 0x4429a7 VADDSD %XMM1,%XMM4,%XMM1 |
(936) 0x4429ab VMOVAPD %XMM2,%XMM5 |
(936) 0x4429af VFMADD213SD %XMM18,%XMM3,%XMM5 |
(936) 0x4429b5 VADDSD %XMM1,%XMM5,%XMM1 |
(936) 0x4429b9 VADDSD %XMM0,%XMM4,%XMM0 |
(936) 0x4429bd VFMADD213SD %XMM18,%XMM3,%XMM2 |
(936) 0x4429c3 VADDSD %XMM0,%XMM2,%XMM0 |
(936) 0x4429c7 ADD $0x20,%R14 |
(936) 0x4429cb DEC %RBX |
(936) 0x4429ce JNE 442910 |
0x4429d4 MOV %R11,%RBX |
0x4429d7 AND $-0x4,%RBX |
0x4429db CMP %R11,%RBX |
0x4429de JAE 442a38 |
0x4429e0 ADD %RBX,%RDI |
0x4429e3 MOV -0xc0(%RBP),%R13 |
0x4429ea MOV -0x38(%RBP),%RBX |
0x4429ee MOV -0x60(%RBP),%R14 |
0x4429f2 NOPW %CS:(%RAX,%RAX,1) |
(937) 0x442a00 MOV (%RAX,%RDI,8),%R11 |
(937) 0x442a04 VMOVSD (%R10,%RDI,8),%XMM2 |
(937) 0x442a0a VMULSD (%R9,%RDX,8),%XMM2,%XMM18 |
(937) 0x442a11 MOV (%R12,%R11,8),%R11 |
(937) 0x442a15 VADDSD (%R10,%R11,8),%XMM18,%XMM2 |
(937) 0x442a1c VMOVSD %XMM2,(%R10,%R11,8) |
(937) 0x442a22 VADDSD %XMM1,%XMM18,%XMM1 |
(937) 0x442a28 VADDSD %XMM0,%XMM18,%XMM0 |
(937) 0x442a2e INC %RDI |
(937) 0x442a31 CMP %RDI,%R8 |
(937) 0x442a34 JNE 442a00 |
0x442a36 JMP 442a47 |
0x442a38 MOV -0xc0(%RBP),%R13 |
0x442a3f MOV -0x38(%RBP),%RBX |
0x442a43 MOV -0x60(%RBP),%R14 |
0x442a47 MOV -0x68(%RBP),%R8 |
0x442a4b MOV (%R8,%RSI,8),%RDI |
0x442a4f MOV 0x8(%R8,%RSI,8),%RSI |
0x442a54 MOV %RSI,%R10 |
0x442a57 SUB %RDI,%R10 |
0x442a5a JLE 442840 |
0x442a60 MOV -0x90(%RBP),%RAX |
0x442a67 MOV (%RAX),%R8 |
0x442a6a MOV -0x88(%RBP),%RAX |
0x442a71 MOV (%RAX),%R9 |
0x442a74 CMP $0x4,%R10 |
0x442a78 JAE 442a7f |
0x442a7a JMP 442b64 |
0x442a7f MOV %R10,%R11 |
0x442a82 SHR $0x2,%R11 |
0x442a86 MOV %RBX,%RAX |
0x442a89 LEA 0x18(,%RDI,8),%RBX |
0x442a91 NOPW %CS:(%RAX,%RAX,1) |
(934) 0x442aa0 MOV -0x18(%RAX,%RBX,1),%R14 |
(934) 0x442aa5 VMOVSD -0x18(%R9,%RBX,1),%XMM2 |
(934) 0x442aac VMOVSD (%R8,%RDX,8),%XMM3 |
(934) 0x442ab2 MOV (%R15,%R14,8),%R14 |
(934) 0x442ab6 VMOVSD (%R9,%R14,8),%XMM4 |
(934) 0x442abc VFMADD231SD %XMM2,%XMM3,%XMM4 |
(934) 0x442ac1 VMOVSD %XMM4,(%R9,%R14,8) |
(934) 0x442ac7 MOV -0x10(%RAX,%RBX,1),%R14 |
(934) 0x442acc VMOVSD -0x10(%R9,%RBX,1),%XMM4 |
(934) 0x442ad3 VMOVSD (%R8,%RDX,8),%XMM5 |
(934) 0x442ad9 MOV (%R15,%R14,8),%R14 |
(934) 0x442add VMOVSD (%R9,%R14,8),%XMM6 |
(934) 0x442ae3 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(934) 0x442ae8 VMOVSD %XMM6,(%R9,%R14,8) |
(934) 0x442aee VMOVSD -0x8(%R9,%RBX,1),%XMM6 |
(934) 0x442af5 VMULSD (%R8,%RDX,8),%XMM6,%XMM6 |
(934) 0x442afb MOV -0x8(%RAX,%RBX,1),%R14 |
(934) 0x442b00 MOV (%R15,%R14,8),%R14 |
(934) 0x442b04 VADDSD (%R9,%R14,8),%XMM6,%XMM7 |
(934) 0x442b0a VMOVSD %XMM7,(%R9,%R14,8) |
(934) 0x442b10 VFMADD213SD %XMM6,%XMM5,%XMM4 |
(934) 0x442b15 MOV (%RAX,%RBX,1),%R14 |
(934) 0x442b19 VMOVSD (%R9,%RBX,1),%XMM5 |
(934) 0x442b1f VMULSD (%R8,%RDX,8),%XMM5,%XMM18 |
(934) 0x442b26 MOV (%R15,%R14,8),%R14 |
(934) 0x442b2a VADDSD (%R9,%R14,8),%XMM18,%XMM5 |
(934) 0x442b31 VMOVSD %XMM5,(%R9,%R14,8) |
(934) 0x442b37 VADDSD %XMM1,%XMM4,%XMM1 |
(934) 0x442b3b VMOVAPD %XMM2,%XMM5 |
(934) 0x442b3f VFMADD213SD %XMM18,%XMM3,%XMM5 |
(934) 0x442b45 VADDSD %XMM1,%XMM5,%XMM1 |
(934) 0x442b49 VADDSD %XMM0,%XMM4,%XMM0 |
(934) 0x442b4d VFMADD213SD %XMM18,%XMM3,%XMM2 |
(934) 0x442b53 VADDSD %XMM0,%XMM2,%XMM0 |
(934) 0x442b57 ADD $0x20,%RBX |
(934) 0x442b5b DEC %R11 |
(934) 0x442b5e JNE 442aa0 |
0x442b64 MOV %R10,%R11 |
0x442b67 AND $-0x4,%R11 |
0x442b6b CMP %R10,%R11 |
0x442b6e JAE 442bcb |
0x442b70 ADD %R11,%RDI |
0x442b73 MOV -0x48(%RBP),%R11 |
0x442b77 MOV -0x38(%RBP),%RBX |
0x442b7b MOV -0x58(%RBP),%RAX |
0x442b7f MOV -0x60(%RBP),%R14 |
0x442b83 NOPW %CS:(%RAX,%RAX,1) |
(935) 0x442b90 MOV (%RBX,%RDI,8),%R10 |
(935) 0x442b94 VMOVSD (%R9,%RDI,8),%XMM2 |
(935) 0x442b9a VMULSD (%R8,%RDX,8),%XMM2,%XMM18 |
(935) 0x442ba1 MOV (%R15,%R10,8),%R10 |
(935) 0x442ba5 VADDSD (%R9,%R10,8),%XMM18,%XMM2 |
(935) 0x442bac VMOVSD %XMM2,(%R9,%R10,8) |
(935) 0x442bb2 VADDSD %XMM1,%XMM18,%XMM1 |
(935) 0x442bb8 VADDSD %XMM0,%XMM18,%XMM0 |
(935) 0x442bbe INC %RDI |
(935) 0x442bc1 CMP %RDI,%RSI |
(935) 0x442bc4 JNE 442b90 |
0x442bc6 JMP 442844 |
0x442bcb MOV -0x48(%RBP),%R11 |
0x442bcf MOV -0x38(%RBP),%RBX |
0x442bd3 MOV -0x58(%RBP),%RAX |
0x442bd7 MOV -0x60(%RBP),%R14 |
0x442bdb JMP 442844 |
/scratch_na/users/xoserete/qaas_runs/171-172-8217/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1837 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.56 |
CQA speedup if FP arith vectorized | 2.45 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.02 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1811-1816,par_multi_interp.c:1819-1819,par_multi_interp.c:1824-1824,par_multi_interp.c:1827-1827,par_multi_interp.c:1835-1837 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 15.33 |
CQA cycles if no scalar integer | 6.00 |
CQA cycles if FP arith vectorized | 6.25 |
CQA cycles if fully vectorized | 1.92 |
Front-end cycles | 15.33 |
DIV/SQRT cycles | 6.50 |
P0 cycles | 5.30 |
P1 cycles | 15.00 |
P2 cycles | 15.00 |
P3 cycles | 0.00 |
P4 cycles | 5.30 |
P5 cycles | 6.50 |
P6 cycles | 0.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 5.40 |
P10 cycles | 15.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 17.69 |
Stall cycles (UFS) | 2.18 |
Nb insns | 89.00 |
Nb uops | 89.00 |
Nb loads | 45.00 |
Nb stores | 0.00 |
Nb stack references | 17.00 |
FLOP/cycle | 0.07 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 23.48 |
Bytes prefetched | 0.00 |
Bytes loaded | 360.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.56 |
CQA speedup if FP arith vectorized | 2.45 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.02 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1811-1816,par_multi_interp.c:1819-1819,par_multi_interp.c:1824-1824,par_multi_interp.c:1827-1827,par_multi_interp.c:1835-1837 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 15.33 |
CQA cycles if no scalar integer | 6.00 |
CQA cycles if FP arith vectorized | 6.25 |
CQA cycles if fully vectorized | 1.92 |
Front-end cycles | 15.33 |
DIV/SQRT cycles | 6.50 |
P0 cycles | 5.30 |
P1 cycles | 15.00 |
P2 cycles | 15.00 |
P3 cycles | 0.00 |
P4 cycles | 5.30 |
P5 cycles | 6.50 |
P6 cycles | 0.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 5.40 |
P10 cycles | 15.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 17.69 |
Stall cycles (UFS) | 2.18 |
Nb insns | 89.00 |
Nb uops | 89.00 |
Nb loads | 45.00 |
Nb stores | 0.00 |
Nb stack references | 17.00 |
FLOP/cycle | 0.07 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 23.48 |
Bytes prefetched | 0.00 |
Bytes loaded | 360.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1837 |
Module | exec |
nb instructions | 89 |
nb uops | 89 |
loop length | 417 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 17 |
micro-operation queue | 15.33 cycles |
front end | 15.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 5.30 | 15.00 | 15.00 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 15.00 |
cycles | 6.50 | 5.30 | 15.00 | 15.00 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 15.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 17.69 |
Stall cycles | 2.18 |
LM full (events) | 7.03 |
Front-end | 15.33 |
Dispatch | 15.00 |
Overall L1 | 15.33 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 442be0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xba0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x170(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x98(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R8,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 4428b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x870> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x70(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8,%RSI,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 442a47 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa07> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x90(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R9),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R10),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x4,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4428f5 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8b5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4429d4 <hypre_BoomerAMGBuildMultipass.extracted.28+0x994> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x158(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $-0x3,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPQ $0x1,-0xe0(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4428e1 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8a1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc8(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RDI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP (%R8,%RSI,8),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x90(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD (%RSI,%RDX,8),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
JMP 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R11,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(,%RDI,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 442a38 <hypre_BoomerAMGBuildMultipass.extracted.28+0x9f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RBX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0xc0(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 442a47 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa07> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xc0(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8,%RSI,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 442840 <hypre_BoomerAMGBuildMultipass.extracted.28+0x800> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 442a7f <hypre_BoomerAMGBuildMultipass.extracted.28+0xa3f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 442b64 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb24> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R10,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x18(,%RDI,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 442bcb <hypre_BoomerAMGBuildMultipass.extracted.28+0xb8b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R11,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1837 |
Module | exec |
nb instructions | 89 |
nb uops | 89 |
loop length | 417 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 17 |
micro-operation queue | 15.33 cycles |
front end | 15.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 5.30 | 15.00 | 15.00 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 15.00 |
cycles | 6.50 | 5.30 | 15.00 | 15.00 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 15.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 17.69 |
Stall cycles | 2.18 |
LM full (events) | 7.03 |
Front-end | 15.33 |
Dispatch | 15.00 |
Overall L1 | 15.33 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 442be0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xba0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x170(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x98(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R8,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 4428b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x870> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x70(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8,%RSI,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 442a47 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa07> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x90(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R9),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R10),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x4,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4428f5 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8b5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4429d4 <hypre_BoomerAMGBuildMultipass.extracted.28+0x994> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x158(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $-0x3,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPQ $0x1,-0xe0(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4428e1 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8a1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc8(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RDI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP (%R8,%RSI,8),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x90(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VADDSD (%RSI,%RDX,8),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
JMP 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R11,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(,%RDI,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 442a38 <hypre_BoomerAMGBuildMultipass.extracted.28+0x9f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RBX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0xc0(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 442a47 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa07> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xc0(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8,%RSI,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 442840 <hypre_BoomerAMGBuildMultipass.extracted.28+0x800> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 442a7f <hypre_BoomerAMGBuildMultipass.extracted.28+0xa3f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 442b64 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb24> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R10,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x18(,%RDI,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 442bcb <hypre_BoomerAMGBuildMultipass.extracted.28+0xb8b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R11,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 442844 <hypre_BoomerAMGBuildMultipass.extracted.28+0x804> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |