Loop Id: 1616 | Module: libparcsr_ls.so | Source: par_multi_interp.c:1747-1837 [...] | Coverage: 0.9% |
---|
Loop Id: 1616 | Module: libparcsr_ls.so | Source: par_multi_interp.c:1747-1837 [...] | Coverage: 0.9% |
---|
0x71780 MOV -0x50(%RBP),%R10 |
0x71784 MOV -0x48(%RBP),%R9 |
0x71788 INC %RDX |
0x7178b CMP %R12,%RDX |
0x7178e JE 71b40 |
0x71794 MOV -0x170(%RBP),%RAX |
0x7179b MOV (%RAX,%RDX,8),%RSI |
0x7179f MOV -0x78(%RBP),%RAX |
0x717a3 CMP %R9,(%RAX,%RSI,8) |
0x717a7 JNE 71800 |
0x717a9 MOV -0xb0(%RBP),%RAX |
0x717b0 MOV (%RAX,%RSI,8),%RDI |
0x717b4 MOV 0x8(%RAX,%RSI,8),%R8 |
0x717b9 MOV %R8,%R9 |
0x717bc SUB %RDI,%R9 |
0x717bf JLE 7198c |
0x717c5 CMP $0x4,%R9 |
0x717c9 JAE 71840 |
0x717cb JMP 71919 |
0x71800 MOV -0x158(%RBP),%RDI |
0x71807 CMPQ $-0x3,(%RDI,%RSI,8) |
0x7180c JE 71788 |
0x71812 CMPQ $0x1,-0xe0(%RBP) |
0x7181a JE 71831 |
0x7181c MOV -0xc8(%RBP),%R8 |
0x71823 MOV (%R8,%R9,8),%RDI |
0x71827 CMP (%R8,%RSI,8),%RDI |
0x7182b JNE 71788 |
0x71831 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
0x71836 JMP 71788 |
0x71840 MOV %R9,%R10 |
0x71843 SHR $0x2,%R10 |
0x71847 LEA 0x18(,%RDI,8),%R11 |
0x7184f MOV -0x68(%RBP),%RAX |
0x71853 MOV -0x58(%RBP),%RCX |
0x71857 NOPW (%RAX,%RAX,1) |
(1619) 0x71860 MOV -0x18(%RAX,%R11,1),%R12 |
(1619) 0x71865 VMOVSD -0x18(%R14,%R11,1),%XMM2 |
(1619) 0x7186c VMOVSD (%RBX,%RDX,8),%XMM3 |
(1619) 0x71871 MOV (%RCX,%R12,8),%R12 |
(1619) 0x71875 VMOVSD (%R14,%R12,8),%XMM4 |
(1619) 0x7187b VFMADD231SD %XMM2,%XMM3,%XMM4 |
(1619) 0x71880 VMOVSD %XMM4,(%R14,%R12,8) |
(1619) 0x71886 MOV -0x10(%RAX,%R11,1),%R12 |
(1619) 0x7188b VMOVSD -0x10(%R14,%R11,1),%XMM4 |
(1619) 0x71892 VMOVSD (%RBX,%RDX,8),%XMM5 |
(1619) 0x71897 MOV (%RCX,%R12,8),%R12 |
(1619) 0x7189b VMOVSD (%R14,%R12,8),%XMM6 |
(1619) 0x718a1 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(1619) 0x718a6 VMOVSD %XMM6,(%R14,%R12,8) |
(1619) 0x718ac MOV -0x8(%RAX,%R11,1),%R12 |
(1619) 0x718b1 VMOVSD -0x8(%R14,%R11,1),%XMM6 |
(1619) 0x718b8 VMOVSD (%RBX,%RDX,8),%XMM7 |
(1619) 0x718bd MOV (%RCX,%R12,8),%R12 |
(1619) 0x718c1 VMOVSD (%R14,%R12,8),%XMM8 |
(1619) 0x718c7 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(1619) 0x718cc VMOVSD %XMM8,(%R14,%R12,8) |
(1619) 0x718d2 VMOVSD (%R14,%R11,1),%XMM8 |
(1619) 0x718d8 VMULSD (%RBX,%RDX,8),%XMM8,%XMM18 |
(1619) 0x718df MOV (%RAX,%R11,1),%R12 |
(1619) 0x718e3 MOV (%RCX,%R12,8),%R12 |
(1619) 0x718e7 VADDSD (%R14,%R12,8),%XMM18,%XMM8 |
(1619) 0x718ee VMOVSD %XMM8,(%R14,%R12,8) |
(1619) 0x718f4 VFMADD213SD %XMM18,%XMM5,%XMM4 |
(1619) 0x718fa VFMADD231SD %XMM2,%XMM3,%XMM4 |
(1619) 0x718ff VFMADD231SD %XMM6,%XMM7,%XMM4 |
(1619) 0x71904 VADDSD %XMM1,%XMM4,%XMM1 |
(1619) 0x71908 VADDSD %XMM0,%XMM4,%XMM0 |
(1619) 0x7190c ADD $0x20,%R11 |
(1619) 0x71910 DEC %R10 |
(1619) 0x71913 JNE 71860 |
0x71919 MOV %R9,%R10 |
0x7191c AND $-0x4,%R10 |
0x71920 CMP %R9,%R10 |
0x71923 JAE 71980 |
0x71925 ADD %R10,%RDI |
0x71928 MOV -0x50(%RBP),%R10 |
0x7192c MOV -0x38(%RBP),%R11 |
0x71930 MOV -0x68(%RBP),%RAX |
0x71934 MOV -0x58(%RBP),%RCX |
0x71938 MOV -0x30(%RBP),%R12 |
0x7193c NOPL (%RAX) |
(1620) 0x71940 MOV (%RAX,%RDI,8),%R9 |
(1620) 0x71944 VMOVSD (%R14,%RDI,8),%XMM2 |
(1620) 0x7194a VMULSD (%RBX,%RDX,8),%XMM2,%XMM18 |
(1620) 0x71951 MOV (%RCX,%R9,8),%R9 |
(1620) 0x71955 VADDSD (%R14,%R9,8),%XMM18,%XMM2 |
(1620) 0x7195c VMOVSD %XMM2,(%R14,%R9,8) |
(1620) 0x71962 VADDSD %XMM1,%XMM18,%XMM1 |
(1620) 0x71968 VADDSD %XMM0,%XMM18,%XMM0 |
(1620) 0x7196e INC %RDI |
(1620) 0x71971 CMP %RDI,%R8 |
(1620) 0x71974 JNE 71940 |
0x71976 JMP 7198c |
0x71980 MOV -0x50(%RBP),%R10 |
0x71984 MOV -0x38(%RBP),%R11 |
0x71988 MOV -0x30(%RBP),%R12 |
0x7198c MOV -0xb8(%RBP),%R8 |
0x71993 MOV (%R8,%RSI,8),%RDI |
0x71997 MOV 0x8(%R8,%RSI,8),%RSI |
0x7199c MOV %RSI,%R8 |
0x7199f SUB %RDI,%R8 |
0x719a2 JLE 71784 |
0x719a8 CMP $0x4,%R8 |
0x719ac JAE 719c0 |
0x719ae JMP 71aa6 |
0x719c0 MOV %R8,%R9 |
0x719c3 SHR $0x2,%R9 |
0x719c7 LEA 0x18(,%RDI,8),%R10 |
0x719cf NOP |
(1617) 0x719d0 MOV -0x38(%RBP),%R11 |
(1617) 0x719d4 MOV -0x18(%R11,%R10,1),%R11 |
(1617) 0x719d9 VMOVSD -0x18(%R13,%R10,1),%XMM2 |
(1617) 0x719e0 VMOVSD (%RBX,%RDX,8),%XMM3 |
(1617) 0x719e5 MOV (%R15,%R11,8),%R11 |
(1617) 0x719e9 VMOVSD (%R13,%R11,8),%XMM4 |
(1617) 0x719f0 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(1617) 0x719f5 VMOVSD %XMM4,(%R13,%R11,8) |
(1617) 0x719fc MOV -0x38(%RBP),%R11 |
(1617) 0x71a00 MOV -0x10(%R11,%R10,1),%R11 |
(1617) 0x71a05 VMOVSD -0x10(%R13,%R10,1),%XMM4 |
(1617) 0x71a0c VMOVSD (%RBX,%RDX,8),%XMM5 |
(1617) 0x71a11 MOV (%R15,%R11,8),%R11 |
(1617) 0x71a15 VMOVSD (%R13,%R11,8),%XMM6 |
(1617) 0x71a1c VFMADD231SD %XMM4,%XMM5,%XMM6 |
(1617) 0x71a21 VMOVSD %XMM6,(%R13,%R11,8) |
(1617) 0x71a28 MOV -0x38(%RBP),%R11 |
(1617) 0x71a2c MOV -0x8(%R11,%R10,1),%R11 |
(1617) 0x71a31 VMOVSD -0x8(%R13,%R10,1),%XMM6 |
(1617) 0x71a38 VMOVSD (%RBX,%RDX,8),%XMM7 |
(1617) 0x71a3d MOV (%R15,%R11,8),%R11 |
(1617) 0x71a41 VMOVSD (%R13,%R11,8),%XMM8 |
(1617) 0x71a48 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(1617) 0x71a4d VMOVSD %XMM8,(%R13,%R11,8) |
(1617) 0x71a54 VMOVSD (%R13,%R10,1),%XMM8 |
(1617) 0x71a5b VMULSD (%RBX,%RDX,8),%XMM8,%XMM18 |
(1617) 0x71a62 MOV -0x38(%RBP),%R11 |
(1617) 0x71a66 MOV (%R11,%R10,1),%R11 |
(1617) 0x71a6a MOV (%R15,%R11,8),%R11 |
(1617) 0x71a6e VADDSD (%R13,%R11,8),%XMM18,%XMM8 |
(1617) 0x71a76 VMOVSD %XMM8,(%R13,%R11,8) |
(1617) 0x71a7d MOV -0x38(%RBP),%R11 |
(1617) 0x71a81 VFMADD213SD %XMM18,%XMM5,%XMM4 |
(1617) 0x71a87 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(1617) 0x71a8c VFMADD231SD %XMM6,%XMM7,%XMM4 |
(1617) 0x71a91 VADDSD %XMM1,%XMM4,%XMM1 |
(1617) 0x71a95 VADDSD %XMM0,%XMM4,%XMM0 |
(1617) 0x71a99 ADD $0x20,%R10 |
(1617) 0x71a9d DEC %R9 |
(1617) 0x71aa0 JNE 719d0 |
0x71aa6 MOV %R8,%R9 |
0x71aa9 AND $-0x4,%R9 |
0x71aad CMP %R8,%R9 |
0x71ab0 JAE 71780 |
0x71ab6 ADD %R9,%RDI |
0x71ab9 MOV -0x50(%RBP),%R10 |
0x71abd MOV -0x48(%RBP),%R9 |
0x71ac1 NOPW %CS:(%RAX,%RAX,1) |
(1618) 0x71ad0 MOV (%R11,%RDI,8),%R8 |
(1618) 0x71ad4 VMOVSD (%R13,%RDI,8),%XMM2 |
(1618) 0x71adb VMULSD (%RBX,%RDX,8),%XMM2,%XMM18 |
(1618) 0x71ae2 MOV (%R15,%R8,8),%R8 |
(1618) 0x71ae6 VADDSD (%R13,%R8,8),%XMM18,%XMM2 |
(1618) 0x71aee VMOVSD %XMM2,(%R13,%R8,8) |
(1618) 0x71af5 VADDSD %XMM1,%XMM18,%XMM1 |
(1618) 0x71afb VADDSD %XMM0,%XMM18,%XMM0 |
(1618) 0x71b01 INC %RDI |
(1618) 0x71b04 CMP %RDI,%RSI |
(1618) 0x71b07 JNE 71ad0 |
0x71b09 JMP 71788 |
/home/eoseret/qaas_runs_CPU_9468/171-716-5699/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1837 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.11 |
CQA speedup if FP arith vectorized | 2.24 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.23 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1811-1816,par_multi_interp.c:1824-1824,par_multi_interp.c:1835-1837 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 12.67 |
CQA cycles if no scalar integer | 6.00 |
CQA cycles if FP arith vectorized | 5.67 |
CQA cycles if fully vectorized | 1.58 |
Front-end cycles | 12.67 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 6.50 |
P1 cycles | 5.30 |
P2 cycles | 10.33 |
P3 cycles | 10.33 |
P4 cycles | 0.00 |
P5 cycles | 5.30 |
P6 cycles | 6.50 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 5.40 |
P11 cycles | 10.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 13.20 |
Stall cycles (UFS) | 0.31 |
Nb insns | 73.00 |
Nb uops | 73.00 |
Nb loads | 31.00 |
Nb stores | 0.00 |
Nb stack references | 13.00 |
FLOP/cycle | 0.08 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 19.58 |
Bytes prefetched | 0.00 |
Bytes loaded | 248.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.11 |
CQA speedup if FP arith vectorized | 2.24 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.23 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1811-1816,par_multi_interp.c:1824-1824,par_multi_interp.c:1835-1837 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 12.67 |
CQA cycles if no scalar integer | 6.00 |
CQA cycles if FP arith vectorized | 5.67 |
CQA cycles if fully vectorized | 1.58 |
Front-end cycles | 12.67 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 6.50 |
P1 cycles | 5.30 |
P2 cycles | 10.33 |
P3 cycles | 10.33 |
P4 cycles | 0.00 |
P5 cycles | 5.30 |
P6 cycles | 6.50 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 5.40 |
P11 cycles | 10.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 13.20 |
Stall cycles (UFS) | 0.31 |
Nb insns | 73.00 |
Nb uops | 73.00 |
Nb loads | 31.00 |
Nb stores | 0.00 |
Nb stack references | 13.00 |
FLOP/cycle | 0.08 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 19.58 |
Bytes prefetched | 0.00 |
Bytes loaded | 248.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1837 |
Module | libparcsr_ls.so |
nb instructions | 73 |
nb uops | 73 |
loop length | 326 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 5.30 | 10.33 | 10.33 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 10.33 |
cycles | 6.50 | 5.30 | 10.33 | 10.33 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 10.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 13.20 |
Stall cycles | 0.31 |
LM full (events) | 1.11 |
Front-end | 12.67 |
Dispatch | 10.33 |
Overall L1 | 12.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R12,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 71b40 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x170(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R9,(%RAX,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 71800 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa00> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RSI,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 7198c <hypre_BoomerAMGBuildMultipass.extracted.28+0xb8c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 71840 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 71919 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb19> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x158(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $-0x3,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 71788 <hypre_BoomerAMGBuildMultipass.extracted.28+0x988> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPQ $0x1,-0xe0(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 71831 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa31> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc8(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%R9,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP (%R8,%RSI,8),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 71788 <hypre_BoomerAMGBuildMultipass.extracted.28+0x988> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
JMP 71788 <hypre_BoomerAMGBuildMultipass.extracted.28+0x988> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(,%RDI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 71980 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R10,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 7198c <hypre_BoomerAMGBuildMultipass.extracted.28+0xb8c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8,%RSI,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 71784 <hypre_BoomerAMGBuildMultipass.extracted.28+0x984> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 719c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbc0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 71aa6 <hypre_BoomerAMGBuildMultipass.extracted.28+0xca6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(,%RDI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 71780 <hypre_BoomerAMGBuildMultipass.extracted.28+0x980> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R9,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 71788 <hypre_BoomerAMGBuildMultipass.extracted.28+0x988> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1837 |
Module | libparcsr_ls.so |
nb instructions | 73 |
nb uops | 73 |
loop length | 326 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 5.30 | 10.33 | 10.33 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 10.33 |
cycles | 6.50 | 5.30 | 10.33 | 10.33 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 10.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 13.20 |
Stall cycles | 0.31 |
LM full (events) | 1.11 |
Front-end | 12.67 |
Dispatch | 10.33 |
Overall L1 | 12.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R12,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 71b40 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x170(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R9,(%RAX,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 71800 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa00> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RSI,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 7198c <hypre_BoomerAMGBuildMultipass.extracted.28+0xb8c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 71840 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 71919 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb19> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x158(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $-0x3,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 71788 <hypre_BoomerAMGBuildMultipass.extracted.28+0x988> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPQ $0x1,-0xe0(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 71831 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa31> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc8(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%R9,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP (%R8,%RSI,8),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 71788 <hypre_BoomerAMGBuildMultipass.extracted.28+0x988> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
JMP 71788 <hypre_BoomerAMGBuildMultipass.extracted.28+0x988> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(,%RDI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 71980 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R10,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 7198c <hypre_BoomerAMGBuildMultipass.extracted.28+0xb8c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8,%RSI,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 71784 <hypre_BoomerAMGBuildMultipass.extracted.28+0x984> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 719c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbc0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 71aa6 <hypre_BoomerAMGBuildMultipass.extracted.28+0xca6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(,%RDI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 71780 <hypre_BoomerAMGBuildMultipass.extracted.28+0x980> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R9,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 71788 <hypre_BoomerAMGBuildMultipass.extracted.28+0x988> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |