Loop Id: 973 | Module: exec | Source: par_multi_interp.c:1747-1837 [...] | Coverage: 0.79% |
---|
Loop Id: 973 | Module: exec | Source: par_multi_interp.c:1747-1837 [...] | Coverage: 0.79% |
---|
0x444140 MOV -0x50(%RBP),%RAX |
0x444144 MOV -0x30(%RBP),%R9 |
0x444148 INC %RDX |
0x44414b CMP %R11,%RDX |
0x44414e JE 444460 |
0x444154 MOV -0x170(%RBP),%RSI |
0x44415b MOV (%RSI,%RDX,8),%RSI |
0x44415f MOV -0x68(%RBP),%RDI |
0x444163 CMP %R9,(%RDI,%RSI,8) |
0x444167 JNE 444190 |
0x444169 MOV -0xb0(%RBP),%R8 |
0x444170 MOV (%R8,%RSI,8),%RDI |
0x444174 MOV 0x8(%R8,%RSI,8),%R8 |
0x444179 MOV %R8,%R9 |
0x44417c SUB %RDI,%R9 |
0x44417f JLE 4442e9 |
0x444185 CMP $0x4,%R9 |
0x444189 JAE 4441c0 |
0x44418b JMP 444295 |
0x444190 MOV -0x158(%RBP),%RDI |
0x444197 CMPQ $-0x3,(%RDI,%RSI,8) |
0x44419c JE 444148 |
0x44419e CMPQ $0x1,-0xf0(%RBP) |
0x4441a6 JE 4441b9 |
0x4441a8 MOV -0xd0(%RBP),%R8 |
0x4441af MOV (%R8,%R9,8),%RDI |
0x4441b3 CMP (%R8,%RSI,8),%RDI |
0x4441b7 JNE 444148 |
0x4441b9 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
0x4441be JMP 444148 |
0x4441c0 MOV %R9,%R10 |
0x4441c3 SHR $0x2,%R10 |
0x4441c7 LEA 0x18(,%RDI,8),%R11 |
0x4441cf MOV %R12,%RCX |
0x4441d2 NOPW %CS:(%RAX,%RAX,1) |
(976) 0x4441e0 MOV -0x18(%RAX,%R11,1),%R12 |
(976) 0x4441e5 VMOVSD -0x18(%R14,%R11,1),%XMM2 |
(976) 0x4441ec VMOVSD (%RBX,%RDX,8),%XMM3 |
(976) 0x4441f1 MOV (%RCX,%R12,8),%R12 |
(976) 0x4441f5 VMOVSD (%R14,%R12,8),%XMM4 |
(976) 0x4441fb VFMADD231SD %XMM2,%XMM3,%XMM4 |
(976) 0x444200 VMOVSD %XMM4,(%R14,%R12,8) |
(976) 0x444206 MOV -0x10(%RAX,%R11,1),%R12 |
(976) 0x44420b VMOVSD -0x10(%R14,%R11,1),%XMM4 |
(976) 0x444212 VMOVSD (%RBX,%RDX,8),%XMM5 |
(976) 0x444217 MOV (%RCX,%R12,8),%R12 |
(976) 0x44421b VMOVSD (%R14,%R12,8),%XMM6 |
(976) 0x444221 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(976) 0x444226 VMOVSD %XMM6,(%R14,%R12,8) |
(976) 0x44422c MOV -0x8(%RAX,%R11,1),%R12 |
(976) 0x444231 VMOVSD -0x8(%R14,%R11,1),%XMM6 |
(976) 0x444238 VMOVSD (%RBX,%RDX,8),%XMM7 |
(976) 0x44423d MOV (%RCX,%R12,8),%R12 |
(976) 0x444241 VMOVSD (%R14,%R12,8),%XMM8 |
(976) 0x444247 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(976) 0x44424c VMOVSD %XMM8,(%R14,%R12,8) |
(976) 0x444252 VMOVSD (%R14,%R11,1),%XMM8 |
(976) 0x444258 VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(976) 0x44425d MOV (%RAX,%R11,1),%R12 |
(976) 0x444261 MOV (%RCX,%R12,8),%R12 |
(976) 0x444265 VADDSD (%R14,%R12,8),%XMM10,%XMM8 |
(976) 0x44426b VMOVSD %XMM8,(%R14,%R12,8) |
(976) 0x444271 VFMADD213SD %XMM10,%XMM5,%XMM4 |
(976) 0x444276 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(976) 0x44427b VFMADD231SD %XMM6,%XMM7,%XMM4 |
(976) 0x444280 VADDSD %XMM1,%XMM4,%XMM1 |
(976) 0x444284 VADDSD %XMM0,%XMM4,%XMM0 |
(976) 0x444288 ADD $0x20,%R11 |
(976) 0x44428c DEC %R10 |
(976) 0x44428f JNE 4441e0 |
0x444295 MOV %R9,%R10 |
0x444298 AND $-0x4,%R10 |
0x44429c CMP %R9,%R10 |
0x44429f MOV -0x80(%RBP),%R11 |
0x4442a3 JAE 4442e1 |
0x4442a5 ADD %R10,%RDI |
0x4442a8 MOV -0x40(%RBP),%R10 |
0x4442ac MOV -0x48(%RBP),%R12 |
(977) 0x4442b0 MOV (%RAX,%RDI,8),%R9 |
(977) 0x4442b4 VMOVSD (%R14,%RDI,8),%XMM2 |
(977) 0x4442ba VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(977) 0x4442bf MOV (%R12,%R9,8),%R9 |
(977) 0x4442c3 VADDSD (%R14,%R9,8),%XMM10,%XMM2 |
(977) 0x4442c9 VMOVSD %XMM2,(%R14,%R9,8) |
(977) 0x4442cf VADDSD %XMM1,%XMM10,%XMM1 |
(977) 0x4442d3 VADDSD %XMM0,%XMM10,%XMM0 |
(977) 0x4442d7 INC %RDI |
(977) 0x4442da CMP %RDI,%R8 |
(977) 0x4442dd JNE 4442b0 |
0x4442df JMP 4442e9 |
0x4442e1 MOV -0x40(%RBP),%R10 |
0x4442e5 MOV -0x48(%RBP),%R12 |
0x4442e9 MOV -0xb8(%RBP),%RAX |
0x4442f0 MOV (%RAX,%RSI,8),%RDI |
0x4442f4 MOV 0x8(%RAX,%RSI,8),%RSI |
0x4442f9 MOV %RSI,%R8 |
0x4442fc SUB %RDI,%R8 |
0x4442ff JLE 444140 |
0x444305 CMP $0x4,%R8 |
0x444309 JAE 444310 |
0x44430b JMP 4443ee |
0x444310 MOV %R8,%R9 |
0x444313 SHR $0x2,%R9 |
0x444317 MOV %R10,%RAX |
0x44431a LEA 0x18(,%RDI,8),%R10 |
0x444322 NOPW %CS:(%RAX,%RAX,1) |
(974) 0x444330 MOV -0x18(%RAX,%R10,1),%R11 |
(974) 0x444335 VMOVSD -0x18(%R13,%R10,1),%XMM2 |
(974) 0x44433c VMOVSD (%RBX,%RDX,8),%XMM3 |
(974) 0x444341 MOV (%R15,%R11,8),%R11 |
(974) 0x444345 VMOVSD (%R13,%R11,8),%XMM4 |
(974) 0x44434c VFMADD231SD %XMM2,%XMM3,%XMM4 |
(974) 0x444351 VMOVSD %XMM4,(%R13,%R11,8) |
(974) 0x444358 MOV -0x10(%RAX,%R10,1),%R11 |
(974) 0x44435d VMOVSD -0x10(%R13,%R10,1),%XMM4 |
(974) 0x444364 VMOVSD (%RBX,%RDX,8),%XMM5 |
(974) 0x444369 MOV (%R15,%R11,8),%R11 |
(974) 0x44436d VMOVSD (%R13,%R11,8),%XMM6 |
(974) 0x444374 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(974) 0x444379 VMOVSD %XMM6,(%R13,%R11,8) |
(974) 0x444380 MOV -0x8(%RAX,%R10,1),%R11 |
(974) 0x444385 VMOVSD -0x8(%R13,%R10,1),%XMM6 |
(974) 0x44438c VMOVSD (%RBX,%RDX,8),%XMM7 |
(974) 0x444391 MOV (%R15,%R11,8),%R11 |
(974) 0x444395 VMOVSD (%R13,%R11,8),%XMM8 |
(974) 0x44439c VFMADD231SD %XMM6,%XMM7,%XMM8 |
(974) 0x4443a1 VMOVSD %XMM8,(%R13,%R11,8) |
(974) 0x4443a8 VMOVSD (%R13,%R10,1),%XMM8 |
(974) 0x4443af VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(974) 0x4443b4 MOV (%RAX,%R10,1),%R11 |
(974) 0x4443b8 MOV (%R15,%R11,8),%R11 |
(974) 0x4443bc VADDSD (%R13,%R11,8),%XMM10,%XMM8 |
(974) 0x4443c3 VMOVSD %XMM8,(%R13,%R11,8) |
(974) 0x4443ca VFMADD213SD %XMM10,%XMM5,%XMM4 |
(974) 0x4443cf VFMADD231SD %XMM2,%XMM3,%XMM4 |
(974) 0x4443d4 VFMADD231SD %XMM6,%XMM7,%XMM4 |
(974) 0x4443d9 VADDSD %XMM1,%XMM4,%XMM1 |
(974) 0x4443dd VADDSD %XMM0,%XMM4,%XMM0 |
(974) 0x4443e1 ADD $0x20,%R10 |
(974) 0x4443e5 DEC %R9 |
(974) 0x4443e8 JNE 444330 |
0x4443ee MOV %R8,%R9 |
0x4443f1 AND $-0x4,%R9 |
0x4443f5 CMP %R8,%R9 |
0x4443f8 JAE 444447 |
0x4443fa ADD %R9,%RDI |
0x4443fd MOV -0x40(%RBP),%R10 |
0x444401 MOV -0x50(%RBP),%RAX |
0x444405 MOV -0x30(%RBP),%R9 |
0x444409 MOV -0x80(%RBP),%R11 |
0x44440d NOPL (%RAX) |
(975) 0x444410 MOV (%R10,%RDI,8),%R8 |
(975) 0x444414 VMOVSD (%R13,%RDI,8),%XMM2 |
(975) 0x44441b VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(975) 0x444420 MOV (%R15,%R8,8),%R8 |
(975) 0x444424 VADDSD (%R13,%R8,8),%XMM10,%XMM2 |
(975) 0x44442b VMOVSD %XMM2,(%R13,%R8,8) |
(975) 0x444432 VADDSD %XMM1,%XMM10,%XMM1 |
(975) 0x444436 VADDSD %XMM0,%XMM10,%XMM0 |
(975) 0x44443a INC %RDI |
(975) 0x44443d CMP %RDI,%RSI |
(975) 0x444440 JNE 444410 |
0x444442 JMP 444148 |
0x444447 MOV -0x40(%RBP),%R10 |
0x44444b MOV -0x50(%RBP),%RAX |
0x44444f MOV -0x30(%RBP),%R9 |
0x444453 MOV -0x80(%RBP),%R11 |
0x444457 JMP 444148 |
/home/eoseret/qaas_runs_CPU_9468/171-112-7443/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1837 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.19 |
CQA speedup if FP arith vectorized | 2.43 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.23 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1811-1816,par_multi_interp.c:1824-1824,par_multi_interp.c:1835-1837 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 13.17 |
CQA cycles if no scalar integer | 6.00 |
CQA cycles if FP arith vectorized | 5.42 |
CQA cycles if fully vectorized | 1.65 |
Front-end cycles | 13.17 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 6.50 |
P1 cycles | 5.30 |
P2 cycles | 10.67 |
P3 cycles | 10.67 |
P4 cycles | 0.00 |
P5 cycles | 5.30 |
P6 cycles | 6.50 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 5.40 |
P11 cycles | 10.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 13.87 |
Stall cycles (UFS) | 0.49 |
Nb insns | 76.00 |
Nb uops | 76.00 |
Nb loads | 32.00 |
Nb stores | 0.00 |
Nb stack references | 12.00 |
FLOP/cycle | 0.08 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 19.44 |
Bytes prefetched | 0.00 |
Bytes loaded | 256.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.19 |
CQA speedup if FP arith vectorized | 2.43 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.23 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1811-1816,par_multi_interp.c:1824-1824,par_multi_interp.c:1835-1837 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 13.17 |
CQA cycles if no scalar integer | 6.00 |
CQA cycles if FP arith vectorized | 5.42 |
CQA cycles if fully vectorized | 1.65 |
Front-end cycles | 13.17 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 6.50 |
P1 cycles | 5.30 |
P2 cycles | 10.67 |
P3 cycles | 10.67 |
P4 cycles | 0.00 |
P5 cycles | 5.30 |
P6 cycles | 6.50 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 5.40 |
P11 cycles | 10.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 13.87 |
Stall cycles (UFS) | 0.49 |
Nb insns | 76.00 |
Nb uops | 76.00 |
Nb loads | 32.00 |
Nb stores | 0.00 |
Nb stack references | 12.00 |
FLOP/cycle | 0.08 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 19.44 |
Bytes prefetched | 0.00 |
Bytes loaded | 256.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1837 |
Module | exec |
nb instructions | 76 |
nb uops | 76 |
loop length | 328 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 13.17 cycles |
front end | 13.17 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 5.30 | 10.67 | 10.67 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 10.67 |
cycles | 6.50 | 5.30 | 10.67 | 10.67 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 10.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 13.87 |
Stall cycles | 0.49 |
LM full (events) | 1.32 |
Front-end | 13.17 |
Dispatch | 10.67 |
Overall L1 | 13.17 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 444460 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb90> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x170(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R9,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 444190 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xb0(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8,%RSI,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4442e9 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa19> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4441c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 444295 <hypre_BoomerAMGBuildMultipass.extracted.28+0x9c5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x158(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $-0x3,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPQ $0x1,-0xf0(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4441b9 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8e9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xd0(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%R9,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP (%R8,%RSI,8),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
JMP 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(,%RDI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x80(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4442e1 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa11> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R10,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4442e9 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa19> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RSI,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 444140 <hypre_BoomerAMGBuildMultipass.extracted.28+0x870> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 444310 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4443ee <hypre_BoomerAMGBuildMultipass.extracted.28+0xb1e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R10,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x18(,%RDI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 444447 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb77> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R9,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1837 |
Module | exec |
nb instructions | 76 |
nb uops | 76 |
loop length | 328 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 13.17 cycles |
front end | 13.17 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 5.30 | 10.67 | 10.67 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 10.67 |
cycles | 6.50 | 5.30 | 10.67 | 10.67 | 0.00 | 5.30 | 6.50 | 0.00 | 0.00 | 0.00 | 5.40 | 10.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 13.87 |
Stall cycles | 0.49 |
LM full (events) | 1.32 |
Front-end | 13.17 |
Dispatch | 10.67 |
Overall L1 | 13.17 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 444460 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb90> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x170(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R9,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 444190 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xb0(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8,%RSI,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4442e9 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa19> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4441c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 444295 <hypre_BoomerAMGBuildMultipass.extracted.28+0x9c5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x158(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $-0x3,(%RDI,%RSI,8) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPQ $0x1,-0xf0(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4441b9 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8e9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xd0(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8,%R9,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP (%R8,%RSI,8),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
JMP 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(,%RDI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x80(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4442e1 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa11> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R10,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4442e9 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa19> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RSI,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RSI,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 444140 <hypre_BoomerAMGBuildMultipass.extracted.28+0x870> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 444310 <hypre_BoomerAMGBuildMultipass.extracted.28+0xa40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4443ee <hypre_BoomerAMGBuildMultipass.extracted.28+0xb1e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R10,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x18(,%RDI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 444447 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb77> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R9,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 444148 <hypre_BoomerAMGBuildMultipass.extracted.28+0x878> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |