Function: hypre_BoomerAMGCorrectCFMarker | Module: exec | Source: par_strength.c:2311-2320 | Coverage: 0.01% |
---|
Function: hypre_BoomerAMGCorrectCFMarker | Module: exec | Source: par_strength.c:2311-2320 | Coverage: 0.01% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-716-5699/intel/AMG/build/AMG/AMG/parcsr_ls/par_strength.c: 2311 - 2320 |
-------------------------------------------------------------------------------- |
2311: for (i=0; i < num_var; i++) |
2312: { |
2313: if (CF_marker[i] > 0 ) |
2314: { |
2315: if (CF_marker[i] == 1) CF_marker[i] = new_CF_marker[cnt++]; |
2316: else { CF_marker[i] = 1; cnt++;} |
2317: } |
2318: } |
2319: |
2320: return 0; |
0x48d400 TEST %RSI,%RSI |
0x48d403 JLE 48d4e1 |
0x48d409 CMP $0x4,%RSI |
0x48d40d JAE 48d426 |
0x48d40f XOR %EAX,%EAX |
0x48d411 MOV %RSI,%RCX |
0x48d414 AND $-0x4,%RCX |
0x48d418 CMP %RSI,%RCX |
0x48d41b JB 48d4ff |
0x48d421 JMP 48d4e1 |
0x48d426 PUSH %RBP |
0x48d427 MOV %RSP,%RBP |
0x48d42a MOV %RSI,%RCX |
0x48d42d SHR $0x2,%RCX |
0x48d431 LEA 0x18(%RDI),%R8 |
0x48d435 XOR %EAX,%EAX |
0x48d437 JMP 48d453 |
0x48d439 NOPL (%RAX) |
(2443) 0x48d440 INC %RAX |
(2443) 0x48d443 MOV %R9,(%R8) |
(2443) 0x48d446 ADD $0x20,%R8 |
(2443) 0x48d44a DEC %RCX |
(2443) 0x48d44d JE 48d4d4 |
(2443) 0x48d453 MOV -0x18(%R8),%R10 |
(2443) 0x48d457 TEST %R10,%R10 |
(2443) 0x48d45a JLE 48d473 |
(2443) 0x48d45c MOV $0x1,%R9D |
(2443) 0x48d462 CMP $0x1,%R10 |
(2443) 0x48d466 JNE 48d46c |
(2443) 0x48d468 MOV (%RDX,%RAX,8),%R9 |
(2443) 0x48d46c INC %RAX |
(2443) 0x48d46f MOV %R9,-0x18(%R8) |
(2443) 0x48d473 MOV -0x10(%R8),%R10 |
(2443) 0x48d477 TEST %R10,%R10 |
(2443) 0x48d47a JLE 48d493 |
(2443) 0x48d47c MOV $0x1,%R9D |
(2443) 0x48d482 CMP $0x1,%R10 |
(2443) 0x48d486 JNE 48d48c |
(2443) 0x48d488 MOV (%RDX,%RAX,8),%R9 |
(2443) 0x48d48c INC %RAX |
(2443) 0x48d48f MOV %R9,-0x10(%R8) |
(2443) 0x48d493 MOV -0x8(%R8),%R10 |
(2443) 0x48d497 TEST %R10,%R10 |
(2443) 0x48d49a JLE 48d4b3 |
(2443) 0x48d49c MOV $0x1,%R9D |
(2443) 0x48d4a2 CMP $0x1,%R10 |
(2443) 0x48d4a6 JNE 48d4ac |
(2443) 0x48d4a8 MOV (%RDX,%RAX,8),%R9 |
(2443) 0x48d4ac INC %RAX |
(2443) 0x48d4af MOV %R9,-0x8(%R8) |
(2443) 0x48d4b3 MOV (%R8),%R10 |
(2443) 0x48d4b6 TEST %R10,%R10 |
(2443) 0x48d4b9 JLE 48d446 |
(2443) 0x48d4bb MOV $0x1,%R9D |
(2443) 0x48d4c1 CMP $0x1,%R10 |
(2443) 0x48d4c5 JNE 48d440 |
(2443) 0x48d4cb MOV (%RDX,%RAX,8),%R9 |
(2443) 0x48d4cf JMP 48d440 |
0x48d4d4 POP %RBP |
0x48d4d5 MOV %RSI,%RCX |
0x48d4d8 AND $-0x4,%RCX |
0x48d4dc CMP %RSI,%RCX |
0x48d4df JB 48d4ff |
0x48d4e1 XOR %EAX,%EAX |
0x48d4e3 RET |
0x48d4e4 NOPW %CS:(%RAX,%RAX,1) |
(2442) 0x48d4f0 INC %RAX |
(2442) 0x48d4f3 MOV %R8,(%RDI,%RCX,8) |
(2442) 0x48d4f7 INC %RCX |
(2442) 0x48d4fa CMP %RCX,%RSI |
(2442) 0x48d4fd JE 48d4e1 |
(2442) 0x48d4ff MOV (%RDI,%RCX,8),%R9 |
(2442) 0x48d503 TEST %R9,%R9 |
(2442) 0x48d506 JLE 48d4f7 |
(2442) 0x48d508 MOV $0x1,%R8D |
(2442) 0x48d50e CMP $0x1,%R9 |
(2442) 0x48d512 JNE 48d4f0 |
(2442) 0x48d514 MOV (%RDX,%RAX,8),%R8 |
(2442) 0x48d518 JMP 48d4f0 |
0x48d51a NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | par_strength.c:2311-2320 |
Module | exec |
nb instructions | 27 |
nb uops | 27 |
loop length | 98 |
used x86 registers | 7 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 4.50 cycles |
front end | 4.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 2.00 | 0.67 | 0.67 | 0.50 | 2.00 | 3.00 | 0.50 | 0.50 | 0.50 | 2.00 | 0.67 |
cycles | 3.00 | 2.00 | 0.67 | 0.67 | 0.50 | 2.00 | 3.00 | 0.50 | 0.50 | 0.50 | 2.00 | 0.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 4.59 |
Stall cycles | 0.00 |
Front-end | 4.50 |
Dispatch | 3.00 |
Overall L1 | 4.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 48d4e1 <hypre_BoomerAMGCorrectCFMarker+0xe1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 48d426 <hypre_BoomerAMGCorrectCFMarker+0x26> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 48d4ff <hypre_BoomerAMGCorrectCFMarker+0xff> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 48d4e1 <hypre_BoomerAMGCorrectCFMarker+0xe1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(%RDI),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 48d453 <hypre_BoomerAMGCorrectCFMarker+0x53> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 48d4ff <hypre_BoomerAMGCorrectCFMarker+0xff> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | par_strength.c:2311-2320 |
Module | exec |
nb instructions | 27 |
nb uops | 27 |
loop length | 98 |
used x86 registers | 7 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 4.50 cycles |
front end | 4.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 2.00 | 0.67 | 0.67 | 0.50 | 2.00 | 3.00 | 0.50 | 0.50 | 0.50 | 2.00 | 0.67 |
cycles | 3.00 | 2.00 | 0.67 | 0.67 | 0.50 | 2.00 | 3.00 | 0.50 | 0.50 | 0.50 | 2.00 | 0.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 4.59 |
Stall cycles | 0.00 |
Front-end | 4.50 |
Dispatch | 3.00 |
Overall L1 | 4.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 48d4e1 <hypre_BoomerAMGCorrectCFMarker+0xe1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 48d426 <hypre_BoomerAMGCorrectCFMarker+0x26> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 48d4ff <hypre_BoomerAMGCorrectCFMarker+0xff> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 48d4e1 <hypre_BoomerAMGCorrectCFMarker+0xe1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x18(%RDI),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 48d453 <hypre_BoomerAMGCorrectCFMarker+0x53> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 48d4ff <hypre_BoomerAMGCorrectCFMarker+0xff> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_BoomerAMGCorrectCFMarker– | 0.01 | 0 |
○Loop 2443 - par_strength.c:2311-2315 - exec | 0.01 | 0.1 |
○Loop 2442 - par_strength.c:2311-2315 - exec | 0 | 0 |