Function: hypre_qsort0 | Module: exec | Source: hypre_qsort.c:31-187 [...] | Coverage: 0.01% |
---|
Function: hypre_qsort0 | Module: exec | Source: hypre_qsort.c:31-187 [...] | Coverage: 0.01% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-147-2675/intel/AMG/build/AMG/AMG/utilities/hypre_qsort.c: 31 - 187 |
-------------------------------------------------------------------------------- |
31: temp = v[i]; |
32: v[i] = v[j]; |
33: v[j] = temp; |
[...] |
175: if (left >= right) |
176: return; |
177: hypre_swap( v, left, (left+right)/2); |
178: last = left; |
179: for (i = left+1; i <= right; i++) |
180: if (v[i] < v[left]) |
181: { |
182: hypre_swap(v, ++last, i); |
183: } |
184: hypre_swap(v, left, last); |
185: hypre_qsort0(v, left, last-1); |
186: hypre_qsort0(v, last+1, right); |
187: } |
0x4e7a70 CMP %RDX,%RSI |
0x4e7a73 JGE 4e7c1a |
0x4e7a79 PUSH %RBP |
0x4e7a7a MOV %RSP,%RBP |
0x4e7a7d PUSH %R15 |
0x4e7a7f PUSH %R14 |
0x4e7a81 PUSH %R12 |
0x4e7a83 PUSH %RBX |
0x4e7a84 MOV %RDX,%RBX |
0x4e7a87 MOV %RDI,%R14 |
0x4e7a8a LEA 0x20(%RDI),%R15 |
0x4e7a8e MOV %RSI,%R12 |
0x4e7a91 JMP 4e7acc |
0x4e7a93 NOPW %CS:(%RAX,%RAX,1) |
(4441) 0x4e7aa0 MOV (%R14,%RSI,8),%RAX |
(4441) 0x4e7aa4 MOV (%R14,%R12,8),%RCX |
(4441) 0x4e7aa8 MOV %RCX,(%R14,%RSI,8) |
(4441) 0x4e7aac MOV %RAX,(%R14,%R12,8) |
(4441) 0x4e7ab0 LEA -0x1(%R12),%RDX |
(4441) 0x4e7ab5 MOV %R14,%RDI |
(4441) 0x4e7ab8 CALL 4e7a70 <hypre_qsort0> |
(4441) 0x4e7abd INC %R12 |
(4441) 0x4e7ac0 MOV %R12,%RSI |
(4441) 0x4e7ac3 CMP %RBX,%R12 |
(4441) 0x4e7ac6 JGE 4e7c12 |
(4441) 0x4e7acc LEA (%RSI,%RBX,1),%RAX |
(4441) 0x4e7ad0 MOV %RAX,%RCX |
(4441) 0x4e7ad3 SHR $0x3f,%RCX |
(4441) 0x4e7ad7 ADD %RAX,%RCX |
(4441) 0x4e7ada MOV (%R14,%RSI,8),%RAX |
(4441) 0x4e7ade AND $-0x2,%RCX |
(4441) 0x4e7ae2 MOV (%R14,%RCX,4),%RDX |
(4441) 0x4e7ae6 MOV %RDX,(%R14,%RSI,8) |
(4441) 0x4e7aea MOV %RAX,(%R14,%RCX,4) |
(4441) 0x4e7aee LEA 0x1(%RSI),%RAX |
(4441) 0x4e7af2 CMP %RAX,%RBX |
(4441) 0x4e7af5 CMOVG %RBX,%RAX |
(4441) 0x4e7af9 MOV %RAX,%RCX |
(4441) 0x4e7afc SUB %RSI,%RCX |
(4441) 0x4e7aff CMP $0x4,%RCX |
(4441) 0x4e7b03 JAE 4e7b50 |
(4441) 0x4e7b05 MOV %RCX,%RDX |
(4441) 0x4e7b08 AND $-0x4,%RDX |
(4441) 0x4e7b0c CMP %RCX,%RDX |
(4441) 0x4e7b0f JAE 4e7aa0 |
(4441) 0x4e7b11 ADD %RSI,%RDX |
(4441) 0x4e7b14 JMP 4e7b2c |
0x4e7b16 NOPW %CS:(%RAX,%RAX,1) |
(4442) 0x4e7b20 INC %RDX |
(4442) 0x4e7b23 CMP %RDX,%RAX |
(4442) 0x4e7b26 JE 4e7aa0 |
(4442) 0x4e7b2c MOV 0x8(%R14,%RDX,8),%RCX |
(4442) 0x4e7b31 CMP (%R14,%RSI,8),%RCX |
(4442) 0x4e7b35 JGE 4e7b20 |
(4442) 0x4e7b37 MOV 0x8(%R14,%R12,8),%RDI |
(4442) 0x4e7b3c MOV %RCX,0x8(%R14,%R12,8) |
(4442) 0x4e7b41 INC %R12 |
(4442) 0x4e7b44 MOV %RDI,0x8(%R14,%RDX,8) |
(4442) 0x4e7b49 JMP 4e7b20 |
0x4e7b4b NOPL (%RAX,%RAX,1) |
(4441) 0x4e7b50 MOV %RCX,%RDX |
(4441) 0x4e7b53 SHR $0x2,%RDX |
(4441) 0x4e7b57 LEA (%R15,%RSI,8),%RDI |
(4441) 0x4e7b5b MOV %RSI,%R12 |
(4441) 0x4e7b5e JMP 4e7b69 |
(4443) 0x4e7b60 ADD $0x20,%RDI |
(4443) 0x4e7b64 DEC %RDX |
(4443) 0x4e7b67 JE 4e7b05 |
(4443) 0x4e7b69 MOV -0x18(%RDI),%R9 |
(4443) 0x4e7b6d MOV (%R14,%RSI,8),%R8 |
(4443) 0x4e7b71 CMP %R8,%R9 |
(4443) 0x4e7b74 JL 4e7ba0 |
(4443) 0x4e7b76 MOV -0x10(%RDI),%R9 |
(4443) 0x4e7b7a CMP %R8,%R9 |
(4443) 0x4e7b7d JL 4e7bbe |
(4443) 0x4e7b7f MOV -0x8(%RDI),%R9 |
(4443) 0x4e7b83 CMP %R8,%R9 |
(4443) 0x4e7b86 JL 4e7bdc |
(4443) 0x4e7b88 MOV (%RDI),%R9 |
(4443) 0x4e7b8b CMP %R8,%R9 |
(4443) 0x4e7b8e JGE 4e7b60 |
(4443) 0x4e7b90 JMP 4e7bfd |
0x4e7b92 NOPW %CS:(%RAX,%RAX,1) |
(4443) 0x4e7ba0 MOV 0x8(%R14,%R12,8),%R8 |
(4443) 0x4e7ba5 MOV %R9,0x8(%R14,%R12,8) |
(4443) 0x4e7baa INC %R12 |
(4443) 0x4e7bad MOV %R8,-0x18(%RDI) |
(4443) 0x4e7bb1 MOV (%R14,%RSI,8),%R8 |
(4443) 0x4e7bb5 MOV -0x10(%RDI),%R9 |
(4443) 0x4e7bb9 CMP %R8,%R9 |
(4443) 0x4e7bbc JGE 4e7b7f |
(4443) 0x4e7bbe MOV 0x8(%R14,%R12,8),%R8 |
(4443) 0x4e7bc3 MOV %R9,0x8(%R14,%R12,8) |
(4443) 0x4e7bc8 INC %R12 |
(4443) 0x4e7bcb MOV %R8,-0x10(%RDI) |
(4443) 0x4e7bcf MOV (%R14,%RSI,8),%R8 |
(4443) 0x4e7bd3 MOV -0x8(%RDI),%R9 |
(4443) 0x4e7bd7 CMP %R8,%R9 |
(4443) 0x4e7bda JGE 4e7b88 |
(4443) 0x4e7bdc MOV 0x8(%R14,%R12,8),%R8 |
(4443) 0x4e7be1 MOV %R9,0x8(%R14,%R12,8) |
(4443) 0x4e7be6 INC %R12 |
(4443) 0x4e7be9 MOV %R8,-0x8(%RDI) |
(4443) 0x4e7bed MOV (%R14,%RSI,8),%R8 |
(4443) 0x4e7bf1 MOV (%RDI),%R9 |
(4443) 0x4e7bf4 CMP %R8,%R9 |
(4443) 0x4e7bf7 JGE 4e7b60 |
(4443) 0x4e7bfd MOV 0x8(%R14,%R12,8),%R8 |
(4443) 0x4e7c02 MOV %R9,0x8(%R14,%R12,8) |
(4443) 0x4e7c07 INC %R12 |
(4443) 0x4e7c0a MOV %R8,(%RDI) |
(4443) 0x4e7c0d JMP 4e7b60 |
0x4e7c12 POP %RBX |
0x4e7c13 POP %R12 |
0x4e7c15 POP %R14 |
0x4e7c17 POP %R15 |
0x4e7c19 POP %RBP |
0x4e7c1a RET |
0x4e7c1b NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | hypre_qsort.c:31-187 |
Module | exec |
nb instructions | 24 |
nb uops | 24 |
loop length | 91 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 4.00 cycles |
front end | 4.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.00 | 2.00 | 2.50 | 0.40 | 1.00 | 2.50 | 2.50 | 2.50 | 0.20 | 2.00 |
cycles | 1.00 | 0.40 | 2.00 | 2.00 | 2.50 | 0.40 | 1.00 | 2.50 | 2.50 | 2.50 | 0.20 | 2.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 4.10-4.12 |
Stall cycles | 0.00 |
Front-end | 4.00 |
Dispatch | 2.50 |
Overall L1 | 4.00 |
Vectorization ratio |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Vector efficiency ratio |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4e7c1a <hypre_qsort0+0x1aa> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x20(%RDI),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4e7acc <hypre_qsort0+0x5c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | hypre_qsort.c:31-187 |
Module | exec |
nb instructions | 24 |
nb uops | 24 |
loop length | 91 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 4.00 cycles |
front end | 4.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.00 | 2.00 | 2.50 | 0.40 | 1.00 | 2.50 | 2.50 | 2.50 | 0.20 | 2.00 |
cycles | 1.00 | 0.40 | 2.00 | 2.00 | 2.50 | 0.40 | 1.00 | 2.50 | 2.50 | 2.50 | 0.20 | 2.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 4.10-4.12 |
Stall cycles | 0.00 |
Front-end | 4.00 |
Dispatch | 2.50 |
Overall L1 | 4.00 |
Vectorization ratio |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Vector efficiency ratio |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4e7c1a <hypre_qsort0+0x1aa> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x20(%RDI),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4e7acc <hypre_qsort0+0x5c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_qsort0– | 0.01 | 0 |
▼Loop 4441 - hypre_qsort.c:31-186 - exec– | 0 | 0 |
○Loop 4442 - hypre_qsort.c:31-182 - exec | 0 | 0 |
○Loop 4443 - hypre_qsort.c:31-182 - exec | 0 | 0.03 |