Function: hypre_qsort2abs | Module: exec | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
Function: hypre_qsort2abs | Module: exec | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-7443/intel/AMG/build/AMG/AMG/parcsr_ls/par_interp.c: 3180 - 3192 |
-------------------------------------------------------------------------------- |
3180: if (left >= right) |
3181: return; |
3182: hypre_swap2( v, w, left, (left+right)/2); |
3183: last = left; |
3184: for (i = left+1; i <= right; i++) |
3185: if (fabs(w[i]) > fabs(w[left])) |
3186: { |
3187: hypre_swap2(v, w, ++last, i); |
3188: } |
3189: hypre_swap2(v, w, left, last); |
3190: hypre_qsort2abs(v, w, left, last-1); |
3191: hypre_qsort2abs(v, w, last+1, right); |
3192: } |
0x43a1a0 CMP %RCX,%RDX |
0x43a1a3 JGE 43a64e |
0x43a1a9 PUSH %RBP |
0x43a1aa MOV %RSP,%RBP |
0x43a1ad PUSH %R15 |
0x43a1af PUSH %R14 |
0x43a1b1 PUSH %R13 |
0x43a1b3 PUSH %R12 |
0x43a1b5 PUSH %RBX |
0x43a1b6 SUB $0x38,%RSP |
0x43a1ba MOV %RCX,%R15 |
0x43a1bd MOV %RDX,%R12 |
0x43a1c0 MOV %RSI,%R14 |
0x43a1c3 MOV %RDI,%RBX |
0x43a1c6 VMOVDDUP 0xf0f9a(%RIP),%XMM0 |
0x43a1ce VMOVUPD %XMM0,-0x60(%RBP) |
0x43a1d3 MOV %RDX,%R13 |
0x43a1d6 MOV %RCX,-0x38(%RBP) |
0x43a1da MOV %RDI,-0x30(%RBP) |
0x43a1de JMP 43a23a |
0x43a1e0 NOPW %CS:(%RAX,%RAX,1) |
0x43a1ef NOPW %CS:(%RAX,%RAX,1) |
0x43a1fe XCHG %AX,%AX |
(574) 0x43a200 MOV -0x38(%RBP),%R15 |
(574) 0x43a204 MOV -0x30(%RBP),%RBX |
(574) 0x43a208 MOV %RBX,%RDI |
(574) 0x43a20b MOV %R14,%RSI |
(574) 0x43a20e MOV %R12,%RDX |
(574) 0x43a211 MOV %R13,%RCX |
(574) 0x43a214 CALL 51bd60 <hypre_swap2> |
(574) 0x43a219 LEA -0x1(%R13),%RCX |
(574) 0x43a21d MOV %RBX,%RDI |
(574) 0x43a220 MOV %R14,%RSI |
(574) 0x43a223 MOV %R12,%RDX |
(574) 0x43a226 CALL 43a1a0 <hypre_qsort2abs> |
(574) 0x43a22b INC %R13 |
(574) 0x43a22e MOV %R13,%R12 |
(574) 0x43a231 CMP %R15,%R13 |
(574) 0x43a234 JGE 43a640 |
(574) 0x43a23a LEA (%R12,%R15,1),%RAX |
(574) 0x43a23e MOV %RAX,%RCX |
(574) 0x43a241 SHR $0x3f,%RCX |
(574) 0x43a245 ADD %RAX,%RCX |
(574) 0x43a248 SAR $0x1,%RCX |
(574) 0x43a24b MOV %RBX,%RDI |
(574) 0x43a24e MOV %R14,%RSI |
(574) 0x43a251 MOV %R12,%RDX |
(574) 0x43a254 CALL 51bd60 <hypre_swap2> |
(574) 0x43a259 LEA 0x1(%R12),%RAX |
(574) 0x43a25e CMP %RAX,%R15 |
(574) 0x43a261 CMOVG %R15,%RAX |
(574) 0x43a265 MOV %RAX,-0x48(%RBP) |
(574) 0x43a269 SUB %R12,%RAX |
(574) 0x43a26c MOV %RAX,-0x40(%RBP) |
(574) 0x43a270 CMP $0x8,%RAX |
(574) 0x43a274 JAE 43a340 |
(574) 0x43a27a MOV -0x40(%RBP),%RCX |
(574) 0x43a27e MOV %RCX,%RAX |
(574) 0x43a281 AND $-0x8,%RAX |
(574) 0x43a285 CMP %RCX,%RAX |
(574) 0x43a288 JAE 43a200 |
(574) 0x43a28e LEA 0x1(%R12,%RAX,1),%RBX |
(574) 0x43a293 NEGQ -0x48(%RBP) |
(574) 0x43a297 MOV -0x38(%RBP),%R15 |
(574) 0x43a29b JMP 43a2d6 |
0x43a29d NOPW %CS:(%RAX,%RAX,1) |
0x43a2ac NOPW %CS:(%RAX,%RAX,1) |
0x43a2bb NOPL (%RAX,%RAX,1) |
(575) 0x43a2c0 MOV -0x48(%RBP),%RAX |
(575) 0x43a2c4 LEA 0x1(%RAX,%RBX,1),%RAX |
(575) 0x43a2c9 INC %RBX |
(575) 0x43a2cc CMP $0x1,%RAX |
(575) 0x43a2d0 JE 43a204 |
(575) 0x43a2d6 VMOVSD (%R14,%RBX,8),%XMM0 |
(575) 0x43a2dc VMOVUPD -0x60(%RBP),%XMM2 |
(575) 0x43a2e1 VANDPD %XMM2,%XMM0,%XMM0 |
(575) 0x43a2e5 VMOVSD (%R14,%R12,8),%XMM1 |
(575) 0x43a2eb VANDPD %XMM2,%XMM1,%XMM1 |
(575) 0x43a2ef VUCOMISD %XMM1,%XMM0 |
(575) 0x43a2f3 JBE 43a2c0 |
(575) 0x43a2f5 INC %R13 |
(575) 0x43a2f8 MOV -0x30(%RBP),%RDI |
(575) 0x43a2fc MOV %R14,%RSI |
(575) 0x43a2ff MOV %R13,%RDX |
(575) 0x43a302 MOV %RBX,%RCX |
(575) 0x43a305 CALL 51bd60 <hypre_swap2> |
(575) 0x43a30a JMP 43a2c0 |
0x43a30c NOPW %CS:(%RAX,%RAX,1) |
0x43a31b NOPW %CS:(%RAX,%RAX,1) |
0x43a32a NOPW %CS:(%RAX,%RAX,1) |
0x43a339 NOPL (%RAX) |
(574) 0x43a340 MOV -0x40(%RBP),%R15 |
(574) 0x43a344 SHR $0x3,%R15 |
(574) 0x43a348 LEA 0x8(%R12),%RBX |
(574) 0x43a34d MOV %R12,%R13 |
(574) 0x43a350 JMP 43a38d |
0x43a352 NOPW %CS:(%RAX,%RAX,1) |
0x43a361 NOPW %CS:(%RAX,%RAX,1) |
0x43a370 NOPW %CS:(%RAX,%RAX,1) |
0x43a37f NOP |
(576) 0x43a380 ADD $0x8,%RBX |
(576) 0x43a384 DEC %R15 |
(576) 0x43a387 JE 43a27a |
(576) 0x43a38d VMOVSD -0x38(%R14,%RBX,8),%XMM0 |
(576) 0x43a394 VMOVUPD -0x60(%RBP),%XMM2 |
(576) 0x43a399 VANDPD %XMM2,%XMM0,%XMM1 |
(576) 0x43a39d VMOVSD (%R14,%R12,8),%XMM0 |
(576) 0x43a3a3 VANDPD %XMM2,%XMM0,%XMM0 |
(576) 0x43a3a7 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a3ab JBE 43a400 |
(576) 0x43a3ad LEA -0x7(%RBX),%RCX |
(576) 0x43a3b1 INC %R13 |
(576) 0x43a3b4 MOV -0x30(%RBP),%RDI |
(576) 0x43a3b8 MOV %R14,%RSI |
(576) 0x43a3bb MOV %R13,%RDX |
(576) 0x43a3be CALL 51bd60 <hypre_swap2> |
(576) 0x43a3c3 VMOVSD (%R14,%R12,8),%XMM0 |
(576) 0x43a3c9 VMOVDDUP 0xf0d97(%RIP),%XMM2 |
(576) 0x43a3d1 VANDPD %XMM2,%XMM0,%XMM0 |
(576) 0x43a3d5 VMOVSD -0x30(%R14,%RBX,8),%XMM1 |
(576) 0x43a3dc VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a3e0 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a3e4 JA 43a419 |
(576) 0x43a3e6 JMP 43a441 |
0x43a3e8 NOPW %CS:(%RAX,%RAX,1) |
0x43a3f7 NOPW (%RAX,%RAX,1) |
(576) 0x43a400 VMOVDDUP 0xf0d60(%RIP),%XMM2 |
(576) 0x43a408 VMOVSD -0x30(%R14,%RBX,8),%XMM1 |
(576) 0x43a40f VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a413 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a417 JBE 43a441 |
(576) 0x43a419 LEA -0x6(%RBX),%RCX |
(576) 0x43a41d INC %R13 |
(576) 0x43a420 MOV -0x30(%RBP),%RDI |
(576) 0x43a424 MOV %R14,%RSI |
(576) 0x43a427 MOV %R13,%RDX |
(576) 0x43a42a CALL 51bd60 <hypre_swap2> |
(576) 0x43a42f VMOVDDUP 0xf0d31(%RIP),%XMM2 |
(576) 0x43a437 VMOVSD (%R14,%R12,8),%XMM0 |
(576) 0x43a43d VANDPD %XMM2,%XMM0,%XMM0 |
(576) 0x43a441 VMOVSD -0x28(%R14,%RBX,8),%XMM1 |
(576) 0x43a448 VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a44c VUCOMISD %XMM0,%XMM1 |
(576) 0x43a450 JA 43a4c0 |
(576) 0x43a452 VMOVSD -0x20(%R14,%RBX,8),%XMM1 |
(576) 0x43a459 VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a45d VUCOMISD %XMM0,%XMM1 |
(576) 0x43a461 JA 43a4fd |
(576) 0x43a467 VMOVSD -0x18(%R14,%RBX,8),%XMM1 |
(576) 0x43a46e VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a472 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a476 JA 43a53a |
(576) 0x43a47c VMOVSD -0x10(%R14,%RBX,8),%XMM1 |
(576) 0x43a483 VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a487 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a48b JA 43a577 |
(576) 0x43a491 VMOVSD -0x8(%R14,%RBX,8),%XMM1 |
(576) 0x43a498 VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a49c VUCOMISD %XMM0,%XMM1 |
(576) 0x43a4a0 JA 43a5b4 |
(576) 0x43a4a6 VMOVSD (%R14,%RBX,8),%XMM1 |
(576) 0x43a4ac VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a4b0 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a4b4 JBE 43a380 |
(576) 0x43a4ba JMP 43a5f0 |
0x43a4bf NOP |
(576) 0x43a4c0 LEA -0x5(%RBX),%RCX |
(576) 0x43a4c4 INC %R13 |
(576) 0x43a4c7 MOV -0x30(%RBP),%RDI |
(576) 0x43a4cb MOV %R14,%RSI |
(576) 0x43a4ce MOV %R13,%RDX |
(576) 0x43a4d1 CALL 51bd60 <hypre_swap2> |
(576) 0x43a4d6 VMOVDDUP 0xf0c8a(%RIP),%XMM2 |
(576) 0x43a4de VMOVSD (%R14,%R12,8),%XMM0 |
(576) 0x43a4e4 VANDPD %XMM2,%XMM0,%XMM0 |
(576) 0x43a4e8 VMOVSD -0x20(%R14,%RBX,8),%XMM1 |
(576) 0x43a4ef VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a4f3 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a4f7 JBE 43a467 |
(576) 0x43a4fd LEA -0x4(%RBX),%RCX |
(576) 0x43a501 INC %R13 |
(576) 0x43a504 MOV -0x30(%RBP),%RDI |
(576) 0x43a508 MOV %R14,%RSI |
(576) 0x43a50b MOV %R13,%RDX |
(576) 0x43a50e CALL 51bd60 <hypre_swap2> |
(576) 0x43a513 VMOVDDUP 0xf0c4d(%RIP),%XMM2 |
(576) 0x43a51b VMOVSD (%R14,%R12,8),%XMM0 |
(576) 0x43a521 VANDPD %XMM2,%XMM0,%XMM0 |
(576) 0x43a525 VMOVSD -0x18(%R14,%RBX,8),%XMM1 |
(576) 0x43a52c VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a530 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a534 JBE 43a47c |
(576) 0x43a53a LEA -0x3(%RBX),%RCX |
(576) 0x43a53e INC %R13 |
(576) 0x43a541 MOV -0x30(%RBP),%RDI |
(576) 0x43a545 MOV %R14,%RSI |
(576) 0x43a548 MOV %R13,%RDX |
(576) 0x43a54b CALL 51bd60 <hypre_swap2> |
(576) 0x43a550 VMOVDDUP 0xf0c10(%RIP),%XMM2 |
(576) 0x43a558 VMOVSD (%R14,%R12,8),%XMM0 |
(576) 0x43a55e VANDPD %XMM2,%XMM0,%XMM0 |
(576) 0x43a562 VMOVSD -0x10(%R14,%RBX,8),%XMM1 |
(576) 0x43a569 VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a56d VUCOMISD %XMM0,%XMM1 |
(576) 0x43a571 JBE 43a491 |
(576) 0x43a577 LEA -0x2(%RBX),%RCX |
(576) 0x43a57b INC %R13 |
(576) 0x43a57e MOV -0x30(%RBP),%RDI |
(576) 0x43a582 MOV %R14,%RSI |
(576) 0x43a585 MOV %R13,%RDX |
(576) 0x43a588 CALL 51bd60 <hypre_swap2> |
(576) 0x43a58d VMOVDDUP 0xf0bd3(%RIP),%XMM2 |
(576) 0x43a595 VMOVSD (%R14,%R12,8),%XMM0 |
(576) 0x43a59b VANDPD %XMM2,%XMM0,%XMM0 |
(576) 0x43a59f VMOVSD -0x8(%R14,%RBX,8),%XMM1 |
(576) 0x43a5a6 VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a5aa VUCOMISD %XMM0,%XMM1 |
(576) 0x43a5ae JBE 43a4a6 |
(576) 0x43a5b4 LEA -0x1(%RBX),%RCX |
(576) 0x43a5b8 INC %R13 |
(576) 0x43a5bb MOV -0x30(%RBP),%RDI |
(576) 0x43a5bf MOV %R14,%RSI |
(576) 0x43a5c2 MOV %R13,%RDX |
(576) 0x43a5c5 CALL 51bd60 <hypre_swap2> |
(576) 0x43a5ca VMOVDDUP 0xf0b96(%RIP),%XMM2 |
(576) 0x43a5d2 VMOVSD (%R14,%R12,8),%XMM0 |
(576) 0x43a5d8 VANDPD %XMM2,%XMM0,%XMM0 |
(576) 0x43a5dc VMOVSD (%R14,%RBX,8),%XMM1 |
(576) 0x43a5e2 VANDPD %XMM2,%XMM1,%XMM1 |
(576) 0x43a5e6 VUCOMISD %XMM0,%XMM1 |
(576) 0x43a5ea JBE 43a380 |
(576) 0x43a5f0 INC %R13 |
(576) 0x43a5f3 MOV -0x30(%RBP),%RDI |
(576) 0x43a5f7 MOV %R14,%RSI |
(576) 0x43a5fa MOV %R13,%RDX |
(576) 0x43a5fd MOV %RBX,%RCX |
(576) 0x43a600 CALL 51bd60 <hypre_swap2> |
(576) 0x43a605 JMP 43a380 |
0x43a60a NOPW %CS:(%RAX,%RAX,1) |
0x43a619 NOPW %CS:(%RAX,%RAX,1) |
0x43a628 NOPW %CS:(%RAX,%RAX,1) |
0x43a637 NOPW (%RAX,%RAX,1) |
0x43a640 ADD $0x38,%RSP |
0x43a644 POP %RBX |
0x43a645 POP %R12 |
0x43a647 POP %R13 |
0x43a649 POP %R14 |
0x43a64b POP %R15 |
0x43a64d POP %RBP |
0x43a64e RET |
0x43a64f NOP |
Path / |
Source file and lines | par_interp.c:3180-3192 |
Module | exec |
nb instructions | 50 |
nb uops | 50 |
loop length | 324 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 8.33 cycles |
front end | 8.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 8.44 |
Stall cycles | 0.00 |
Front-end | 8.33 |
Dispatch | 4.50 |
Overall L1 | 8.33 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43a64e <hypre_qsort2abs+0x4ae> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0xf0f9a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 43a23a <hypre_qsort2abs+0x9a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | par_interp.c:3180-3192 |
Module | exec |
nb instructions | 50 |
nb uops | 50 |
loop length | 324 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 8.33 cycles |
front end | 8.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 8.44 |
Stall cycles | 0.00 |
Front-end | 8.33 |
Dispatch | 4.50 |
Overall L1 | 8.33 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43a64e <hypre_qsort2abs+0x4ae> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0xf0f9a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 43a23a <hypre_qsort2abs+0x9a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_qsort2abs– | 0.01 | 0 |
▼Loop 574 - par_interp.c:3180-3191 - exec– | 0 | 0 |
○Loop 576 - par_interp.c:3184-3187 - exec | 0 | 0 |
○Loop 575 - par_interp.c:3180-3187 - exec | 0 | 0 |