Function: hypre_qsort2abs | Module: libparcsr_ls.so | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
Function: hypre_qsort2abs | Module: libparcsr_ls.so | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-716-5699/intel/AMG/build/AMG/AMG/parcsr_ls/par_interp.c: 3180 - 3192 |
-------------------------------------------------------------------------------- |
3180: if (left >= right) |
3181: return; |
3182: hypre_swap2( v, w, left, (left+right)/2); |
3183: last = left; |
3184: for (i = left+1; i <= right; i++) |
3185: if (fabs(w[i]) > fabs(w[left])) |
3186: { |
3187: hypre_swap2(v, w, ++last, i); |
3188: } |
3189: hypre_swap2(v, w, left, last); |
3190: hypre_qsort2abs(v, w, left, last-1); |
3191: hypre_qsort2abs(v, w, last+1, right); |
3192: } |
0x59300 CMP %RCX,%RDX |
0x59303 JGE 5978e |
0x59309 PUSH %RBP |
0x5930a MOV %RSP,%RBP |
0x5930d PUSH %R15 |
0x5930f PUSH %R14 |
0x59311 PUSH %R13 |
0x59313 PUSH %R12 |
0x59315 PUSH %RBX |
0x59316 SUB $0x38,%RSP |
0x5931a MOV %RCX,%R15 |
0x5931d MOV %RDX,%R12 |
0x59320 MOV %RSI,%R14 |
0x59323 MOV %RDI,%RBX |
0x59326 VMOVDDUP 0x83ce2(%RIP),%XMM0 |
0x5932e VMOVUPD %XMM0,-0x60(%RBP) |
0x59333 MOV %RDX,%R13 |
0x59336 MOV %RCX,-0x38(%RBP) |
0x5933a MOV %RDI,-0x30(%RBP) |
0x5933e JMP 5937a |
(1173) 0x59340 MOV -0x38(%RBP),%R15 |
(1173) 0x59344 MOV -0x30(%RBP),%RBX |
(1173) 0x59348 MOV %RBX,%RDI |
(1173) 0x5934b MOV %R14,%RSI |
(1173) 0x5934e MOV %R12,%RDX |
(1173) 0x59351 MOV %R13,%RCX |
(1173) 0x59354 CALL e390 <hypre_swap2@plt> |
(1173) 0x59359 LEA -0x1(%R13),%RCX |
(1173) 0x5935d MOV %RBX,%RDI |
(1173) 0x59360 MOV %R14,%RSI |
(1173) 0x59363 MOV %R12,%RDX |
(1173) 0x59366 CALL e5a0 <hypre_qsort2abs@plt> |
(1173) 0x5936b INC %R13 |
(1173) 0x5936e MOV %R13,%R12 |
(1173) 0x59371 CMP %R15,%R13 |
(1173) 0x59374 JGE 59780 |
(1173) 0x5937a LEA (%R12,%R15,1),%RAX |
(1173) 0x5937e MOV %RAX,%RCX |
(1173) 0x59381 SHR $0x3f,%RCX |
(1173) 0x59385 ADD %RAX,%RCX |
(1173) 0x59388 SAR $0x1,%RCX |
(1173) 0x5938b MOV %RBX,%RDI |
(1173) 0x5938e MOV %R14,%RSI |
(1173) 0x59391 MOV %R12,%RDX |
(1173) 0x59394 CALL e390 <hypre_swap2@plt> |
(1173) 0x59399 LEA 0x1(%R12),%RAX |
(1173) 0x5939e CMP %RAX,%R15 |
(1173) 0x593a1 CMOVG %R15,%RAX |
(1173) 0x593a5 MOV %RAX,-0x48(%RBP) |
(1173) 0x593a9 SUB %R12,%RAX |
(1173) 0x593ac MOV %RAX,-0x40(%RBP) |
(1173) 0x593b0 CMP $0x8,%RAX |
(1173) 0x593b4 JAE 59480 |
(1173) 0x593ba MOV -0x40(%RBP),%RCX |
(1173) 0x593be MOV %RCX,%RAX |
(1173) 0x593c1 AND $-0x8,%RAX |
(1173) 0x593c5 CMP %RCX,%RAX |
(1173) 0x593c8 JAE 59340 |
(1173) 0x593ce LEA 0x1(%R12,%RAX,1),%RBX |
(1173) 0x593d3 NEGQ -0x48(%RBP) |
(1173) 0x593d7 MOV -0x38(%RBP),%R15 |
(1173) 0x593db JMP 59416 |
0x593dd NOPW %CS:(%RAX,%RAX,1) |
0x593ec NOPW %CS:(%RAX,%RAX,1) |
0x593fb NOPL (%RAX,%RAX,1) |
(1174) 0x59400 MOV -0x48(%RBP),%RAX |
(1174) 0x59404 LEA 0x1(%RAX,%RBX,1),%RAX |
(1174) 0x59409 INC %RBX |
(1174) 0x5940c CMP $0x1,%RAX |
(1174) 0x59410 JE 59344 |
(1174) 0x59416 VMOVSD (%R14,%RBX,8),%XMM0 |
(1174) 0x5941c VMOVUPD -0x60(%RBP),%XMM2 |
(1174) 0x59421 VANDPD %XMM2,%XMM0,%XMM0 |
(1174) 0x59425 VMOVSD (%R14,%R12,8),%XMM1 |
(1174) 0x5942b VANDPD %XMM2,%XMM1,%XMM1 |
(1174) 0x5942f VUCOMISD %XMM1,%XMM0 |
(1174) 0x59433 JBE 59400 |
(1174) 0x59435 INC %R13 |
(1174) 0x59438 MOV -0x30(%RBP),%RDI |
(1174) 0x5943c MOV %R14,%RSI |
(1174) 0x5943f MOV %R13,%RDX |
(1174) 0x59442 MOV %RBX,%RCX |
(1174) 0x59445 CALL e390 <hypre_swap2@plt> |
(1174) 0x5944a JMP 59400 |
0x5944c NOPW %CS:(%RAX,%RAX,1) |
0x5945b NOPW %CS:(%RAX,%RAX,1) |
0x5946a NOPW %CS:(%RAX,%RAX,1) |
0x59479 NOPL (%RAX) |
(1173) 0x59480 MOV -0x40(%RBP),%R15 |
(1173) 0x59484 SHR $0x3,%R15 |
(1173) 0x59488 LEA 0x8(%R12),%RBX |
(1173) 0x5948d MOV %R12,%R13 |
(1173) 0x59490 JMP 594cd |
0x59492 NOPW %CS:(%RAX,%RAX,1) |
0x594a1 NOPW %CS:(%RAX,%RAX,1) |
0x594b0 NOPW %CS:(%RAX,%RAX,1) |
0x594bf NOP |
(1175) 0x594c0 ADD $0x8,%RBX |
(1175) 0x594c4 DEC %R15 |
(1175) 0x594c7 JE 593ba |
(1175) 0x594cd VMOVSD -0x38(%R14,%RBX,8),%XMM0 |
(1175) 0x594d4 VMOVUPD -0x60(%RBP),%XMM2 |
(1175) 0x594d9 VANDPD %XMM2,%XMM0,%XMM1 |
(1175) 0x594dd VMOVSD (%R14,%R12,8),%XMM0 |
(1175) 0x594e3 VANDPD %XMM2,%XMM0,%XMM0 |
(1175) 0x594e7 VUCOMISD %XMM0,%XMM1 |
(1175) 0x594eb JBE 59540 |
(1175) 0x594ed LEA -0x7(%RBX),%RCX |
(1175) 0x594f1 INC %R13 |
(1175) 0x594f4 MOV -0x30(%RBP),%RDI |
(1175) 0x594f8 MOV %R14,%RSI |
(1175) 0x594fb MOV %R13,%RDX |
(1175) 0x594fe CALL e390 <hypre_swap2@plt> |
(1175) 0x59503 VMOVSD (%R14,%R12,8),%XMM0 |
(1175) 0x59509 VMOVDDUP 0x83aff(%RIP),%XMM2 |
(1175) 0x59511 VANDPD %XMM2,%XMM0,%XMM0 |
(1175) 0x59515 VMOVSD -0x30(%R14,%RBX,8),%XMM1 |
(1175) 0x5951c VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x59520 VUCOMISD %XMM0,%XMM1 |
(1175) 0x59524 JA 59559 |
(1175) 0x59526 JMP 59581 |
0x59528 NOPW %CS:(%RAX,%RAX,1) |
0x59537 NOPW (%RAX,%RAX,1) |
(1175) 0x59540 VMOVDDUP 0x83ac8(%RIP),%XMM2 |
(1175) 0x59548 VMOVSD -0x30(%R14,%RBX,8),%XMM1 |
(1175) 0x5954f VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x59553 VUCOMISD %XMM0,%XMM1 |
(1175) 0x59557 JBE 59581 |
(1175) 0x59559 LEA -0x6(%RBX),%RCX |
(1175) 0x5955d INC %R13 |
(1175) 0x59560 MOV -0x30(%RBP),%RDI |
(1175) 0x59564 MOV %R14,%RSI |
(1175) 0x59567 MOV %R13,%RDX |
(1175) 0x5956a CALL e390 <hypre_swap2@plt> |
(1175) 0x5956f VMOVDDUP 0x83a99(%RIP),%XMM2 |
(1175) 0x59577 VMOVSD (%R14,%R12,8),%XMM0 |
(1175) 0x5957d VANDPD %XMM2,%XMM0,%XMM0 |
(1175) 0x59581 VMOVSD -0x28(%R14,%RBX,8),%XMM1 |
(1175) 0x59588 VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x5958c VUCOMISD %XMM0,%XMM1 |
(1175) 0x59590 JA 59600 |
(1175) 0x59592 VMOVSD -0x20(%R14,%RBX,8),%XMM1 |
(1175) 0x59599 VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x5959d VUCOMISD %XMM0,%XMM1 |
(1175) 0x595a1 JA 5963d |
(1175) 0x595a7 VMOVSD -0x18(%R14,%RBX,8),%XMM1 |
(1175) 0x595ae VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x595b2 VUCOMISD %XMM0,%XMM1 |
(1175) 0x595b6 JA 5967a |
(1175) 0x595bc VMOVSD -0x10(%R14,%RBX,8),%XMM1 |
(1175) 0x595c3 VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x595c7 VUCOMISD %XMM0,%XMM1 |
(1175) 0x595cb JA 596b7 |
(1175) 0x595d1 VMOVSD -0x8(%R14,%RBX,8),%XMM1 |
(1175) 0x595d8 VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x595dc VUCOMISD %XMM0,%XMM1 |
(1175) 0x595e0 JA 596f4 |
(1175) 0x595e6 VMOVSD (%R14,%RBX,8),%XMM1 |
(1175) 0x595ec VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x595f0 VUCOMISD %XMM0,%XMM1 |
(1175) 0x595f4 JBE 594c0 |
(1175) 0x595fa JMP 59730 |
0x595ff NOP |
(1175) 0x59600 LEA -0x5(%RBX),%RCX |
(1175) 0x59604 INC %R13 |
(1175) 0x59607 MOV -0x30(%RBP),%RDI |
(1175) 0x5960b MOV %R14,%RSI |
(1175) 0x5960e MOV %R13,%RDX |
(1175) 0x59611 CALL e390 <hypre_swap2@plt> |
(1175) 0x59616 VMOVDDUP 0x839f2(%RIP),%XMM2 |
(1175) 0x5961e VMOVSD (%R14,%R12,8),%XMM0 |
(1175) 0x59624 VANDPD %XMM2,%XMM0,%XMM0 |
(1175) 0x59628 VMOVSD -0x20(%R14,%RBX,8),%XMM1 |
(1175) 0x5962f VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x59633 VUCOMISD %XMM0,%XMM1 |
(1175) 0x59637 JBE 595a7 |
(1175) 0x5963d LEA -0x4(%RBX),%RCX |
(1175) 0x59641 INC %R13 |
(1175) 0x59644 MOV -0x30(%RBP),%RDI |
(1175) 0x59648 MOV %R14,%RSI |
(1175) 0x5964b MOV %R13,%RDX |
(1175) 0x5964e CALL e390 <hypre_swap2@plt> |
(1175) 0x59653 VMOVDDUP 0x839b5(%RIP),%XMM2 |
(1175) 0x5965b VMOVSD (%R14,%R12,8),%XMM0 |
(1175) 0x59661 VANDPD %XMM2,%XMM0,%XMM0 |
(1175) 0x59665 VMOVSD -0x18(%R14,%RBX,8),%XMM1 |
(1175) 0x5966c VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x59670 VUCOMISD %XMM0,%XMM1 |
(1175) 0x59674 JBE 595bc |
(1175) 0x5967a LEA -0x3(%RBX),%RCX |
(1175) 0x5967e INC %R13 |
(1175) 0x59681 MOV -0x30(%RBP),%RDI |
(1175) 0x59685 MOV %R14,%RSI |
(1175) 0x59688 MOV %R13,%RDX |
(1175) 0x5968b CALL e390 <hypre_swap2@plt> |
(1175) 0x59690 VMOVDDUP 0x83978(%RIP),%XMM2 |
(1175) 0x59698 VMOVSD (%R14,%R12,8),%XMM0 |
(1175) 0x5969e VANDPD %XMM2,%XMM0,%XMM0 |
(1175) 0x596a2 VMOVSD -0x10(%R14,%RBX,8),%XMM1 |
(1175) 0x596a9 VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x596ad VUCOMISD %XMM0,%XMM1 |
(1175) 0x596b1 JBE 595d1 |
(1175) 0x596b7 LEA -0x2(%RBX),%RCX |
(1175) 0x596bb INC %R13 |
(1175) 0x596be MOV -0x30(%RBP),%RDI |
(1175) 0x596c2 MOV %R14,%RSI |
(1175) 0x596c5 MOV %R13,%RDX |
(1175) 0x596c8 CALL e390 <hypre_swap2@plt> |
(1175) 0x596cd VMOVDDUP 0x8393b(%RIP),%XMM2 |
(1175) 0x596d5 VMOVSD (%R14,%R12,8),%XMM0 |
(1175) 0x596db VANDPD %XMM2,%XMM0,%XMM0 |
(1175) 0x596df VMOVSD -0x8(%R14,%RBX,8),%XMM1 |
(1175) 0x596e6 VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x596ea VUCOMISD %XMM0,%XMM1 |
(1175) 0x596ee JBE 595e6 |
(1175) 0x596f4 LEA -0x1(%RBX),%RCX |
(1175) 0x596f8 INC %R13 |
(1175) 0x596fb MOV -0x30(%RBP),%RDI |
(1175) 0x596ff MOV %R14,%RSI |
(1175) 0x59702 MOV %R13,%RDX |
(1175) 0x59705 CALL e390 <hypre_swap2@plt> |
(1175) 0x5970a VMOVDDUP 0x838fe(%RIP),%XMM2 |
(1175) 0x59712 VMOVSD (%R14,%R12,8),%XMM0 |
(1175) 0x59718 VANDPD %XMM2,%XMM0,%XMM0 |
(1175) 0x5971c VMOVSD (%R14,%RBX,8),%XMM1 |
(1175) 0x59722 VANDPD %XMM2,%XMM1,%XMM1 |
(1175) 0x59726 VUCOMISD %XMM0,%XMM1 |
(1175) 0x5972a JBE 594c0 |
(1175) 0x59730 INC %R13 |
(1175) 0x59733 MOV -0x30(%RBP),%RDI |
(1175) 0x59737 MOV %R14,%RSI |
(1175) 0x5973a MOV %R13,%RDX |
(1175) 0x5973d MOV %RBX,%RCX |
(1175) 0x59740 CALL e390 <hypre_swap2@plt> |
(1175) 0x59745 JMP 594c0 |
0x5974a NOPW %CS:(%RAX,%RAX,1) |
0x59759 NOPW %CS:(%RAX,%RAX,1) |
0x59768 NOPW %CS:(%RAX,%RAX,1) |
0x59777 NOPW (%RAX,%RAX,1) |
0x59780 ADD $0x38,%RSP |
0x59784 POP %RBX |
0x59785 POP %R12 |
0x59787 POP %R13 |
0x59789 POP %R14 |
0x5978b POP %R15 |
0x5978d POP %RBP |
0x5978e RET |
0x5978f NOPW %CS:(%RAX,%RAX,1) |
0x59799 NOPW %CS:(%RAX,%RAX,1) |
0x597a3 NOPW %CS:(%RAX,%RAX,1) |
0x597ad NOPW %CS:(%RAX,%RAX,1) |
0x597b7 NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | par_interp.c:3180-3192 |
Module | libparcsr_ls.so |
nb instructions | 51 |
nb uops | 51 |
loop length | 340 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 8.50 cycles |
front end | 8.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 8.60 |
Stall cycles | 0.00 |
Front-end | 8.50 |
Dispatch | 4.50 |
Overall L1 | 8.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5978e <hypre_qsort2abs+0x48e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0x83ce2(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 5937a <hypre_qsort2abs+0x7a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | par_interp.c:3180-3192 |
Module | libparcsr_ls.so |
nb instructions | 51 |
nb uops | 51 |
loop length | 340 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 8.50 cycles |
front end | 8.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 8.60 |
Stall cycles | 0.00 |
Front-end | 8.50 |
Dispatch | 4.50 |
Overall L1 | 8.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5978e <hypre_qsort2abs+0x48e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0x83ce2(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 5937a <hypre_qsort2abs+0x7a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_qsort2abs– | 0.01 | 0 |
▼Loop 1173 - par_interp.c:3180-3191 - libparcsr_ls.so– | 0 | 0 |
○Loop 1175 - par_interp.c:3184-3187 - libparcsr_ls.so | 0 | 0 |
○Loop 1174 - par_interp.c:3180-3187 - libparcsr_ls.so | 0 | 0 |