Function: hypre_qsort2abs | Module: exec | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
Function: hypre_qsort2abs | Module: exec | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-8218/intel/AMG/build/AMG/AMG/parcsr_ls/par_interp.c: 3180 - 3192 |
-------------------------------------------------------------------------------- |
3180: if (left >= right) |
3181: return; |
3182: hypre_swap2( v, w, left, (left+right)/2); |
3183: last = left; |
3184: for (i = left+1; i <= right; i++) |
3185: if (fabs(w[i]) > fabs(w[left])) |
3186: { |
3187: hypre_swap2(v, w, ++last, i); |
3188: } |
3189: hypre_swap2(v, w, left, last); |
3190: hypre_qsort2abs(v, w, left, last-1); |
3191: hypre_qsort2abs(v, w, last+1, right); |
3192: } |
# ---------------------------------------------------------------------------
# hypre_qsort2abs -- disassembly for the C excerpt par_interp.c:3180-3192.
# Operand order is AT&T (source first, destination last).
#
# Argument registers on entry, as implied by the C excerpt together with the
# recursive call further down:  RDI = v, RSI = w, RDX = left, RCX = right.
#
# Register roles after the prologue:
#   R14 = w
#   R12 = left
#   R13 = last
#   R15 = right           (reused as the 8-element block counter in loop 562)
#   RBX = v               (reused as the element index i in loops 561 and 562)
#
# Stack slots:
#   -0x30(%RBP) = v       -0x38(%RBP) = right
#   -0x40(%RBP) = partition-loop trip count
#   -0x48(%RBP) = partition-loop upper bound (negated in place before loop 561)
#   -0x60(%RBP) = 16-byte constant ANDed with each value before the compare;
#                 presumably the sign-clearing mask that implements fabs()
#
# The second recursive call (source line 3191) never appears as a CALL:
# loop 560 re-runs the body with left = last+1 instead.
# ---------------------------------------------------------------------------
# Source 3180-3181: if (left >= right) return;  -- jumps straight to RET, no frame built.
0x430a20 CMP %RCX,%RDX
0x430a23 JGE 430df8
# Prologue: frame pointer, five callee-saved registers, 0x38 bytes of locals.
0x430a29 PUSH %RBP
0x430a2a MOV %RSP,%RBP
0x430a2d PUSH %R15
0x430a2f PUSH %R14
0x430a31 PUSH %R13
0x430a33 PUSH %R12
0x430a35 PUSH %RBX
0x430a36 SUB $0x38,%RSP
# Move the four arguments into registers that survive the calls below.
0x430a3a MOV %RCX,%R15
0x430a3d MOV %RDX,%R12
0x430a40 MOV %RSI,%R14
0x430a43 MOV %RDI,%RBX
# Load the AND-mask constant once and park a copy on the stack for loop 561.
0x430a46 VMOVDDUP 0xcec3a(%RIP),%XMM0
0x430a4e VMOVUPD %XMM0,-0x60(%RBP)
# last = left; also spill right and v because R15 and RBX get repurposed later.
0x430a53 MOV %RDX,%R13
0x430a56 MOV %RCX,-0x38(%RBP)
0x430a5a MOV %RDI,-0x30(%RBP)
0x430a5e JMP 430a9a
# --- Loop 560: one pass per (left, right) range ------------------------------
# Partition finished: restore right and v from their stack slots.
(560) 0x430a60 MOV -0x38(%RBP),%R15
(560) 0x430a64 MOV -0x30(%RBP),%RBX
# Source 3189: hypre_swap2(v, w, left, last);
(560) 0x430a68 MOV %RBX,%RDI
(560) 0x430a6b MOV %R14,%RSI
(560) 0x430a6e MOV %R12,%RDX
(560) 0x430a71 MOV %R13,%RCX
(560) 0x430a74 CALL 4e73e0 <hypre_swap2>
# Source 3190: hypre_qsort2abs(v, w, left, last-1);  -- the only true recursion.
(560) 0x430a79 LEA -0x1(%R13),%RCX
(560) 0x430a7d MOV %RBX,%RDI
(560) 0x430a80 MOV %R14,%RSI
(560) 0x430a83 MOV %R12,%RDX
(560) 0x430a86 CALL 430a20 <hypre_qsort2abs>
# Source 3191 as iteration: left = last = last+1, then repeat while left < right.
(560) 0x430a8b INC %R13
(560) 0x430a8e MOV %R13,%R12
(560) 0x430a91 CMP %R15,%R13
(560) 0x430a94 JGE 430dea
# Source 3182: hypre_swap2(v, w, left, (left+right)/2);
# SHR 63 / ADD / SAR 1 is the signed divide-by-two that rounds toward zero.
(560) 0x430a9a LEA (%R12,%R15,1),%RAX
(560) 0x430a9e MOV %RAX,%RCX
(560) 0x430aa1 SHR $0x3f,%RCX
(560) 0x430aa5 ADD %RAX,%RCX
(560) 0x430aa8 SAR $0x1,%RCX
(560) 0x430aab MOV %RBX,%RDI
(560) 0x430aae MOV %R14,%RSI
(560) 0x430ab1 MOV %R12,%RDX
(560) 0x430ab4 CALL 4e73e0 <hypre_swap2>
# Trip count of the source-3184 loop: bound = max(left+1, right); count = bound - left.
(560) 0x430ab9 LEA 0x1(%R12),%RAX
(560) 0x430abe CMP %RAX,%R15
(560) 0x430ac1 CMOVG %R15,%RAX
(560) 0x430ac5 MOV %RAX,-0x48(%RBP)
(560) 0x430ac9 SUB %R12,%RAX
(560) 0x430acc MOV %RAX,-0x40(%RBP)
# At least 8 iterations: take the 8x-unrolled loop 562 first.
(560) 0x430ad0 CMP $0x8,%RAX
(560) 0x430ad4 JAE 430b50
# Remainder setup: iterations already covered = count & ~7; if that is all of them, finish.
(560) 0x430ad6 MOV -0x40(%RBP),%RCX
(560) 0x430ada MOV %RCX,%RAX
(560) 0x430add AND $-0x8,%RAX
(560) 0x430ae1 CMP %RCX,%RAX
(560) 0x430ae4 JAE 430a60
# i = left + 1 + (count & ~7); negate the stored bound for the exit test; R15 = right again.
(560) 0x430aea LEA 0x1(%R12,%RAX,1),%RBX
(560) 0x430aef NEGQ -0x48(%RBP)
(560) 0x430af3 MOV -0x38(%RBP),%R15
(560) 0x430af7 JMP 430b16
0x430af9 NOPL (%RAX)
# --- Loop 561: scalar remainder of the partition loop (source 3184-3187) -----
# Latch: RAX = i + 1 - bound, then i++; leave the loop once the old i equalled bound.
# Exit goes to 430a64, skipping the R15 reload because R15 already holds right.
(561) 0x430b00 MOV -0x48(%RBP),%RAX
(561) 0x430b04 LEA 0x1(%RAX,%RBX,1),%RAX
(561) 0x430b09 INC %RBX
(561) 0x430b0c CMP $0x1,%RAX
(561) 0x430b10 JE 430a64
# Source 3185: if (fabs(w[i]) > fabs(w[left])) -- both values are masked, then compared.
# JBE skips the swap when the comparison is not "above" (this includes unordered).
(561) 0x430b16 VMOVSD (%R14,%RBX,8),%XMM0
(561) 0x430b1c VMOVUPD -0x60(%RBP),%XMM2
(561) 0x430b21 VANDPD %XMM2,%XMM0,%XMM0
(561) 0x430b25 VMOVSD (%R14,%R12,8),%XMM1
(561) 0x430b2b VANDPD %XMM2,%XMM1,%XMM1
(561) 0x430b2f VUCOMISD %XMM1,%XMM0
(561) 0x430b33 JBE 430b00
# Source 3187: hypre_swap2(v, w, ++last, i);
(561) 0x430b35 INC %R13
(561) 0x430b38 MOV -0x30(%RBP),%RDI
(561) 0x430b3c MOV %R14,%RSI
(561) 0x430b3f MOV %R13,%RDX
(561) 0x430b42 MOV %RBX,%RCX
(561) 0x430b45 CALL 4e73e0 <hypre_swap2>
(561) 0x430b4a JMP 430b00
0x430b4c NOPL (%RAX)
# --- Loop 562 setup: R15 = count / 8 blocks, RBX = left+8 (last index of block 0),
#     last = left.
(560) 0x430b50 MOV -0x40(%RBP),%R15
(560) 0x430b54 SHR $0x3,%R15
(560) 0x430b58 LEA 0x8(%R12),%RBX
(560) 0x430b5d MOV %R12,%R13
(560) 0x430b60 JMP 430b7d
0x430b62 NOPW %CS:(%RAX,%RAX,1)
# --- Loop 562: partition loop unrolled by 8 (source 3184-3187) ---------------
# Each pass handles elements RBX-7 .. RBX. Every element has a "test" sequence
# and a "swap" sequence. After each swap call, the mask and |w[left]| are
# reloaded before the next element is tested.
# Latch: advance to the next block of 8; fall into the remainder code when done.
(562) 0x430b70 ADD $0x8,%RBX
(562) 0x430b74 DEC %R15
(562) 0x430b77 JE 430ad6
# Element RBX-7: test.
(562) 0x430b7d VMOVSD -0x38(%R14,%RBX,8),%XMM0
(562) 0x430b84 VMOVUPD -0x60(%RBP),%XMM2
(562) 0x430b89 VANDPD %XMM2,%XMM0,%XMM1
(562) 0x430b8d VMOVSD (%R14,%R12,8),%XMM0
(562) 0x430b93 VANDPD %XMM2,%XMM0,%XMM0
(562) 0x430b97 VUCOMISD %XMM0,%XMM1
(562) 0x430b9b JBE 430be0
# Element RBX-7: hypre_swap2(v, w, ++last, RBX-7).
(562) 0x430b9d LEA -0x7(%RBX),%RCX
(562) 0x430ba1 INC %R13
(562) 0x430ba4 MOV -0x30(%RBP),%RDI
(562) 0x430ba8 MOV %R14,%RSI
(562) 0x430bab MOV %R13,%RDX
(562) 0x430bae CALL 4e73e0 <hypre_swap2>
# Reload |w[left]| and the mask (same RIP-relative constant as in the prologue),
# then test element RBX-6.
(562) 0x430bb3 VMOVSD (%R14,%R12,8),%XMM0
(562) 0x430bb9 VMOVDDUP 0xceac7(%RIP),%XMM2
(562) 0x430bc1 VANDPD %XMM2,%XMM0,%XMM0
(562) 0x430bc5 VMOVSD -0x30(%R14,%RBX,8),%XMM1
(562) 0x430bcc VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430bd0 VUCOMISD %XMM0,%XMM1
(562) 0x430bd4 JA 430bf9
(562) 0x430bd6 JMP 430c21
0x430bd8 NOPL (%RAX,%RAX,1)
# Element RBX-6: test on the no-swap path (XMM0 still holds |w[left]|).
(562) 0x430be0 VMOVDDUP 0xceaa0(%RIP),%XMM2
(562) 0x430be8 VMOVSD -0x30(%R14,%RBX,8),%XMM1
(562) 0x430bef VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430bf3 VUCOMISD %XMM0,%XMM1
(562) 0x430bf7 JBE 430c21
# Element RBX-6: hypre_swap2(v, w, ++last, RBX-6), then reload mask and |w[left]|.
(562) 0x430bf9 LEA -0x6(%RBX),%RCX
(562) 0x430bfd INC %R13
(562) 0x430c00 MOV -0x30(%RBP),%RDI
(562) 0x430c04 MOV %R14,%RSI
(562) 0x430c07 MOV %R13,%RDX
(562) 0x430c0a CALL 4e73e0 <hypre_swap2>
(562) 0x430c0f VMOVDDUP 0xcea71(%RIP),%XMM2
(562) 0x430c17 VMOVSD (%R14,%R12,8),%XMM0
(562) 0x430c1d VANDPD %XMM2,%XMM0,%XMM0
# No-swap test chain for elements RBX-5 .. RBX. Each JA leaves the chain for that
# element's swap sequence further down; the swap sequences jump back into the chain.
# Element RBX-5: test.
(562) 0x430c21 VMOVSD -0x28(%R14,%RBX,8),%XMM1
(562) 0x430c28 VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430c2c VUCOMISD %XMM0,%XMM1
(562) 0x430c30 JA 430ca0
# Element RBX-4: test.
(562) 0x430c32 VMOVSD -0x20(%R14,%RBX,8),%XMM1
(562) 0x430c39 VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430c3d VUCOMISD %XMM0,%XMM1
(562) 0x430c41 JA 430cdd
# Element RBX-3: test.
(562) 0x430c47 VMOVSD -0x18(%R14,%RBX,8),%XMM1
(562) 0x430c4e VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430c52 VUCOMISD %XMM0,%XMM1
(562) 0x430c56 JA 430d1a
# Element RBX-2: test.
(562) 0x430c5c VMOVSD -0x10(%R14,%RBX,8),%XMM1
(562) 0x430c63 VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430c67 VUCOMISD %XMM0,%XMM1
(562) 0x430c6b JA 430d57
# Element RBX-1: test.
(562) 0x430c71 VMOVSD -0x8(%R14,%RBX,8),%XMM1
(562) 0x430c78 VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430c7c VUCOMISD %XMM0,%XMM1
(562) 0x430c80 JA 430d94
# Element RBX (last of the block): test; no swap -> latch, swap -> 430dd0.
(562) 0x430c86 VMOVSD (%R14,%RBX,8),%XMM1
(562) 0x430c8c VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430c90 VUCOMISD %XMM0,%XMM1
(562) 0x430c94 JBE 430b70
(562) 0x430c9a JMP 430dd0
0x430c9f NOP
# Element RBX-5: hypre_swap2(v, w, ++last, RBX-5); reload; test element RBX-4.
(562) 0x430ca0 LEA -0x5(%RBX),%RCX
(562) 0x430ca4 INC %R13
(562) 0x430ca7 MOV -0x30(%RBP),%RDI
(562) 0x430cab MOV %R14,%RSI
(562) 0x430cae MOV %R13,%RDX
(562) 0x430cb1 CALL 4e73e0 <hypre_swap2>
(562) 0x430cb6 VMOVDDUP 0xce9ca(%RIP),%XMM2
(562) 0x430cbe VMOVSD (%R14,%R12,8),%XMM0
(562) 0x430cc4 VANDPD %XMM2,%XMM0,%XMM0
(562) 0x430cc8 VMOVSD -0x20(%R14,%RBX,8),%XMM1
(562) 0x430ccf VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430cd3 VUCOMISD %XMM0,%XMM1
(562) 0x430cd7 JBE 430c47
# Element RBX-4: hypre_swap2(v, w, ++last, RBX-4); reload; test element RBX-3.
(562) 0x430cdd LEA -0x4(%RBX),%RCX
(562) 0x430ce1 INC %R13
(562) 0x430ce4 MOV -0x30(%RBP),%RDI
(562) 0x430ce8 MOV %R14,%RSI
(562) 0x430ceb MOV %R13,%RDX
(562) 0x430cee CALL 4e73e0 <hypre_swap2>
(562) 0x430cf3 VMOVDDUP 0xce98d(%RIP),%XMM2
(562) 0x430cfb VMOVSD (%R14,%R12,8),%XMM0
(562) 0x430d01 VANDPD %XMM2,%XMM0,%XMM0
(562) 0x430d05 VMOVSD -0x18(%R14,%RBX,8),%XMM1
(562) 0x430d0c VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430d10 VUCOMISD %XMM0,%XMM1
(562) 0x430d14 JBE 430c5c
# Element RBX-3: hypre_swap2(v, w, ++last, RBX-3); reload; test element RBX-2.
(562) 0x430d1a LEA -0x3(%RBX),%RCX
(562) 0x430d1e INC %R13
(562) 0x430d21 MOV -0x30(%RBP),%RDI
(562) 0x430d25 MOV %R14,%RSI
(562) 0x430d28 MOV %R13,%RDX
(562) 0x430d2b CALL 4e73e0 <hypre_swap2>
(562) 0x430d30 VMOVDDUP 0xce950(%RIP),%XMM2
(562) 0x430d38 VMOVSD (%R14,%R12,8),%XMM0
(562) 0x430d3e VANDPD %XMM2,%XMM0,%XMM0
(562) 0x430d42 VMOVSD -0x10(%R14,%RBX,8),%XMM1
(562) 0x430d49 VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430d4d VUCOMISD %XMM0,%XMM1
(562) 0x430d51 JBE 430c71
# Element RBX-2: hypre_swap2(v, w, ++last, RBX-2); reload; test element RBX-1.
(562) 0x430d57 LEA -0x2(%RBX),%RCX
(562) 0x430d5b INC %R13
(562) 0x430d5e MOV -0x30(%RBP),%RDI
(562) 0x430d62 MOV %R14,%RSI
(562) 0x430d65 MOV %R13,%RDX
(562) 0x430d68 CALL 4e73e0 <hypre_swap2>
(562) 0x430d6d VMOVDDUP 0xce913(%RIP),%XMM2
(562) 0x430d75 VMOVSD (%R14,%R12,8),%XMM0
(562) 0x430d7b VANDPD %XMM2,%XMM0,%XMM0
(562) 0x430d7f VMOVSD -0x8(%R14,%RBX,8),%XMM1
(562) 0x430d86 VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430d8a VUCOMISD %XMM0,%XMM1
(562) 0x430d8e JBE 430c86
# Element RBX-1: hypre_swap2(v, w, ++last, RBX-1); reload; test element RBX.
(562) 0x430d94 LEA -0x1(%RBX),%RCX
(562) 0x430d98 INC %R13
(562) 0x430d9b MOV -0x30(%RBP),%RDI
(562) 0x430d9f MOV %R14,%RSI
(562) 0x430da2 MOV %R13,%RDX
(562) 0x430da5 CALL 4e73e0 <hypre_swap2>
(562) 0x430daa VMOVDDUP 0xce8d6(%RIP),%XMM2
(562) 0x430db2 VMOVSD (%R14,%R12,8),%XMM0
(562) 0x430db8 VANDPD %XMM2,%XMM0,%XMM0
(562) 0x430dbc VMOVSD (%R14,%RBX,8),%XMM1
(562) 0x430dc2 VANDPD %XMM2,%XMM1,%XMM1
(562) 0x430dc6 VUCOMISD %XMM0,%XMM1
(562) 0x430dca JBE 430b70
# Element RBX: hypre_swap2(v, w, ++last, RBX); then on to the next block.
(562) 0x430dd0 INC %R13
(562) 0x430dd3 MOV -0x30(%RBP),%RDI
(562) 0x430dd7 MOV %R14,%RSI
(562) 0x430dda MOV %R13,%RDX
(562) 0x430ddd MOV %RBX,%RCX
(562) 0x430de0 CALL 4e73e0 <hypre_swap2>
(562) 0x430de5 JMP 430b70
# Epilogue: release locals and restore callee-saved registers in reverse push order.
0x430dea ADD $0x38,%RSP
0x430dee POP %RBX
0x430def POP %R12
0x430df1 POP %R13
0x430df3 POP %R14
0x430df5 POP %R15
0x430df7 POP %RBP
# Shared return point; the early-out at 0x430a23 lands here directly.
0x430df8 RET
0x430df9 NOPL (%RAX)
Path / |
Source file and lines | par_interp.c:3180-3192 |
Module | exec |
nb instructions | 34 |
nb uops | 34 |
loop length | 120 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 5.67 cycles |
front end | 5.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.78 |
Stall cycles | 0.00 |
Front-end | 5.67 |
Dispatch | 4.50 |
Overall L1 | 5.67 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 430df8 <hypre_qsort2abs+0x3d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0xcec3a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 430a9a <hypre_qsort2abs+0x7a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | par_interp.c:3180-3192 |
Module | exec |
nb instructions | 34 |
nb uops | 34 |
loop length | 120 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 5.67 cycles |
front end | 5.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.78 |
Stall cycles | 0.00 |
Front-end | 5.67 |
Dispatch | 4.50 |
Overall L1 | 5.67 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 430df8 <hypre_qsort2abs+0x3d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0xcec3a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 430a9a <hypre_qsort2abs+0x7a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_qsort2abs– | 0.01 | 0 |
▼Loop 560 - par_interp.c:3180-3191 - exec– | 0 | 0 |
○Loop 561 - par_interp.c:3180-3187 - exec | 0 | 0 |
○Loop 562 - par_interp.c:3184-3187 - exec | 0 | 0 |