Function: hypre_qsort2abs | Module: exec | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
Function: hypre_qsort2abs | Module: exec | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-147-2675/intel/AMG/build/AMG/AMG/parcsr_ls/par_interp.c: 3180 - 3192 |
-------------------------------------------------------------------------------- |
3180: if (left >= right) |
3181: return; |
3182: hypre_swap2( v, w, left, (left+right)/2); |
3183: last = left; |
3184: for (i = left+1; i <= right; i++) |
3185: if (fabs(w[i]) > fabs(w[left])) |
3186: { |
3187: hypre_swap2(v, w, ++last, i); |
3188: } |
3189: hypre_swap2(v, w, left, last); |
3190: hypre_qsort2abs(v, w, left, last-1); |
3191: hypre_qsort2abs(v, w, last+1, right); |
3192: } |
0x430f60 CMP %RCX,%RDX |
0x430f63 JGE 431338 |
0x430f69 PUSH %RBP |
0x430f6a MOV %RSP,%RBP |
0x430f6d PUSH %R15 |
0x430f6f PUSH %R14 |
0x430f71 PUSH %R13 |
0x430f73 PUSH %R12 |
0x430f75 PUSH %RBX |
0x430f76 SUB $0x38,%RSP |
0x430f7a MOV %RCX,%R15 |
0x430f7d MOV %RDX,%R12 |
0x430f80 MOV %RSI,%R14 |
0x430f83 MOV %RDI,%RBX |
0x430f86 VMOVDDUP 0xcf19a(%RIP),%XMM0 |
0x430f8e VMOVUPD %XMM0,-0x60(%RBP) |
0x430f93 MOV %RDX,%R13 |
0x430f96 MOV %RCX,-0x38(%RBP) |
0x430f9a MOV %RDI,-0x30(%RBP) |
0x430f9e JMP 430fda |
(560) 0x430fa0 MOV -0x38(%RBP),%R15 |
(560) 0x430fa4 MOV -0x30(%RBP),%RBX |
(560) 0x430fa8 MOV %RBX,%RDI |
(560) 0x430fab MOV %R14,%RSI |
(560) 0x430fae MOV %R12,%RDX |
(560) 0x430fb1 MOV %R13,%RCX |
(560) 0x430fb4 CALL 4e7920 <hypre_swap2> |
(560) 0x430fb9 LEA -0x1(%R13),%RCX |
(560) 0x430fbd MOV %RBX,%RDI |
(560) 0x430fc0 MOV %R14,%RSI |
(560) 0x430fc3 MOV %R12,%RDX |
(560) 0x430fc6 CALL 430f60 <hypre_qsort2abs> |
(560) 0x430fcb INC %R13 |
(560) 0x430fce MOV %R13,%R12 |
(560) 0x430fd1 CMP %R15,%R13 |
(560) 0x430fd4 JGE 43132a |
(560) 0x430fda LEA (%R12,%R15,1),%RAX |
(560) 0x430fde MOV %RAX,%RCX |
(560) 0x430fe1 SHR $0x3f,%RCX |
(560) 0x430fe5 ADD %RAX,%RCX |
(560) 0x430fe8 SAR $0x1,%RCX |
(560) 0x430feb MOV %RBX,%RDI |
(560) 0x430fee MOV %R14,%RSI |
(560) 0x430ff1 MOV %R12,%RDX |
(560) 0x430ff4 CALL 4e7920 <hypre_swap2> |
(560) 0x430ff9 LEA 0x1(%R12),%RAX |
(560) 0x430ffe CMP %RAX,%R15 |
(560) 0x431001 CMOVG %R15,%RAX |
(560) 0x431005 MOV %RAX,-0x48(%RBP) |
(560) 0x431009 SUB %R12,%RAX |
(560) 0x43100c MOV %RAX,-0x40(%RBP) |
(560) 0x431010 CMP $0x8,%RAX |
(560) 0x431014 JAE 431090 |
(560) 0x431016 MOV -0x40(%RBP),%RCX |
(560) 0x43101a MOV %RCX,%RAX |
(560) 0x43101d AND $-0x8,%RAX |
(560) 0x431021 CMP %RCX,%RAX |
(560) 0x431024 JAE 430fa0 |
(560) 0x43102a LEA 0x1(%R12,%RAX,1),%RBX |
(560) 0x43102f NEGQ -0x48(%RBP) |
(560) 0x431033 MOV -0x38(%RBP),%R15 |
(560) 0x431037 JMP 431056 |
0x431039 NOPL (%RAX) |
(561) 0x431040 MOV -0x48(%RBP),%RAX |
(561) 0x431044 LEA 0x1(%RAX,%RBX,1),%RAX |
(561) 0x431049 INC %RBX |
(561) 0x43104c CMP $0x1,%RAX |
(561) 0x431050 JE 430fa4 |
(561) 0x431056 VMOVSD (%R14,%RBX,8),%XMM0 |
(561) 0x43105c VMOVUPD -0x60(%RBP),%XMM2 |
(561) 0x431061 VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x431065 VMOVSD (%R14,%R12,8),%XMM1 |
(561) 0x43106b VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x43106f VUCOMISD %XMM1,%XMM0 |
(561) 0x431073 JBE 431040 |
(561) 0x431075 INC %R13 |
(561) 0x431078 MOV -0x30(%RBP),%RDI |
(561) 0x43107c MOV %R14,%RSI |
(561) 0x43107f MOV %R13,%RDX |
(561) 0x431082 MOV %RBX,%RCX |
(561) 0x431085 CALL 4e7920 <hypre_swap2> |
(561) 0x43108a JMP 431040 |
0x43108c NOPL (%RAX) |
(560) 0x431090 MOV -0x40(%RBP),%R15 |
(560) 0x431094 SHR $0x3,%R15 |
(560) 0x431098 LEA 0x8(%R12),%RBX |
(560) 0x43109d MOV %R12,%R13 |
(560) 0x4310a0 JMP 4310bd |
0x4310a2 NOPW %CS:(%RAX,%RAX,1) |
(562) 0x4310b0 ADD $0x8,%RBX |
(562) 0x4310b4 DEC %R15 |
(562) 0x4310b7 JE 431016 |
(562) 0x4310bd VMOVSD -0x38(%R14,%RBX,8),%XMM0 |
(562) 0x4310c4 VMOVUPD -0x60(%RBP),%XMM2 |
(562) 0x4310c9 VANDPD %XMM2,%XMM0,%XMM1 |
(562) 0x4310cd VMOVSD (%R14,%R12,8),%XMM0 |
(562) 0x4310d3 VANDPD %XMM2,%XMM0,%XMM0 |
(562) 0x4310d7 VUCOMISD %XMM0,%XMM1 |
(562) 0x4310db JBE 431120 |
(562) 0x4310dd LEA -0x7(%RBX),%RCX |
(562) 0x4310e1 INC %R13 |
(562) 0x4310e4 MOV -0x30(%RBP),%RDI |
(562) 0x4310e8 MOV %R14,%RSI |
(562) 0x4310eb MOV %R13,%RDX |
(562) 0x4310ee CALL 4e7920 <hypre_swap2> |
(562) 0x4310f3 VMOVSD (%R14,%R12,8),%XMM0 |
(562) 0x4310f9 VMOVDDUP 0xcf027(%RIP),%XMM2 |
(562) 0x431101 VANDPD %XMM2,%XMM0,%XMM0 |
(562) 0x431105 VMOVSD -0x30(%R14,%RBX,8),%XMM1 |
(562) 0x43110c VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x431110 VUCOMISD %XMM0,%XMM1 |
(562) 0x431114 JA 431139 |
(562) 0x431116 JMP 431161 |
0x431118 NOPL (%RAX,%RAX,1) |
(562) 0x431120 VMOVDDUP 0xcf000(%RIP),%XMM2 |
(562) 0x431128 VMOVSD -0x30(%R14,%RBX,8),%XMM1 |
(562) 0x43112f VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x431133 VUCOMISD %XMM0,%XMM1 |
(562) 0x431137 JBE 431161 |
(562) 0x431139 LEA -0x6(%RBX),%RCX |
(562) 0x43113d INC %R13 |
(562) 0x431140 MOV -0x30(%RBP),%RDI |
(562) 0x431144 MOV %R14,%RSI |
(562) 0x431147 MOV %R13,%RDX |
(562) 0x43114a CALL 4e7920 <hypre_swap2> |
(562) 0x43114f VMOVDDUP 0xcefd1(%RIP),%XMM2 |
(562) 0x431157 VMOVSD (%R14,%R12,8),%XMM0 |
(562) 0x43115d VANDPD %XMM2,%XMM0,%XMM0 |
(562) 0x431161 VMOVSD -0x28(%R14,%RBX,8),%XMM1 |
(562) 0x431168 VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x43116c VUCOMISD %XMM0,%XMM1 |
(562) 0x431170 JA 4311e0 |
(562) 0x431172 VMOVSD -0x20(%R14,%RBX,8),%XMM1 |
(562) 0x431179 VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x43117d VUCOMISD %XMM0,%XMM1 |
(562) 0x431181 JA 43121d |
(562) 0x431187 VMOVSD -0x18(%R14,%RBX,8),%XMM1 |
(562) 0x43118e VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x431192 VUCOMISD %XMM0,%XMM1 |
(562) 0x431196 JA 43125a |
(562) 0x43119c VMOVSD -0x10(%R14,%RBX,8),%XMM1 |
(562) 0x4311a3 VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x4311a7 VUCOMISD %XMM0,%XMM1 |
(562) 0x4311ab JA 431297 |
(562) 0x4311b1 VMOVSD -0x8(%R14,%RBX,8),%XMM1 |
(562) 0x4311b8 VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x4311bc VUCOMISD %XMM0,%XMM1 |
(562) 0x4311c0 JA 4312d4 |
(562) 0x4311c6 VMOVSD (%R14,%RBX,8),%XMM1 |
(562) 0x4311cc VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x4311d0 VUCOMISD %XMM0,%XMM1 |
(562) 0x4311d4 JBE 4310b0 |
(562) 0x4311da JMP 431310 |
0x4311df NOP |
(562) 0x4311e0 LEA -0x5(%RBX),%RCX |
(562) 0x4311e4 INC %R13 |
(562) 0x4311e7 MOV -0x30(%RBP),%RDI |
(562) 0x4311eb MOV %R14,%RSI |
(562) 0x4311ee MOV %R13,%RDX |
(562) 0x4311f1 CALL 4e7920 <hypre_swap2> |
(562) 0x4311f6 VMOVDDUP 0xcef2a(%RIP),%XMM2 |
(562) 0x4311fe VMOVSD (%R14,%R12,8),%XMM0 |
(562) 0x431204 VANDPD %XMM2,%XMM0,%XMM0 |
(562) 0x431208 VMOVSD -0x20(%R14,%RBX,8),%XMM1 |
(562) 0x43120f VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x431213 VUCOMISD %XMM0,%XMM1 |
(562) 0x431217 JBE 431187 |
(562) 0x43121d LEA -0x4(%RBX),%RCX |
(562) 0x431221 INC %R13 |
(562) 0x431224 MOV -0x30(%RBP),%RDI |
(562) 0x431228 MOV %R14,%RSI |
(562) 0x43122b MOV %R13,%RDX |
(562) 0x43122e CALL 4e7920 <hypre_swap2> |
(562) 0x431233 VMOVDDUP 0xceeed(%RIP),%XMM2 |
(562) 0x43123b VMOVSD (%R14,%R12,8),%XMM0 |
(562) 0x431241 VANDPD %XMM2,%XMM0,%XMM0 |
(562) 0x431245 VMOVSD -0x18(%R14,%RBX,8),%XMM1 |
(562) 0x43124c VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x431250 VUCOMISD %XMM0,%XMM1 |
(562) 0x431254 JBE 43119c |
(562) 0x43125a LEA -0x3(%RBX),%RCX |
(562) 0x43125e INC %R13 |
(562) 0x431261 MOV -0x30(%RBP),%RDI |
(562) 0x431265 MOV %R14,%RSI |
(562) 0x431268 MOV %R13,%RDX |
(562) 0x43126b CALL 4e7920 <hypre_swap2> |
(562) 0x431270 VMOVDDUP 0xceeb0(%RIP),%XMM2 |
(562) 0x431278 VMOVSD (%R14,%R12,8),%XMM0 |
(562) 0x43127e VANDPD %XMM2,%XMM0,%XMM0 |
(562) 0x431282 VMOVSD -0x10(%R14,%RBX,8),%XMM1 |
(562) 0x431289 VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x43128d VUCOMISD %XMM0,%XMM1 |
(562) 0x431291 JBE 4311b1 |
(562) 0x431297 LEA -0x2(%RBX),%RCX |
(562) 0x43129b INC %R13 |
(562) 0x43129e MOV -0x30(%RBP),%RDI |
(562) 0x4312a2 MOV %R14,%RSI |
(562) 0x4312a5 MOV %R13,%RDX |
(562) 0x4312a8 CALL 4e7920 <hypre_swap2> |
(562) 0x4312ad VMOVDDUP 0xcee73(%RIP),%XMM2 |
(562) 0x4312b5 VMOVSD (%R14,%R12,8),%XMM0 |
(562) 0x4312bb VANDPD %XMM2,%XMM0,%XMM0 |
(562) 0x4312bf VMOVSD -0x8(%R14,%RBX,8),%XMM1 |
(562) 0x4312c6 VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x4312ca VUCOMISD %XMM0,%XMM1 |
(562) 0x4312ce JBE 4311c6 |
(562) 0x4312d4 LEA -0x1(%RBX),%RCX |
(562) 0x4312d8 INC %R13 |
(562) 0x4312db MOV -0x30(%RBP),%RDI |
(562) 0x4312df MOV %R14,%RSI |
(562) 0x4312e2 MOV %R13,%RDX |
(562) 0x4312e5 CALL 4e7920 <hypre_swap2> |
(562) 0x4312ea VMOVDDUP 0xcee36(%RIP),%XMM2 |
(562) 0x4312f2 VMOVSD (%R14,%R12,8),%XMM0 |
(562) 0x4312f8 VANDPD %XMM2,%XMM0,%XMM0 |
(562) 0x4312fc VMOVSD (%R14,%RBX,8),%XMM1 |
(562) 0x431302 VANDPD %XMM2,%XMM1,%XMM1 |
(562) 0x431306 VUCOMISD %XMM0,%XMM1 |
(562) 0x43130a JBE 4310b0 |
(562) 0x431310 INC %R13 |
(562) 0x431313 MOV -0x30(%RBP),%RDI |
(562) 0x431317 MOV %R14,%RSI |
(562) 0x43131a MOV %R13,%RDX |
(562) 0x43131d MOV %RBX,%RCX |
(562) 0x431320 CALL 4e7920 <hypre_swap2> |
(562) 0x431325 JMP 4310b0 |
0x43132a ADD $0x38,%RSP |
0x43132e POP %RBX |
0x43132f POP %R12 |
0x431331 POP %R13 |
0x431333 POP %R14 |
0x431335 POP %R15 |
0x431337 POP %RBP |
0x431338 RET |
0x431339 NOPL (%RAX) |
Path / |
Source file and lines | par_interp.c:3180-3192 |
Module | exec |
nb instructions | 34 |
nb uops | 34 |
loop length | 120 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 5.67 cycles |
front end | 5.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.78 |
Stall cycles | 0.00 |
Front-end | 5.67 |
Dispatch | 4.50 |
Overall L1 | 5.67 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 431338 <hypre_qsort2abs+0x3d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0xcf19a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 430fda <hypre_qsort2abs+0x7a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | par_interp.c:3180-3192 |
Module | exec |
nb instructions | 34 |
nb uops | 34 |
loop length | 120 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 5.67 cycles |
front end | 5.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.78 |
Stall cycles | 0.00 |
Front-end | 5.67 |
Dispatch | 4.50 |
Overall L1 | 5.67 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 431338 <hypre_qsort2abs+0x3d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0xcf19a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 430fda <hypre_qsort2abs+0x7a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_qsort2abs– | 0.01 | 0 |
▼Loop 560 - par_interp.c:3180-3191 - exec– | 0 | 0 |
○Loop 561 - par_interp.c:3180-3187 - exec | 0 | 0 |
○Loop 562 - par_interp.c:3184-3187 - exec | 0 | 0 |