Function: hypre_qsort2abs | Module: exec | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
Function: hypre_qsort2abs | Module: exec | Source: par_interp.c:3180-3192 | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-587-0261/intel/AMG/build/AMG/AMG/parcsr_ls/par_interp.c: 3180 - 3192 |
-------------------------------------------------------------------------------- |
3180: if (left >= right) |
3181: return; |
3182: hypre_swap2( v, w, left, (left+right)/2); |
3183: last = left; |
3184: for (i = left+1; i <= right; i++) |
3185: if (fabs(w[i]) > fabs(w[left])) |
3186: { |
3187: hypre_swap2(v, w, ++last, i); |
3188: } |
3189: hypre_swap2(v, w, left, last); |
3190: hypre_qsort2abs(v, w, left, last-1); |
3191: hypre_qsort2abs(v, w, last+1, right); |
3192: } |
0x430d70 CMP %RCX,%RDX |
0x430d73 JGE 431148 |
0x430d79 PUSH %RBP |
0x430d7a MOV %RSP,%RBP |
0x430d7d PUSH %R15 |
0x430d7f PUSH %R14 |
0x430d81 PUSH %R13 |
0x430d83 PUSH %R12 |
0x430d85 PUSH %RBX |
0x430d86 SUB $0x38,%RSP |
0x430d8a MOV %RCX,%RBX |
0x430d8d MOV %RDX,%R12 |
0x430d90 MOV %RSI,%R14 |
0x430d93 MOV %RDI,%R15 |
0x430d96 VMOVDDUP 0xce80a(%RIP),%XMM0 |
0x430d9e VMOVUPD %XMM0,-0x60(%RBP) |
0x430da3 MOV %RDX,%R13 |
0x430da6 MOV %RCX,-0x48(%RBP) |
0x430daa MOV %RDI,-0x30(%RBP) |
0x430dae JMP 430dea |
(559) 0x430db0 MOV -0x30(%RBP),%R15 |
(559) 0x430db4 MOV %R15,%RDI |
(559) 0x430db7 MOV %R14,%RSI |
(559) 0x430dba MOV %R12,%RDX |
(559) 0x430dbd MOV %R13,%RCX |
(559) 0x430dc0 CALL 4e6ff0 <hypre_swap2> |
(559) 0x430dc5 LEA -0x1(%R13),%RCX |
(559) 0x430dc9 MOV %R15,%RDI |
(559) 0x430dcc MOV %R14,%RSI |
(559) 0x430dcf MOV %R12,%RDX |
(559) 0x430dd2 CALL 430d70 <hypre_qsort2abs> |
(559) 0x430dd7 INC %R13 |
(559) 0x430dda MOV %R13,%R12 |
(559) 0x430ddd MOV -0x48(%RBP),%RBX |
(559) 0x430de1 CMP %RBX,%R13 |
(559) 0x430de4 JGE 43113a |
(559) 0x430dea LEA (%R12,%RBX,1),%RAX |
(559) 0x430dee MOV %RAX,%RCX |
(559) 0x430df1 SHR $0x3f,%RCX |
(559) 0x430df5 ADD %RAX,%RCX |
(559) 0x430df8 SAR $0x1,%RCX |
(559) 0x430dfb MOV %R15,%RDI |
(559) 0x430dfe MOV %R14,%RSI |
(559) 0x430e01 MOV %R12,%RDX |
(559) 0x430e04 CALL 4e6ff0 <hypre_swap2> |
(559) 0x430e09 LEA 0x1(%R12),%RAX |
(559) 0x430e0e CMP %RAX,%RBX |
(559) 0x430e11 CMOVG %RBX,%RAX |
(559) 0x430e15 MOV %RAX,-0x40(%RBP) |
(559) 0x430e19 SUB %R12,%RAX |
(559) 0x430e1c MOV %RAX,-0x38(%RBP) |
(559) 0x430e20 CMP $0x8,%RAX |
(559) 0x430e24 JAE 430ea0 |
(559) 0x430e26 MOV -0x38(%RBP),%RCX |
(559) 0x430e2a MOV %RCX,%RAX |
(559) 0x430e2d AND $-0x8,%RAX |
(559) 0x430e31 CMP %RCX,%RAX |
(559) 0x430e34 JE 430db0 |
(559) 0x430e3a LEA 0x1(%R12,%RAX,1),%RBX |
(559) 0x430e3f NEGQ -0x40(%RBP) |
(559) 0x430e43 MOV -0x30(%RBP),%R15 |
(559) 0x430e47 JMP 430e66 |
0x430e49 NOPL (%RAX) |
(560) 0x430e50 MOV -0x40(%RBP),%RAX |
(560) 0x430e54 LEA 0x1(%RAX,%RBX,1),%RAX |
(560) 0x430e59 INC %RBX |
(560) 0x430e5c CMP $0x1,%RAX |
(560) 0x430e60 JE 430db4 |
(560) 0x430e66 VMOVSD (%R14,%RBX,8),%XMM0 |
(560) 0x430e6c VMOVUPD -0x60(%RBP),%XMM2 |
(560) 0x430e71 VANDPD %XMM2,%XMM0,%XMM0 |
(560) 0x430e75 VMOVSD (%R14,%R12,8),%XMM1 |
(560) 0x430e7b VANDPD %XMM2,%XMM1,%XMM1 |
(560) 0x430e7f VUCOMISD %XMM1,%XMM0 |
(560) 0x430e83 JBE 430e50 |
(560) 0x430e85 INC %R13 |
(560) 0x430e88 MOV %R15,%RDI |
(560) 0x430e8b MOV %R14,%RSI |
(560) 0x430e8e MOV %R13,%RDX |
(560) 0x430e91 MOV %RBX,%RCX |
(560) 0x430e94 CALL 4e6ff0 <hypre_swap2> |
(560) 0x430e99 JMP 430e50 |
0x430e9b NOPL (%RAX,%RAX,1) |
(559) 0x430ea0 MOV -0x38(%RBP),%R15 |
(559) 0x430ea4 SHR $0x3,%R15 |
(559) 0x430ea8 LEA 0x8(%R12),%RBX |
(559) 0x430ead MOV %R12,%R13 |
(559) 0x430eb0 JMP 430ecd |
0x430eb2 NOPW %CS:(%RAX,%RAX,1) |
(561) 0x430ec0 ADD $0x8,%RBX |
(561) 0x430ec4 DEC %R15 |
(561) 0x430ec7 JE 430e26 |
(561) 0x430ecd VMOVSD -0x38(%R14,%RBX,8),%XMM0 |
(561) 0x430ed4 VMOVUPD -0x60(%RBP),%XMM2 |
(561) 0x430ed9 VANDPD %XMM2,%XMM0,%XMM1 |
(561) 0x430edd VMOVSD (%R14,%R12,8),%XMM0 |
(561) 0x430ee3 VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x430ee7 VUCOMISD %XMM0,%XMM1 |
(561) 0x430eeb JBE 430f30 |
(561) 0x430eed LEA -0x7(%RBX),%RCX |
(561) 0x430ef1 INC %R13 |
(561) 0x430ef4 MOV -0x30(%RBP),%RDI |
(561) 0x430ef8 MOV %R14,%RSI |
(561) 0x430efb MOV %R13,%RDX |
(561) 0x430efe CALL 4e6ff0 <hypre_swap2> |
(561) 0x430f03 VMOVSD (%R14,%R12,8),%XMM0 |
(561) 0x430f09 VMOVDDUP 0xce697(%RIP),%XMM2 |
(561) 0x430f11 VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x430f15 VMOVSD -0x30(%R14,%RBX,8),%XMM1 |
(561) 0x430f1c VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x430f20 VUCOMISD %XMM0,%XMM1 |
(561) 0x430f24 JA 430f49 |
(561) 0x430f26 JMP 430f71 |
0x430f28 NOPL (%RAX,%RAX,1) |
(561) 0x430f30 VMOVDDUP 0xce670(%RIP),%XMM2 |
(561) 0x430f38 VMOVSD -0x30(%R14,%RBX,8),%XMM1 |
(561) 0x430f3f VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x430f43 VUCOMISD %XMM0,%XMM1 |
(561) 0x430f47 JBE 430f71 |
(561) 0x430f49 LEA -0x6(%RBX),%RCX |
(561) 0x430f4d INC %R13 |
(561) 0x430f50 MOV -0x30(%RBP),%RDI |
(561) 0x430f54 MOV %R14,%RSI |
(561) 0x430f57 MOV %R13,%RDX |
(561) 0x430f5a CALL 4e6ff0 <hypre_swap2> |
(561) 0x430f5f VMOVDDUP 0xce641(%RIP),%XMM2 |
(561) 0x430f67 VMOVSD (%R14,%R12,8),%XMM0 |
(561) 0x430f6d VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x430f71 VMOVSD -0x28(%R14,%RBX,8),%XMM1 |
(561) 0x430f78 VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x430f7c VUCOMISD %XMM0,%XMM1 |
(561) 0x430f80 JA 430ff0 |
(561) 0x430f82 VMOVSD -0x20(%R14,%RBX,8),%XMM1 |
(561) 0x430f89 VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x430f8d VUCOMISD %XMM0,%XMM1 |
(561) 0x430f91 JA 43102d |
(561) 0x430f97 VMOVSD -0x18(%R14,%RBX,8),%XMM1 |
(561) 0x430f9e VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x430fa2 VUCOMISD %XMM0,%XMM1 |
(561) 0x430fa6 JA 43106a |
(561) 0x430fac VMOVSD -0x10(%R14,%RBX,8),%XMM1 |
(561) 0x430fb3 VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x430fb7 VUCOMISD %XMM0,%XMM1 |
(561) 0x430fbb JA 4310a7 |
(561) 0x430fc1 VMOVSD -0x8(%R14,%RBX,8),%XMM1 |
(561) 0x430fc8 VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x430fcc VUCOMISD %XMM0,%XMM1 |
(561) 0x430fd0 JA 4310e4 |
(561) 0x430fd6 VMOVSD (%R14,%RBX,8),%XMM1 |
(561) 0x430fdc VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x430fe0 VUCOMISD %XMM0,%XMM1 |
(561) 0x430fe4 JBE 430ec0 |
(561) 0x430fea JMP 431120 |
0x430fef NOP |
(561) 0x430ff0 LEA -0x5(%RBX),%RCX |
(561) 0x430ff4 INC %R13 |
(561) 0x430ff7 MOV -0x30(%RBP),%RDI |
(561) 0x430ffb MOV %R14,%RSI |
(561) 0x430ffe MOV %R13,%RDX |
(561) 0x431001 CALL 4e6ff0 <hypre_swap2> |
(561) 0x431006 VMOVDDUP 0xce59a(%RIP),%XMM2 |
(561) 0x43100e VMOVSD (%R14,%R12,8),%XMM0 |
(561) 0x431014 VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x431018 VMOVSD -0x20(%R14,%RBX,8),%XMM1 |
(561) 0x43101f VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x431023 VUCOMISD %XMM0,%XMM1 |
(561) 0x431027 JBE 430f97 |
(561) 0x43102d LEA -0x4(%RBX),%RCX |
(561) 0x431031 INC %R13 |
(561) 0x431034 MOV -0x30(%RBP),%RDI |
(561) 0x431038 MOV %R14,%RSI |
(561) 0x43103b MOV %R13,%RDX |
(561) 0x43103e CALL 4e6ff0 <hypre_swap2> |
(561) 0x431043 VMOVDDUP 0xce55d(%RIP),%XMM2 |
(561) 0x43104b VMOVSD (%R14,%R12,8),%XMM0 |
(561) 0x431051 VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x431055 VMOVSD -0x18(%R14,%RBX,8),%XMM1 |
(561) 0x43105c VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x431060 VUCOMISD %XMM0,%XMM1 |
(561) 0x431064 JBE 430fac |
(561) 0x43106a LEA -0x3(%RBX),%RCX |
(561) 0x43106e INC %R13 |
(561) 0x431071 MOV -0x30(%RBP),%RDI |
(561) 0x431075 MOV %R14,%RSI |
(561) 0x431078 MOV %R13,%RDX |
(561) 0x43107b CALL 4e6ff0 <hypre_swap2> |
(561) 0x431080 VMOVDDUP 0xce520(%RIP),%XMM2 |
(561) 0x431088 VMOVSD (%R14,%R12,8),%XMM0 |
(561) 0x43108e VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x431092 VMOVSD -0x10(%R14,%RBX,8),%XMM1 |
(561) 0x431099 VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x43109d VUCOMISD %XMM0,%XMM1 |
(561) 0x4310a1 JBE 430fc1 |
(561) 0x4310a7 LEA -0x2(%RBX),%RCX |
(561) 0x4310ab INC %R13 |
(561) 0x4310ae MOV -0x30(%RBP),%RDI |
(561) 0x4310b2 MOV %R14,%RSI |
(561) 0x4310b5 MOV %R13,%RDX |
(561) 0x4310b8 CALL 4e6ff0 <hypre_swap2> |
(561) 0x4310bd VMOVDDUP 0xce4e3(%RIP),%XMM2 |
(561) 0x4310c5 VMOVSD (%R14,%R12,8),%XMM0 |
(561) 0x4310cb VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x4310cf VMOVSD -0x8(%R14,%RBX,8),%XMM1 |
(561) 0x4310d6 VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x4310da VUCOMISD %XMM0,%XMM1 |
(561) 0x4310de JBE 430fd6 |
(561) 0x4310e4 LEA -0x1(%RBX),%RCX |
(561) 0x4310e8 INC %R13 |
(561) 0x4310eb MOV -0x30(%RBP),%RDI |
(561) 0x4310ef MOV %R14,%RSI |
(561) 0x4310f2 MOV %R13,%RDX |
(561) 0x4310f5 CALL 4e6ff0 <hypre_swap2> |
(561) 0x4310fa VMOVDDUP 0xce4a6(%RIP),%XMM2 |
(561) 0x431102 VMOVSD (%R14,%R12,8),%XMM0 |
(561) 0x431108 VANDPD %XMM2,%XMM0,%XMM0 |
(561) 0x43110c VMOVSD (%R14,%RBX,8),%XMM1 |
(561) 0x431112 VANDPD %XMM2,%XMM1,%XMM1 |
(561) 0x431116 VUCOMISD %XMM0,%XMM1 |
(561) 0x43111a JBE 430ec0 |
(561) 0x431120 INC %R13 |
(561) 0x431123 MOV -0x30(%RBP),%RDI |
(561) 0x431127 MOV %R14,%RSI |
(561) 0x43112a MOV %R13,%RDX |
(561) 0x43112d MOV %RBX,%RCX |
(561) 0x431130 CALL 4e6ff0 <hypre_swap2> |
(561) 0x431135 JMP 430ec0 |
0x43113a ADD $0x38,%RSP |
0x43113e POP %RBX |
0x43113f POP %R12 |
0x431141 POP %R13 |
0x431143 POP %R14 |
0x431145 POP %R15 |
0x431147 POP %RBP |
0x431148 RET |
0x431149 NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►60.60+ | hypre_BoomerAMGInterpTruncatio[...] | par_interp.c:2912 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►30.31+ | hypre_qsort2abs | par_interp.c:3191 | exec |
○ | hypre_BoomerAMGInterpTruncatio[...] | par_interp.c:2912 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►3.03+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►3.03+ | hypre_qsort2abs | par_interp.c:3191 | exec |
○ | hypre_qsort2abs | par_interp.c:3191 | exec |
○ | hypre_BoomerAMGInterpTruncatio[...] | par_interp.c:2912 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►3.03+ | hypre_qsort2abs | par_interp.c:3191 | exec |
○ | hypre_qsort2abs | par_interp.c:3191 | exec |
○ | hypre_qsort2abs | par_interp.c:3191 | exec |
○ | hypre_qsort2abs | par_interp.c:3191 | exec |
○ | hypre_BoomerAMGInterpTruncatio[...] | par_interp.c:2912 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | par_interp.c:3180-3192 |
Module | exec |
nb instructions | 34 |
nb uops | 34 |
loop length | 121 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 5.67 cycles |
front end | 5.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.78 |
Stall cycles | 0.00 |
Front-end | 5.67 |
Dispatch | 4.50 |
Overall L1 | 5.67 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 431148 <hypre_qsort2abs+0x3d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0xce80a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 430dea <hypre_qsort2abs+0x7a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | par_interp.c:3180-3192 |
Module | exec |
nb instructions | 34 |
nb uops | 34 |
loop length | 121 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 5.67 cycles |
front end | 5.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
cycles | 1.00 | 0.40 | 2.67 | 2.67 | 4.50 | 0.40 | 1.00 | 4.50 | 4.50 | 4.50 | 0.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.78 |
Stall cycles | 0.00 |
Front-end | 5.67 |
Dispatch | 4.50 |
Overall L1 | 5.67 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 0% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 12% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 431148 <hypre_qsort2abs+0x3d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDDUP 0xce80a(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD %XMM0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 430dea <hypre_qsort2abs+0x7a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_qsort2abs– | 0.01 | 0 |
▼Loop 559 - par_interp.c:3180-3191 - exec– | 0 | 0 |
○Loop 561 - par_interp.c:3184-3187 - exec | 0 | 0 |
○Loop 560 - par_interp.c:3180-3187 - exec | 0 | 0 |