Loop Id: 980 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.21% |
---|
Loop Id: 980 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.21% |
---|
0x4510c0 MOV -0x90(%RBP),%RDX |
0x4510c7 INC %RDX |
0x4510ca CMP -0x88(%RBP),%RDX |
0x4510d1 JGE 451d00 |
0x4510d7 MOV -0x100(%RBP),%RAX |
0x4510de MOV %RDX,-0x90(%RBP) |
0x4510e5 MOV (%RAX,%RDX,8),%R9 |
0x4510e9 MOV -0x108(%RBP),%RAX |
0x4510f0 MOV (%RAX,%R9,8),%R8 |
0x4510f4 MOV -0x98(%RBP),%RAX |
0x4510fb MOV (%RAX,%R9,8),%R12 |
0x4510ff MOV 0x8(%RAX,%R9,8),%RDI |
0x451104 LEA (%RDI,%R8,1),%RAX |
0x451108 SUB %R12,%RAX |
0x45110b CMP %RAX,%R8 |
0x45110e MOV %R9,-0x30(%RBP) |
0x451112 JGE 4512ef |
0x451118 MOV -0x60(%RBP),%RAX |
0x45111c MOV (%RAX),%RAX |
0x45111f MOV -0xe0(%RBP),%RCX |
0x451126 MOV (%RCX,%RAX,8),%RAX |
0x45112a MOV %RDI,%RCX |
0x45112d SUB %R12,%RCX |
0x451130 CMP $0xd,%RCX |
0x451134 JB 4512c0 |
0x45113a MOV %RDI,-0x80(%RBP) |
0x45113e LEA (%R14,%R12,8),%RDI |
0x451142 MOV %RCX,-0x78(%RBP) |
0x451146 LEA (,%RCX,8),%RDX |
0x45114e XOR %ESI,%ESI |
0x451150 MOV %RAX,-0x38(%RBP) |
0x451154 MOV %R8,-0x48(%RBP) |
0x451158 VZEROUPPER |
0x45115b CALL 527570 <__intel_avx_rep_memset> |
0x451160 MOV -0x78(%RBP),%RAX |
0x451164 MOV %RAX,%R10 |
0x451167 SHR $0x3,%RAX |
0x45116b MOV -0x48(%RBP),%RDX |
0x45116f MOV -0x38(%RBP),%RCX |
0x451173 LEA 0x38(%RCX,%RDX,8),%RCX |
0x451178 MOV -0xd8(%RBP),%RDX |
0x45117f LEA (%RDX,%R12,8),%RDX |
0x451183 MOV %RAX,%RSI |
0x451186 XOR %EDI,%EDI |
0x451188 MOV -0x50(%RBP),%R11 |
0x45118c NOPL (%RAX) |
(998) 0x451190 MOV -0x38(%RCX,%RDI,8),%R8 |
(998) 0x451195 LEA (%R12,%RDI,1),%R9 |
(998) 0x451199 MOV %R9,(%R11,%R8,8) |
(998) 0x45119d MOV %R8,-0x38(%RDX,%RDI,8) |
(998) 0x4511a2 MOV -0x30(%RCX,%RDI,8),%R8 |
(998) 0x4511a7 LEA 0x1(%R12,%RDI,1),%R9 |
(998) 0x4511ac MOV %R9,(%R11,%R8,8) |
(998) 0x4511b0 MOV %R8,-0x30(%RDX,%RDI,8) |
(998) 0x4511b5 MOV -0x28(%RCX,%RDI,8),%R8 |
(998) 0x4511ba LEA 0x2(%R12,%RDI,1),%R9 |
(998) 0x4511bf MOV %R9,(%R11,%R8,8) |
(998) 0x4511c3 MOV %R8,-0x28(%RDX,%RDI,8) |
(998) 0x4511c8 MOV -0x20(%RCX,%RDI,8),%R8 |
(998) 0x4511cd LEA 0x3(%R12,%RDI,1),%R9 |
(998) 0x4511d2 MOV %R9,(%R11,%R8,8) |
(998) 0x4511d6 MOV %R8,-0x20(%RDX,%RDI,8) |
(998) 0x4511db MOV -0x18(%RCX,%RDI,8),%R8 |
(998) 0x4511e0 LEA 0x4(%R12,%RDI,1),%R9 |
(998) 0x4511e5 MOV %R9,(%R11,%R8,8) |
(998) 0x4511e9 MOV %R8,-0x18(%RDX,%RDI,8) |
(998) 0x4511ee MOV -0x10(%RCX,%RDI,8),%R8 |
(998) 0x4511f3 LEA 0x5(%R12,%RDI,1),%R9 |
(998) 0x4511f8 MOV %R9,(%R11,%R8,8) |
(998) 0x4511fc MOV %R8,-0x10(%RDX,%RDI,8) |
(998) 0x451201 MOV -0x8(%RCX,%RDI,8),%R8 |
(998) 0x451206 LEA 0x6(%R12,%RDI,1),%R9 |
(998) 0x45120b MOV %R9,(%R11,%R8,8) |
(998) 0x45120f MOV %R8,-0x8(%RDX,%RDI,8) |
(998) 0x451214 MOV (%RCX,%RDI,8),%R8 |
(998) 0x451218 LEA 0x7(%R12,%RDI,1),%R9 |
(998) 0x45121d MOV %R9,(%R11,%R8,8) |
(998) 0x451221 MOV %R8,(%RDX,%RDI,8) |
(998) 0x451225 ADD $0x8,%RDI |
(998) 0x451229 DEC %RSI |
(998) 0x45122c JNE 451190 |
0x451232 MOV %R10,%RDX |
0x451235 MOV %R10,%RCX |
0x451238 AND $-0x8,%RCX |
0x45123c CMP %R10,%RCX |
0x45123f MOV -0x40(%RBP),%R10 |
0x451243 MOV -0x58(%RBP),%RDX |
0x451247 MOV -0x30(%RBP),%R9 |
0x45124b MOV -0x80(%RBP),%RSI |
0x45124f MOV -0x38(%RBP),%RDI |
0x451253 MOV -0x48(%RBP),%R8 |
0x451257 JAE 4512ef |
0x45125d ADD %RCX,%R12 |
0x451260 SAL $0x6,%RAX |
0x451264 LEA (%RAX,%R8,8),%RAX |
0x451268 ADD %RAX,%RDI |
0x45126b NOPL (%RAX,%RAX,1) |
(999) 0x451270 MOV (%RDI),%RAX |
(999) 0x451273 MOV %R12,(%R11,%RAX,8) |
(999) 0x451277 MOV %RAX,(%RDX,%R12,8) |
(999) 0x45127b INC %R12 |
(999) 0x45127e ADD $0x8,%RDI |
(999) 0x451282 CMP %R12,%RSI |
(999) 0x451285 JNE 451270 |
0x451287 JMP 4512ef |
0x4512c0 LEA (%RAX,%R8,8),%RAX |
0x4512c4 MOV -0x58(%RBP),%RDX |
0x4512c8 MOV -0x50(%RBP),%RSI |
0x4512cc NOPL (%RAX) |
(997) 0x4512d0 MOV (%RAX),%RCX |
(997) 0x4512d3 MOV %R12,(%RSI,%RCX,8) |
(997) 0x4512d7 MOVQ $0,(%R14,%R12,8) |
(997) 0x4512df MOV %RCX,(%RDX,%R12,8) |
(997) 0x4512e3 INC %R12 |
(997) 0x4512e6 ADD $0x8,%RAX |
(997) 0x4512ea CMP %R12,%RDI |
(997) 0x4512ed JNE 4512d0 |
0x4512ef MOV -0x110(%RBP),%RAX |
0x4512f6 MOV (%RAX,%R9,8),%RSI |
0x4512fa MOV -0xa0(%RBP),%RAX |
0x451301 MOV (%RAX,%R9,8),%R12 |
0x451305 MOV 0x8(%RAX,%R9,8),%RDX |
0x45130a LEA (%RDX,%RSI,1),%RAX |
0x45130e SUB %R12,%RAX |
0x451311 CMP %RAX,%RSI |
0x451314 JGE 4514b0 |
0x45131a MOV -0x60(%RBP),%RAX |
0x45131e MOV (%RAX),%RAX |
0x451321 MOV -0xe8(%RBP),%RCX |
0x451328 MOV (%RCX,%RAX,8),%RAX |
0x45132c MOV %RDX,%RCX |
0x45132f SUB %R12,%RCX |
0x451332 CMP $0xd,%RCX |
0x451336 JB 451480 |
0x45133c MOV %RDX,-0x48(%RBP) |
0x451340 LEA (%R13,%R12,8),%RDI |
0x451345 LEA (,%RCX,8),%RDX |
0x45134d MOV %RSI,-0x38(%RBP) |
0x451351 XOR %ESI,%ESI |
0x451353 MOV %RAX,-0x80(%RBP) |
0x451357 MOV %RCX,-0x78(%RBP) |
0x45135b VZEROUPPER |
0x45135e CALL 527570 <__intel_avx_rep_memset> |
0x451363 MOV -0x80(%RBP),%R11 |
0x451367 MOV -0x78(%RBP),%RAX |
0x45136b MOV %RAX,%R10 |
0x45136e SHR $0x3,%RAX |
0x451372 MOV -0x38(%RBP),%RCX |
0x451376 LEA 0x38(%R11,%RCX,8),%RCX |
0x45137b MOV -0xd0(%RBP),%RDX |
0x451382 LEA (%RDX,%R12,8),%RDX |
0x451386 MOV %RAX,%RSI |
0x451389 XOR %EDI,%EDI |
0x45138b NOPL (%RAX,%RAX,1) |
(995) 0x451390 MOV -0x38(%RCX,%RDI,8),%R8 |
(995) 0x451395 LEA (%R12,%RDI,1),%R9 |
(995) 0x451399 MOV %R9,(%R15,%R8,8) |
(995) 0x45139d MOV %R8,-0x38(%RDX,%RDI,8) |
(995) 0x4513a2 MOV -0x30(%RCX,%RDI,8),%R8 |
(995) 0x4513a7 LEA 0x1(%R12,%RDI,1),%R9 |
(995) 0x4513ac MOV %R9,(%R15,%R8,8) |
(995) 0x4513b0 MOV %R8,-0x30(%RDX,%RDI,8) |
(995) 0x4513b5 MOV -0x28(%RCX,%RDI,8),%R8 |
(995) 0x4513ba LEA 0x2(%R12,%RDI,1),%R9 |
(995) 0x4513bf MOV %R9,(%R15,%R8,8) |
(995) 0x4513c3 MOV %R8,-0x28(%RDX,%RDI,8) |
(995) 0x4513c8 MOV -0x20(%RCX,%RDI,8),%R8 |
(995) 0x4513cd LEA 0x3(%R12,%RDI,1),%R9 |
(995) 0x4513d2 MOV %R9,(%R15,%R8,8) |
(995) 0x4513d6 MOV %R8,-0x20(%RDX,%RDI,8) |
(995) 0x4513db MOV -0x18(%RCX,%RDI,8),%R8 |
(995) 0x4513e0 LEA 0x4(%R12,%RDI,1),%R9 |
(995) 0x4513e5 MOV %R9,(%R15,%R8,8) |
(995) 0x4513e9 MOV %R8,-0x18(%RDX,%RDI,8) |
(995) 0x4513ee MOV -0x10(%RCX,%RDI,8),%R8 |
(995) 0x4513f3 LEA 0x5(%R12,%RDI,1),%R9 |
(995) 0x4513f8 MOV %R9,(%R15,%R8,8) |
(995) 0x4513fc MOV %R8,-0x10(%RDX,%RDI,8) |
(995) 0x451401 MOV -0x8(%RCX,%RDI,8),%R8 |
(995) 0x451406 LEA 0x6(%R12,%RDI,1),%R9 |
(995) 0x45140b MOV %R9,(%R15,%R8,8) |
(995) 0x45140f MOV %R8,-0x8(%RDX,%RDI,8) |
(995) 0x451414 MOV (%RCX,%RDI,8),%R8 |
(995) 0x451418 LEA 0x7(%R12,%RDI,1),%R9 |
(995) 0x45141d MOV %R9,(%R15,%R8,8) |
(995) 0x451421 MOV %R8,(%RDX,%RDI,8) |
(995) 0x451425 ADD $0x8,%RDI |
(995) 0x451429 DEC %RSI |
(995) 0x45142c JNE 451390 |
0x451432 MOV %R10,%RDX |
0x451435 MOV %R10,%RCX |
0x451438 AND $-0x8,%RCX |
0x45143c CMP %R10,%RCX |
0x45143f MOV -0x40(%RBP),%R10 |
0x451443 MOV -0x30(%RBP),%R9 |
0x451447 MOV -0x48(%RBP),%RDX |
0x45144b MOV -0x38(%RBP),%RSI |
0x45144f JAE 4514b0 |
0x451451 ADD %RCX,%R12 |
0x451454 SAL $0x6,%RAX |
0x451458 LEA (%RAX,%RSI,8),%RAX |
0x45145c ADD %RAX,%R11 |
0x45145f NOP |
(996) 0x451460 MOV (%R11),%RAX |
(996) 0x451463 MOV %R12,(%R15,%RAX,8) |
(996) 0x451467 MOV %RAX,(%R10,%R12,8) |
(996) 0x45146b INC %R12 |
(996) 0x45146e ADD $0x8,%R11 |
(996) 0x451472 CMP %R12,%RDX |
(996) 0x451475 JNE 451460 |
0x451477 JMP 4514b0 |
0x451480 LEA (%RAX,%RSI,8),%RAX |
0x451484 NOPW %CS:(%RAX,%RAX,1) |
(994) 0x451490 MOV (%RAX),%RCX |
(994) 0x451493 MOV %R12,(%R15,%RCX,8) |
(994) 0x451497 MOVQ $0,(%R13,%R12,8) |
(994) 0x4514a0 MOV %RCX,(%R10,%R12,8) |
(994) 0x4514a4 INC %R12 |
(994) 0x4514a7 ADD $0x8,%RAX |
(994) 0x4514ab CMP %R12,%RDX |
(994) 0x4514ae JNE 451490 |
0x4514b0 MOV -0xb8(%RBP),%RCX |
0x4514b7 MOV (%RCX,%R9,8),%RAX |
0x4514bb MOV 0x8(%RCX,%R9,8),%RCX |
0x4514c0 CMP %RCX,%RAX |
0x4514c3 MOV -0x50(%RBP),%R12 |
0x4514c7 JGE 451540 |
0x4514c9 MOV -0x60(%RBP),%RDX |
0x4514cd MOV (%RDX),%RDX |
0x4514d0 DEC %RDX |
0x4514d3 JMP 451508 |
(993) 0x451500 INC %RAX |
(993) 0x451503 CMP %RCX,%RAX |
(993) 0x451506 JGE 451540 |
(993) 0x451508 MOV -0x168(%RBP),%RSI |
(993) 0x45150f MOV (%RSI,%RAX,8),%RSI |
(993) 0x451513 MOV -0x178(%RBP),%RDI |
(993) 0x45151a CMP %RDX,(%RDI,%RSI,8) |
(993) 0x45151e JNE 451500 |
(993) 0x451520 MOV -0x70(%RBP),%RCX |
(993) 0x451524 MOV %R9,(%RCX,%RSI,8) |
(993) 0x451528 MOV -0xb8(%RBP),%RCX |
(993) 0x45152f MOV 0x8(%RCX,%R9,8),%RCX |
(993) 0x451534 JMP 451500 |
0x451540 MOV -0xc0(%RBP),%RCX |
0x451547 MOV (%RCX,%R9,8),%RAX |
0x45154b MOV 0x8(%RCX,%R9,8),%RCX |
0x451550 CMP %RCX,%RAX |
0x451553 JGE 4515c0 |
0x451555 MOV -0x60(%RBP),%RDX |
0x451559 MOV (%RDX),%RDX |
0x45155c DEC %RDX |
0x45155f JMP 451588 |
(992) 0x451580 INC %RAX |
(992) 0x451583 CMP %RCX,%RAX |
(992) 0x451586 JGE 4515c0 |
(992) 0x451588 MOV -0x170(%RBP),%RSI |
(992) 0x45158f MOV (%RSI,%RAX,8),%RSI |
(992) 0x451593 MOV -0x180(%RBP),%RDI |
(992) 0x45159a CMP %RDX,(%RDI,%RSI,8) |
(992) 0x45159e JNE 451580 |
(992) 0x4515a0 MOV -0x68(%RBP),%RCX |
(992) 0x4515a4 MOV %R9,(%RCX,%RSI,8) |
(992) 0x4515a8 MOV -0xc0(%RBP),%RCX |
(992) 0x4515af MOV 0x8(%RCX,%R9,8),%RCX |
(992) 0x4515b4 JMP 451580 |
0x4515c0 MOV -0xf0(%RBP),%RAX |
0x4515c7 MOV (%RAX,%R9,8),%RCX |
0x4515cb MOV 0x8(%RAX,%R9,8),%R11 |
0x4515d0 LEA 0x1(%RCX),%RDX |
0x4515d4 VXORPD %XMM1,%XMM1,%XMM1 |
0x4515d8 CMP %R11,%RDX |
0x4515db MOV %RCX,-0x48(%RBP) |
0x4515df VXORPD %XMM0,%XMM0,%XMM0 |
0x4515e3 JGE 451a00 |
0x4515e9 MOV -0x58(%RBP),%RAX |
0x4515ed MOV %R11,-0x38(%RBP) |
0x4515f1 JMP 451614 |
(987) 0x451600 MOV -0x58(%RBP),%RAX |
(987) 0x451604 MOV -0x30(%RBP),%R9 |
(987) 0x451608 INC %RDX |
(987) 0x45160b CMP %R11,%RDX |
(987) 0x45160e JE 451a00 |
(987) 0x451614 MOV -0x158(%RBP),%RSI |
(987) 0x45161b MOV (%RSI,%RDX,8),%RSI |
(987) 0x45161f MOV -0x70(%RBP),%RDI |
(987) 0x451623 CMP %R9,(%RDI,%RSI,8) |
(987) 0x451627 JNE 451680 |
(987) 0x451629 MOV -0x98(%RBP),%R8 |
(987) 0x451630 MOV (%R8,%RSI,8),%RDI |
(987) 0x451634 MOV 0x8(%R8,%RSI,8),%R8 |
(987) 0x451639 MOV %R8,%R9 |
(987) 0x45163c SUB %RDI,%R9 |
(987) 0x45163f JLE 451808 |
(987) 0x451645 CMP $0x4,%R9 |
(987) 0x451649 JAE 4516c0 |
(987) 0x45164b JMP 451799 |
(987) 0x451680 MOV -0x140(%RBP),%RDI |
(987) 0x451687 CMPQ $-0x3,(%RDI,%RSI,8) |
(987) 0x45168c JE 451608 |
(987) 0x451692 CMPQ $0x1,-0xc8(%RBP) |
(987) 0x45169a JE 4516b1 |
(987) 0x45169c MOV -0xa8(%RBP),%R8 |
(987) 0x4516a3 MOV (%R8,%R9,8),%RDI |
(987) 0x4516a7 CMP (%R8,%RSI,8),%RDI |
(987) 0x4516ab JNE 451608 |
(987) 0x4516b1 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(987) 0x4516b6 JMP 451608 |
(987) 0x4516c0 MOV %R9,%R10 |
(987) 0x4516c3 SHR $0x2,%R10 |
(987) 0x4516c7 LEA 0x18(,%RDI,8),%R11 |
(987) 0x4516cf MOV %R12,%RCX |
(987) 0x4516d2 NOPW %CS:(%RAX,%RAX,1) |
(990) 0x4516e0 MOV -0x18(%RAX,%R11,1),%R12 |
(990) 0x4516e5 VMOVSD -0x18(%R14,%R11,1),%XMM2 |
(990) 0x4516ec VMOVSD (%RBX,%RDX,8),%XMM3 |
(990) 0x4516f1 MOV (%RCX,%R12,8),%R12 |
(990) 0x4516f5 VMOVSD (%R14,%R12,8),%XMM4 |
(990) 0x4516fb VFMADD231SD %XMM2,%XMM3,%XMM4 |
(990) 0x451700 VMOVSD %XMM4,(%R14,%R12,8) |
(990) 0x451706 MOV -0x10(%RAX,%R11,1),%R12 |
(990) 0x45170b VMOVSD -0x10(%R14,%R11,1),%XMM4 |
(990) 0x451712 VMOVSD (%RBX,%RDX,8),%XMM5 |
(990) 0x451717 MOV (%RCX,%R12,8),%R12 |
(990) 0x45171b VMOVSD (%R14,%R12,8),%XMM6 |
(990) 0x451721 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(990) 0x451726 VMOVSD %XMM6,(%R14,%R12,8) |
(990) 0x45172c MOV -0x8(%RAX,%R11,1),%R12 |
(990) 0x451731 VMOVSD -0x8(%R14,%R11,1),%XMM6 |
(990) 0x451738 VMOVSD (%RBX,%RDX,8),%XMM7 |
(990) 0x45173d MOV (%RCX,%R12,8),%R12 |
(990) 0x451741 VMOVSD (%R14,%R12,8),%XMM8 |
(990) 0x451747 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(990) 0x45174c VMOVSD %XMM8,(%R14,%R12,8) |
(990) 0x451752 VMOVSD (%R14,%R11,1),%XMM8 |
(990) 0x451758 VMULSD (%RBX,%RDX,8),%XMM8,%XMM18 |
(990) 0x45175f MOV (%RAX,%R11,1),%R12 |
(990) 0x451763 MOV (%RCX,%R12,8),%R12 |
(990) 0x451767 VADDSD (%R14,%R12,8),%XMM18,%XMM8 |
(990) 0x45176e VMOVSD %XMM8,(%R14,%R12,8) |
(990) 0x451774 VFMADD213SD %XMM18,%XMM5,%XMM4 |
(990) 0x45177a VFMADD231SD %XMM2,%XMM3,%XMM4 |
(990) 0x45177f VFMADD231SD %XMM6,%XMM7,%XMM4 |
(990) 0x451784 VADDSD %XMM1,%XMM4,%XMM1 |
(990) 0x451788 VADDSD %XMM0,%XMM4,%XMM0 |
(990) 0x45178c ADD $0x20,%R11 |
(990) 0x451790 DEC %R10 |
(990) 0x451793 JNE 4516e0 |
(987) 0x451799 MOV %R9,%R10 |
(987) 0x45179c AND $-0x4,%R10 |
(987) 0x4517a0 CMP %R9,%R10 |
(987) 0x4517a3 MOV -0x38(%RBP),%R11 |
(987) 0x4517a7 JAE 451800 |
(987) 0x4517a9 ADD %R10,%RDI |
(987) 0x4517ac MOV -0x40(%RBP),%R10 |
(987) 0x4517b0 MOV -0x50(%RBP),%R12 |
(987) 0x4517b4 NOPW %CS:(%RAX,%RAX,1) |
(991) 0x4517c0 MOV (%RAX,%RDI,8),%R9 |
(991) 0x4517c4 VMOVSD (%R14,%RDI,8),%XMM2 |
(991) 0x4517ca VMULSD (%RBX,%RDX,8),%XMM2,%XMM18 |
(991) 0x4517d1 MOV (%R12,%R9,8),%R9 |
(991) 0x4517d5 VADDSD (%R14,%R9,8),%XMM18,%XMM2 |
(991) 0x4517dc VMOVSD %XMM2,(%R14,%R9,8) |
(991) 0x4517e2 VADDSD %XMM1,%XMM18,%XMM1 |
(991) 0x4517e8 VADDSD %XMM0,%XMM18,%XMM0 |
(991) 0x4517ee INC %RDI |
(991) 0x4517f1 CMP %RDI,%R8 |
(991) 0x4517f4 JNE 4517c0 |
(987) 0x4517f6 JMP 451808 |
(987) 0x451800 MOV -0x40(%RBP),%R10 |
(987) 0x451804 MOV -0x50(%RBP),%R12 |
(987) 0x451808 MOV -0xa0(%RBP),%RAX |
(987) 0x45180f MOV (%RAX,%RSI,8),%RDI |
(987) 0x451813 MOV 0x8(%RAX,%RSI,8),%RSI |
(987) 0x451818 MOV %RSI,%R8 |
(987) 0x45181b SUB %RDI,%R8 |
(987) 0x45181e JLE 451600 |
(987) 0x451824 CMP $0x4,%R8 |
(987) 0x451828 JAE 451840 |
(987) 0x45182a JMP 451922 |
(987) 0x451840 MOV %R8,%R9 |
(987) 0x451843 SHR $0x2,%R9 |
(987) 0x451847 MOV %R10,%RAX |
(987) 0x45184a LEA 0x18(,%RDI,8),%R10 |
(987) 0x451852 NOPW %CS:(%RAX,%RAX,1) |
(988) 0x451860 MOV -0x18(%RAX,%R10,1),%R11 |
(988) 0x451865 VMOVSD -0x18(%R13,%R10,1),%XMM2 |
(988) 0x45186c VMOVSD (%RBX,%RDX,8),%XMM3 |
(988) 0x451871 MOV (%R15,%R11,8),%R11 |
(988) 0x451875 VMOVSD (%R13,%R11,8),%XMM4 |
(988) 0x45187c VFMADD231SD %XMM2,%XMM3,%XMM4 |
(988) 0x451881 VMOVSD %XMM4,(%R13,%R11,8) |
(988) 0x451888 MOV -0x10(%RAX,%R10,1),%R11 |
(988) 0x45188d VMOVSD -0x10(%R13,%R10,1),%XMM4 |
(988) 0x451894 VMOVSD (%RBX,%RDX,8),%XMM5 |
(988) 0x451899 MOV (%R15,%R11,8),%R11 |
(988) 0x45189d VMOVSD (%R13,%R11,8),%XMM6 |
(988) 0x4518a4 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(988) 0x4518a9 VMOVSD %XMM6,(%R13,%R11,8) |
(988) 0x4518b0 MOV -0x8(%RAX,%R10,1),%R11 |
(988) 0x4518b5 VMOVSD -0x8(%R13,%R10,1),%XMM6 |
(988) 0x4518bc VMOVSD (%RBX,%RDX,8),%XMM7 |
(988) 0x4518c1 MOV (%R15,%R11,8),%R11 |
(988) 0x4518c5 VMOVSD (%R13,%R11,8),%XMM8 |
(988) 0x4518cc VFMADD231SD %XMM6,%XMM7,%XMM8 |
(988) 0x4518d1 VMOVSD %XMM8,(%R13,%R11,8) |
(988) 0x4518d8 VMOVSD (%R13,%R10,1),%XMM8 |
(988) 0x4518df VMULSD (%RBX,%RDX,8),%XMM8,%XMM18 |
(988) 0x4518e6 MOV (%RAX,%R10,1),%R11 |
(988) 0x4518ea MOV (%R15,%R11,8),%R11 |
(988) 0x4518ee VADDSD (%R13,%R11,8),%XMM18,%XMM8 |
(988) 0x4518f6 VMOVSD %XMM8,(%R13,%R11,8) |
(988) 0x4518fd VFMADD213SD %XMM18,%XMM5,%XMM4 |
(988) 0x451903 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(988) 0x451908 VFMADD231SD %XMM6,%XMM7,%XMM4 |
(988) 0x45190d VADDSD %XMM1,%XMM4,%XMM1 |
(988) 0x451911 VADDSD %XMM0,%XMM4,%XMM0 |
(988) 0x451915 ADD $0x20,%R10 |
(988) 0x451919 DEC %R9 |
(988) 0x45191c JNE 451860 |
(987) 0x451922 MOV %R8,%R9 |
(987) 0x451925 AND $-0x4,%R9 |
(987) 0x451929 CMP %R8,%R9 |
(987) 0x45192c JAE 4519c0 |
(987) 0x451932 ADD %R9,%RDI |
(987) 0x451935 MOV -0x40(%RBP),%R10 |
(987) 0x451939 MOV -0x58(%RBP),%RAX |
(987) 0x45193d MOV -0x30(%RBP),%R9 |
(987) 0x451941 MOV -0x38(%RBP),%R11 |
(987) 0x451945 NOPW %CS:(%RAX,%RAX,1) |
(989) 0x451950 MOV (%R10,%RDI,8),%R8 |
(989) 0x451954 VMOVSD (%R13,%RDI,8),%XMM2 |
(989) 0x45195b VMULSD (%RBX,%RDX,8),%XMM2,%XMM18 |
(989) 0x451962 MOV (%R15,%R8,8),%R8 |
(989) 0x451966 VADDSD (%R13,%R8,8),%XMM18,%XMM2 |
(989) 0x45196e VMOVSD %XMM2,(%R13,%R8,8) |
(989) 0x451975 VADDSD %XMM1,%XMM18,%XMM1 |
(989) 0x45197b VADDSD %XMM0,%XMM18,%XMM0 |
(989) 0x451981 INC %RDI |
(989) 0x451984 CMP %RDI,%RSI |
(989) 0x451987 JNE 451950 |
(987) 0x451989 JMP 451608 |
(987) 0x4519c0 MOV -0x40(%RBP),%R10 |
(987) 0x4519c4 MOV -0x58(%RBP),%RAX |
(987) 0x4519c8 MOV -0x30(%RBP),%R9 |
(987) 0x4519cc MOV -0x38(%RBP),%R11 |
(987) 0x4519d0 JMP 451608 |
0x451a00 MOV -0xf8(%RBP),%RAX |
0x451a07 MOV (%RAX,%R9,8),%RCX |
0x451a0b MOV 0x8(%RAX,%R9,8),%RDX |
0x451a10 CMP %RDX,%RCX |
0x451a13 JL 451ae0 |
0x451a19 MOV -0x48(%RBP),%RAX |
0x451a1d VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 |
0x451a22 VUCOMISD %XMM16,%XMM1 |
0x451a28 JE 451a36 |
0x451a2a VXORPD %XMM17,%XMM0,%XMM0 |
0x451a30 VDIVSD %XMM1,%XMM0,%XMM18 |
0x451a36 MOV -0x98(%RBP),%RAX |
0x451a3d MOV (%RAX,%R9,8),%RDX |
0x451a41 MOV 0x8(%RAX,%R9,8),%RAX |
0x451a46 MOV %RAX,%RSI |
0x451a49 SUB %RDX,%RSI |
0x451a4c JLE 451c65 |
0x451a52 MOV %RSI,%RCX |
0x451a55 AND $-0x4,%RCX |
0x451a59 JE 451c40 |
0x451a5f LEA -0x1(%RCX),%RDI |
0x451a63 VBROADCASTSD %XMM18,%YMM0 |
0x451a69 LEA (%R14,%RDX,8),%R8 |
0x451a6d XOR %R9D,%R9D |
(984) 0x451a70 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(984) 0x451a76 VMOVUPD %YMM1,(%R8,%R9,8) |
(984) 0x451a7c ADD $0x4,%R9 |
(984) 0x451a80 CMP %RDI,%R9 |
(984) 0x451a83 JBE 451a70 |
0x451a85 CMP %RCX,%RSI |
0x451a88 MOV -0x30(%RBP),%R9 |
0x451a8c JNE 451c42 |
0x451a92 JMP 451c65 |
(985) 0x451ac0 VADDSD (%R8,%RCX,8),%XMM0,%XMM0 |
(985) 0x451ac6 MOV %R12,%R11 |
(985) 0x451ac9 INC %RCX |
(985) 0x451acc CMP %RDX,%RCX |
(985) 0x451acf MOV -0x40(%RBP),%R10 |
(985) 0x451ad3 MOV %R11,%R12 |
(985) 0x451ad6 MOV -0x30(%RBP),%R9 |
(985) 0x451ada JE 451a19 |
(985) 0x451ae0 MOV -0x160(%RBP),%RAX |
(985) 0x451ae7 LEA (%RAX,%RCX,8),%RSI |
(985) 0x451aeb CMPQ $0,-0x188(%RBP) |
(985) 0x451af3 JE 451b03 |
(985) 0x451af5 MOV (%RSI),%RSI |
(985) 0x451af8 MOV -0x148(%RBP),%RDI |
(985) 0x451aff LEA (%RDI,%RSI,8),%RSI |
(985) 0x451b03 MOV (%RSI),%RDI |
(985) 0x451b06 TEST %RDI,%RDI |
(985) 0x451b09 JS 451bc0 |
(985) 0x451b0f MOV -0x68(%RBP),%RSI |
(985) 0x451b13 CMP %R9,(%RSI,%RDI,8) |
(985) 0x451b17 JNE 451bc0 |
(985) 0x451b1d MOV -0x138(%RBP),%RSI |
(985) 0x451b24 MOV 0x8(%RSI,%RDI,8),%RSI |
(985) 0x451b29 TEST %RSI,%RSI |
(985) 0x451b2c JLE 451ac6 |
(985) 0x451b2e MOV -0x120(%RBP),%R8 |
(985) 0x451b35 MOV (%R8,%RDI,8),%RDI |
(985) 0x451b39 ADD %RDI,%RSI |
(985) 0x451b3c MOV -0x60(%RBP),%R8 |
(985) 0x451b40 MOV (%R8),%R8 |
(985) 0x451b43 MOV -0x128(%RBP),%R9 |
(985) 0x451b4a MOV (%R9,%R8,8),%R8 |
(985) 0x451b4e MOV %R12,%R11 |
(985) 0x451b51 MOV -0xb0(%RBP),%R12 |
(985) 0x451b58 MOV -0x130(%RBP),%RAX |
(985) 0x451b5f NOP |
(986) 0x451b60 MOV (%R8,%RDI,8),%R9 |
(986) 0x451b64 VMOVSD (%RAX,%RDI,8),%XMM2 |
(986) 0x451b69 VMULSD (%R12,%RCX,8),%XMM2,%XMM18 |
(986) 0x451b70 TEST %R9,%R9 |
(986) 0x451b73 LEA (%R15,%R9,8),%R10 |
(986) 0x451b77 NOT %R9 |
(986) 0x451b7a LEA (%R11,%R9,8),%R9 |
(986) 0x451b7e CMOVNS %R10,%R9 |
(986) 0x451b82 MOV %R13,%R10 |
(986) 0x451b85 CMOVS %R14,%R10 |
(986) 0x451b89 MOV (%R9),%R9 |
(986) 0x451b8c VADDSD (%R10,%R9,8),%XMM18,%XMM2 |
(986) 0x451b93 VMOVSD %XMM2,(%R10,%R9,8) |
(986) 0x451b99 VADDSD %XMM1,%XMM18,%XMM1 |
(986) 0x451b9f VADDSD %XMM0,%XMM18,%XMM0 |
(986) 0x451ba5 INC %RDI |
(986) 0x451ba8 CMP %RSI,%RDI |
(986) 0x451bab JL 451b60 |
(985) 0x451bad JMP 451ac9 |
(985) 0x451bc0 MOV -0x150(%RBP),%RSI |
(985) 0x451bc7 CMPQ $-0x3,(%RSI,%RDI,8) |
(985) 0x451bcc JE 451ac6 |
(985) 0x451bd2 CMPQ $0x1,-0xc8(%RBP) |
(985) 0x451bda MOV -0xb0(%RBP),%R8 |
(985) 0x451be1 JE 451ac0 |
(985) 0x451be7 MOV -0x118(%RBP),%RSI |
(985) 0x451bee MOV (%RSI,%RDI,8),%RSI |
(985) 0x451bf2 MOV -0xa8(%RBP),%RDI |
(985) 0x451bf9 CMP (%RDI,%R9,8),%RSI |
(985) 0x451bfd JE 451ac0 |
(985) 0x451c03 JMP 451ac6 |
0x451c40 XOR %ECX,%ECX |
0x451c42 ADD %RDX,%RCX |
0x451c45 NOPW %CS:(%RAX,%RAX,1) |
(983) 0x451c50 VMULSD (%R14,%RCX,8),%XMM18,%XMM0 |
(983) 0x451c57 VMOVSD %XMM0,(%R14,%RCX,8) |
(983) 0x451c5d INC %RCX |
(983) 0x451c60 CMP %RCX,%RAX |
(983) 0x451c63 JNE 451c50 |
0x451c65 MOV -0xa0(%RBP),%RAX |
0x451c6c MOV (%RAX,%R9,8),%RDX |
0x451c70 MOV 0x8(%RAX,%R9,8),%RAX |
0x451c75 MOV %RAX,%RSI |
0x451c78 SUB %RDX,%RSI |
0x451c7b JLE 4510c0 |
0x451c81 MOV %RSI,%RCX |
0x451c84 AND $-0x4,%RCX |
0x451c88 JE 451cc0 |
0x451c8a LEA -0x1(%RCX),%RDI |
0x451c8e VBROADCASTSD %XMM18,%YMM0 |
0x451c94 LEA (%R13,%RDX,8),%R8 |
0x451c99 XOR %R9D,%R9D |
0x451c9c NOPL (%RAX) |
(982) 0x451ca0 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(982) 0x451ca6 VMOVUPD %YMM1,(%R8,%R9,8) |
(982) 0x451cac ADD $0x4,%R9 |
(982) 0x451cb0 CMP %RDI,%R9 |
(982) 0x451cb3 JBE 451ca0 |
0x451cb5 CMP %RCX,%RSI |
0x451cb8 JE 4510c0 |
0x451cbe JMP 451cc2 |
0x451cc0 XOR %ECX,%ECX |
0x451cc2 ADD %RDX,%RCX |
0x451cc5 NOPW %CS:(%RAX,%RAX,1) |
(981) 0x451cd0 VMULSD (%R13,%RCX,8),%XMM18,%XMM0 |
(981) 0x451cd8 VMOVSD %XMM0,(%R13,%RCX,8) |
(981) 0x451cdf INC %RCX |
(981) 0x451ce2 CMP %RCX,%RAX |
(981) 0x451ce5 JNE 451cd0 |
0x451ce7 JMP 4510c0 |
/home/eoseret/qaas_runs_CPU_9468/171-112-7443/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1876 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1774: for (i=thread_start; i < thread_stop; i++) |
1775: { |
1776: i1 = pass_array[i]; |
1777: sum_C = 0; |
1778: sum_N = 0; |
1779: j_start = P_diag_start[i1]; |
1780: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1781: cnt = P_diag_i[i1]; |
1782: for (j=j_start; j < j_end; j++) |
1783: { |
1784: k1 = P_diag_pass[pass][j]; |
1785: tmp_array[k1] = cnt; |
1786: P_diag_data[cnt] = 0; |
1787: P_diag_j[cnt++] = k1; |
1788: } |
1789: j_start = P_offd_start[i1]; |
1790: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1791: cnt_offd = P_offd_i[i1]; |
1792: for (j=j_start; j < j_end; j++) |
1793: { |
1794: k1 = P_offd_pass[pass][j]; |
1795: tmp_array_offd[k1] = cnt_offd; |
1796: P_offd_data[cnt_offd] = 0; |
1797: P_offd_j[cnt_offd++] = k1; |
1798: } |
1799: for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++) |
1800: { |
1801: j1 = S_diag_j[j]; |
1802: if (assigned[j1] == pass-1) |
1803: tmp_marker[j1] = i1; |
1804: } |
1805: for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++) |
1806: { |
1807: j1 = S_offd_j[j]; |
1808: if (assigned_offd[j1] == pass-1) |
1809: tmp_marker_offd[j1] = i1; |
1810: } |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
1838: } |
1839: } |
1840: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1841: { |
1842: if (col_offd_S_to_A) |
1843: j1 = map_A_to_S[A_offd_j[j]]; |
1844: else |
1845: j1 = A_offd_j[j]; |
1846: |
1847: if (j1 > -1 && tmp_marker_offd[j1] == i1) |
1848: { |
1849: j_start = Pext_start[j1]; |
1850: j_end = j_start+Pext_i[j1+1]; |
1851: for (k=j_start; k < j_end; k++) |
1852: { |
1853: k1 = Pext_pass[pass][k]; |
1854: alfa = A_offd_data[j]*Pext_data[k]; |
1855: if (k1 < 0) |
1856: P_diag_data[tmp_array[-k1-1]] += alfa; |
1857: else |
1858: P_offd_data[tmp_array_offd[k1]] += alfa; |
1859: sum_C += alfa; |
1860: sum_N += alfa; |
1861: } |
1862: } |
1863: else |
1864: { |
1865: if (CF_marker_offd[j1] != -3 && |
1866: (num_functions == 1 || dof_func_offd[j1] == dof_func[i1])) |
1867: sum_N += A_offd_data[j]; |
1868: } |
1869: } |
1870: diagonal = A_diag_data[A_diag_i[i1]]; |
1871: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1872: |
1873: for (j=P_diag_i[i1]; j < P_diag_i[i1+1]; j++) |
1874: P_diag_data[j] *= alfa; |
1875: for (j=P_offd_i[i1]; j < P_offd_i[i1+1]; j++) |
1876: P_offd_data[j] *= alfa; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.55 |
CQA speedup if FP arith vectorized | 3.78 |
CQA speedup if fully vectorized | 7.92 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.49 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1865-1865,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 34.83 |
CQA cycles if no scalar integer | 13.67 |
CQA cycles if FP arith vectorized | 9.21 |
CQA cycles if fully vectorized | 4.40 |
Front-end cycles | 34.83 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 12.00 |
P1 cycles | 11.80 |
P2 cycles | 23.33 |
P3 cycles | 23.33 |
P4 cycles | 7.00 |
P5 cycles | 11.60 |
P6 cycles | 12.00 |
P7 cycles | 7.00 |
P8 cycles | 7.00 |
P9 cycles | 7.00 |
P10 cycles | 11.60 |
P11 cycles | 23.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 34.49 - 34.51 |
Stall cycles (UFS) | 0.00 |
Nb insns | 204.00 |
Nb uops | 208.00 |
Nb loads | 70.00 |
Nb stores | 12.00 |
Nb stack references | 24.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.83 |
Bytes prefetched | 0.00 |
Bytes loaded | 560.00 |
Bytes stored | 96.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 12.82 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 21.74 |
Vector-efficiency ratio all | 14.10 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 15.22 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.55 |
CQA speedup if FP arith vectorized | 3.78 |
CQA speedup if fully vectorized | 7.92 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.49 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1865-1865,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 34.83 |
CQA cycles if no scalar integer | 13.67 |
CQA cycles if FP arith vectorized | 9.21 |
CQA cycles if fully vectorized | 4.40 |
Front-end cycles | 34.83 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 12.00 |
P1 cycles | 11.80 |
P2 cycles | 23.33 |
P3 cycles | 23.33 |
P4 cycles | 7.00 |
P5 cycles | 11.60 |
P6 cycles | 12.00 |
P7 cycles | 7.00 |
P8 cycles | 7.00 |
P9 cycles | 7.00 |
P10 cycles | 11.60 |
P11 cycles | 23.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 34.49 - 34.51 |
Stall cycles (UFS) | 0.00 |
Nb insns | 204.00 |
Nb uops | 208.00 |
Nb loads | 70.00 |
Nb stores | 12.00 |
Nb stack references | 24.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.83 |
Bytes prefetched | 0.00 |
Bytes loaded | 560.00 |
Bytes stored | 96.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 12.82 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 21.74 |
Vector-efficiency ratio all | 14.10 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 15.22 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 204 |
nb uops | 208 |
loop length | 871 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 34.83 cycles |
front end | 34.83 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 12.00 | 11.80 | 23.33 | 23.33 | 7.00 | 11.60 | 12.00 | 7.00 | 7.00 | 7.00 | 11.60 | 23.33 |
cycles | 12.00 | 11.80 | 23.33 | 23.33 | 7.00 | 11.60 | 12.00 | 7.00 | 7.00 | 7.00 | 11.60 | 23.33 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 34.49-34.51 |
Stall cycles | 0.00 |
Front-end | 34.83 |
Dispatch | 23.33 |
DIV/SQRT | 4.00 |
Overall L1 | 34.83 |
Vectorization ratios (presumably integer instructions only; group header missing in this extract) |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
Vectorization ratios (presumably floating-point instructions only; group header missing in this extract) |
all | 37% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 50% |
Vectorization ratios (all instructions; same figures as the "Vectorization ratio" metrics, rounded down) |
all | 12% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 21% |
Vector-efficiency ratios (presumably integer instructions only; group header missing in this extract) |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
Vector-efficiency ratios (presumably floating-point instructions only; group header missing in this extract) |
all | 17% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 18% |
Vector-efficiency ratios (all instructions; same figures as the "Vector-efficiency ratio" metrics, rounded down) |
all | 14% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x90(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0x88(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 451d00 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfd0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x100(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4512ef <hypre_BoomerAMGBuildMultipass.extracted.28+0x5bf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4512c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x590> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 527570 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RCX,%RDX,8),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xd8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4512ef <hypre_BoomerAMGBuildMultipass.extracted.28+0x5bf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4512ef <hypre_BoomerAMGBuildMultipass.extracted.28+0x5bf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4514b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xe8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 451480 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 527570 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x80(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%R11,%RCX,8),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xd0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4514b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4514b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x780> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 451540 <hypre_BoomerAMGBuildMultipass.extracted.28+0x810> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 451508 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7d8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xc0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4515c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x890> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 451588 <hypre_BoomerAMGBuildMultipass.extracted.28+0x858> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xf0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 451a00 <hypre_BoomerAMGBuildMultipass.extracted.28+0xcd0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 451614 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8e4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xf8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 451ae0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xdb0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM16,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 451a36 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd06> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM17,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM18 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 451c65 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf35> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 451c40 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf10> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM18,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 451c42 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf12> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 451c65 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf35> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4510c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x390> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 451cc0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf90> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM18,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4510c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x390> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 451cc2 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf92> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4510c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x390> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 204 |
nb uops | 208 |
loop length | 871 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 34.83 cycles |
front end | 34.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 12.00 | 11.80 | 23.33 | 23.33 | 7.00 | 11.60 | 12.00 | 7.00 | 7.00 | 7.00 | 11.60 | 23.33 |
cycles | 12.00 | 11.80 | 23.33 | 23.33 | 7.00 | 11.60 | 12.00 | 7.00 | 7.00 | 7.00 | 11.60 | 23.33 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 34.49-34.51 |
Stall cycles | 0.00 |
Front-end | 34.83 |
Dispatch | 23.33 |
DIV/SQRT | 4.00 |
Overall L1 | 34.83 |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 37% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 50% |
all | 12% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 21% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 17% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 18% |
all | 14% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x90(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0x88(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 451d00 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfd0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x100(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4512ef <hypre_BoomerAMGBuildMultipass.extracted.28+0x5bf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4512c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x590> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 527570 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RCX,%RDX,8),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xd8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4512ef <hypre_BoomerAMGBuildMultipass.extracted.28+0x5bf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4512ef <hypre_BoomerAMGBuildMultipass.extracted.28+0x5bf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4514b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xe8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 451480 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 527570 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x80(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%R11,%RCX,8),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xd0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4514b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4514b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x780> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 451540 <hypre_BoomerAMGBuildMultipass.extracted.28+0x810> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 451508 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7d8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xc0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4515c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x890> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 451588 <hypre_BoomerAMGBuildMultipass.extracted.28+0x858> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xf0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 451a00 <hypre_BoomerAMGBuildMultipass.extracted.28+0xcd0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 451614 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8e4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xf8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 451ae0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xdb0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM16,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 451a36 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd06> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM17,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM18 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 451c65 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf35> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 451c40 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf10> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM18,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 451c42 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf12> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 451c65 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf35> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4510c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x390> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 451cc0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf90> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM18,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4510c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x390> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 451cc2 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf92> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4510c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x390> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |