Loop Id: 1609 | Module: libparcsr_ls.so | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.26% |
---|
Loop Id: 1609 | Module: libparcsr_ls.so | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.26% |
---|
0x711c0 MOV -0xa8(%RBP),%RCX |
0x711c7 INC %RCX |
0x711ca CMP -0x98(%RBP),%RCX |
0x711d1 MOV -0x68(%RBP),%RDX |
0x711d5 JGE 71e80 |
0x711db MOV -0x120(%RBP),%RAX |
0x711e2 MOV %RCX,-0xa8(%RBP) |
0x711e9 MOV (%RAX,%RCX,8),%R9 |
0x711ed MOV -0x128(%RBP),%RAX |
0x711f4 MOV (%RAX,%R9,8),%RCX |
0x711f8 MOV -0xb0(%RBP),%RAX |
0x711ff MOV (%RAX,%R9,8),%R12 |
0x71203 MOV 0x8(%RAX,%R9,8),%R8 |
0x71208 LEA (%R8,%RCX,1),%RAX |
0x7120c SUB %R12,%RAX |
0x7120f MOV %RCX,-0x30(%RBP) |
0x71213 CMP %RAX,%RCX |
0x71216 MOV -0xa0(%RBP),%RSI |
0x7121d MOV %R9,-0x48(%RBP) |
0x71221 JGE 7142f |
0x71227 MOV -0x60(%RBP),%RAX |
0x7122b MOV -0x100(%RBP),%RCX |
0x71232 MOV (%RCX,%RAX,8),%RAX |
0x71236 MOV %R8,%RCX |
0x71239 SUB %R12,%RCX |
0x7123c CMP $0xc,%RCX |
0x71240 JBE 71400 |
0x71246 MOV %R8,-0x90(%RBP) |
0x7124d LEA (%R14,%R12,8),%RDI |
0x71251 LEA (,%RCX,8),%RDX |
0x71259 XOR %ESI,%ESI |
0x7125b MOV %RAX,-0x40(%RBP) |
0x7125f MOV %RCX,-0x88(%RBP) |
0x71266 VZEROUPPER |
0x71269 CALL e4a0 <__intel_avx_rep_memset@plt> |
0x7126e MOV -0x88(%RBP),%RSI |
0x71275 MOV %RSI,%R11 |
0x71278 SHR $0x3,%RSI |
0x7127c MOV -0x30(%RBP),%RAX |
0x71280 MOV -0x40(%RBP),%RCX |
0x71284 LEA 0x38(%RCX,%RAX,8),%RCX |
0x71289 MOV -0xf8(%RBP),%RDX |
0x71290 LEA (%RDX,%R12,8),%RDX |
0x71294 MOV %RSI,-0xc0(%RBP) |
0x7129b XOR %EDI,%EDI |
0x7129d MOV -0x58(%RBP),%R10 |
0x712a1 NOPW %CS:(%RAX,%RAX,1) |
(1627) 0x712b0 MOV -0x38(%RCX,%RDI,8),%R8 |
(1627) 0x712b5 LEA (%R12,%RDI,1),%R9 |
(1627) 0x712b9 MOV %R9,(%R10,%R8,8) |
(1627) 0x712bd MOV %R8,-0x38(%RDX,%RDI,8) |
(1627) 0x712c2 MOV -0x30(%RCX,%RDI,8),%R8 |
(1627) 0x712c7 LEA 0x1(%R12,%RDI,1),%R9 |
(1627) 0x712cc MOV %R9,(%R10,%R8,8) |
(1627) 0x712d0 MOV %R8,-0x30(%RDX,%RDI,8) |
(1627) 0x712d5 MOV -0x28(%RCX,%RDI,8),%R8 |
(1627) 0x712da LEA 0x2(%R12,%RDI,1),%R9 |
(1627) 0x712df MOV %R9,(%R10,%R8,8) |
(1627) 0x712e3 MOV %R8,-0x28(%RDX,%RDI,8) |
(1627) 0x712e8 MOV -0x20(%RCX,%RDI,8),%R8 |
(1627) 0x712ed LEA 0x3(%R12,%RDI,1),%R9 |
(1627) 0x712f2 MOV %R9,(%R10,%R8,8) |
(1627) 0x712f6 MOV %R8,-0x20(%RDX,%RDI,8) |
(1627) 0x712fb MOV -0x18(%RCX,%RDI,8),%R8 |
(1627) 0x71300 LEA 0x4(%R12,%RDI,1),%R9 |
(1627) 0x71305 MOV %R9,(%R10,%R8,8) |
(1627) 0x71309 MOV %R8,-0x18(%RDX,%RDI,8) |
(1627) 0x7130e MOV -0x10(%RCX,%RDI,8),%R8 |
(1627) 0x71313 LEA 0x5(%R12,%RDI,1),%R9 |
(1627) 0x71318 MOV %R9,(%R10,%R8,8) |
(1627) 0x7131c MOV %R8,-0x10(%RDX,%RDI,8) |
(1627) 0x71321 MOV -0x8(%RCX,%RDI,8),%R8 |
(1627) 0x71326 LEA 0x6(%R12,%RDI,1),%R9 |
(1627) 0x7132b MOV %R9,(%R10,%R8,8) |
(1627) 0x7132f MOV %R8,-0x8(%RDX,%RDI,8) |
(1627) 0x71334 MOV (%RCX,%RDI,8),%R8 |
(1627) 0x71338 LEA 0x7(%R12,%RDI,1),%R9 |
(1627) 0x7133d MOV %R9,(%R10,%R8,8) |
(1627) 0x71341 MOV %R8,(%RDX,%RDI,8) |
(1627) 0x71345 ADD $0x8,%RDI |
(1627) 0x71349 DEC %RSI |
(1627) 0x7134c JNE 712b0 |
0x71352 MOV %R11,%RCX |
0x71355 MOV %R11,%RAX |
0x71358 AND $-0x8,%RAX |
0x7135c CMP %R11,%RAX |
0x7135f MOV -0x50(%RBP),%R10 |
0x71363 MOV -0xa0(%RBP),%RSI |
0x7136a MOV -0x38(%RBP),%R11 |
0x7136e MOV -0x68(%RBP),%RDX |
0x71372 MOV -0x48(%RBP),%R9 |
0x71376 MOV -0x90(%RBP),%R8 |
0x7137d MOV -0x40(%RBP),%RCX |
0x71381 JAE 7142f |
0x71387 ADD %RAX,%R12 |
0x7138a SALQ $0x6,-0xc0(%RBP) |
0x71392 MOV -0x30(%RBP),%RAX |
0x71396 MOV -0xc0(%RBP),%RDI |
0x7139d LEA (%RDI,%RAX,8),%RAX |
0x713a1 MOV -0x58(%RBP),%RDI |
0x713a5 ADD %RAX,%RCX |
0x713a8 NOPL (%RAX,%RAX,1) |
(1628) 0x713b0 MOV (%RCX),%RAX |
(1628) 0x713b3 MOV %R12,(%RDI,%RAX,8) |
(1628) 0x713b7 MOV %RAX,(%RDX,%R12,8) |
(1628) 0x713bb INC %R12 |
(1628) 0x713be ADD $0x8,%RCX |
(1628) 0x713c2 CMP %R12,%R8 |
(1628) 0x713c5 JNE 713b0 |
0x713c7 JMP 7142f |
0x71400 MOV -0x30(%RBP),%RCX |
0x71404 LEA (%RAX,%RCX,8),%RAX |
0x71408 MOV -0x58(%RBP),%RDI |
0x7140c NOPL (%RAX) |
(1626) 0x71410 MOV (%RAX),%RCX |
(1626) 0x71413 MOV %R12,(%RDI,%RCX,8) |
(1626) 0x71417 MOVQ $0,(%R14,%R12,8) |
(1626) 0x7141f MOV %RCX,(%RDX,%R12,8) |
(1626) 0x71423 INC %R12 |
(1626) 0x71426 ADD $0x8,%RAX |
(1626) 0x7142a CMP %R12,%R8 |
(1626) 0x7142d JNE 71410 |
0x7142f MOV -0x130(%RBP),%RAX |
0x71436 MOV (%RAX,%R9,8),%R8 |
0x7143a MOV -0xb8(%RBP),%RAX |
0x71441 MOV (%RAX,%R9,8),%R12 |
0x71445 MOV 0x8(%RAX,%R9,8),%RDX |
0x7144a LEA (%RDX,%R8,1),%RAX |
0x7144e SUB %R12,%RAX |
0x71451 CMP %RAX,%R8 |
0x71454 JGE 71630 |
0x7145a MOV -0x60(%RBP),%RAX |
0x7145e MOV -0x108(%RBP),%RCX |
0x71465 MOV (%RCX,%RAX,8),%RAX |
0x71469 MOV %RDX,%RCX |
0x7146c SUB %R12,%RCX |
0x7146f CMP $0xc,%RCX |
0x71473 JBE 71600 |
0x71479 MOV %RDX,-0x90(%RBP) |
0x71480 MOV %RAX,-0x30(%RBP) |
0x71484 LEA (%R13,%R12,8),%RDI |
0x71489 LEA (,%RCX,8),%RDX |
0x71491 XOR %ESI,%ESI |
0x71493 MOV %R8,-0x40(%RBP) |
0x71497 MOV %RCX,-0x88(%RBP) |
0x7149e VZEROUPPER |
0x714a1 CALL e4a0 <__intel_avx_rep_memset@plt> |
0x714a6 MOV -0x88(%RBP),%RAX |
0x714ad MOV %RAX,%R10 |
0x714b0 SHR $0x3,%RAX |
0x714b4 MOV -0x30(%RBP),%RCX |
0x714b8 MOV -0x40(%RBP),%RDX |
0x714bc LEA 0x38(%RCX,%RDX,8),%RCX |
0x714c1 MOV -0xf0(%RBP),%RDX |
0x714c8 LEA (%RDX,%R12,8),%RDX |
0x714cc MOV %RAX,%RSI |
0x714cf XOR %EDI,%EDI |
0x714d1 NOPW %CS:(%RAX,%RAX,1) |
(1624) 0x714e0 MOV -0x38(%RCX,%RDI,8),%R8 |
(1624) 0x714e5 LEA (%R12,%RDI,1),%R9 |
(1624) 0x714e9 MOV %R9,(%R15,%R8,8) |
(1624) 0x714ed MOV %R8,-0x38(%RDX,%RDI,8) |
(1624) 0x714f2 MOV -0x30(%RCX,%RDI,8),%R8 |
(1624) 0x714f7 LEA 0x1(%R12,%RDI,1),%R9 |
(1624) 0x714fc MOV %R9,(%R15,%R8,8) |
(1624) 0x71500 MOV %R8,-0x30(%RDX,%RDI,8) |
(1624) 0x71505 MOV -0x28(%RCX,%RDI,8),%R8 |
(1624) 0x7150a LEA 0x2(%R12,%RDI,1),%R9 |
(1624) 0x7150f MOV %R9,(%R15,%R8,8) |
(1624) 0x71513 MOV %R8,-0x28(%RDX,%RDI,8) |
(1624) 0x71518 MOV -0x20(%RCX,%RDI,8),%R8 |
(1624) 0x7151d LEA 0x3(%R12,%RDI,1),%R9 |
(1624) 0x71522 MOV %R9,(%R15,%R8,8) |
(1624) 0x71526 MOV %R8,-0x20(%RDX,%RDI,8) |
(1624) 0x7152b MOV -0x18(%RCX,%RDI,8),%R8 |
(1624) 0x71530 LEA 0x4(%R12,%RDI,1),%R9 |
(1624) 0x71535 MOV %R9,(%R15,%R8,8) |
(1624) 0x71539 MOV %R8,-0x18(%RDX,%RDI,8) |
(1624) 0x7153e MOV -0x10(%RCX,%RDI,8),%R8 |
(1624) 0x71543 LEA 0x5(%R12,%RDI,1),%R9 |
(1624) 0x71548 MOV %R9,(%R15,%R8,8) |
(1624) 0x7154c MOV %R8,-0x10(%RDX,%RDI,8) |
(1624) 0x71551 MOV -0x8(%RCX,%RDI,8),%R8 |
(1624) 0x71556 LEA 0x6(%R12,%RDI,1),%R9 |
(1624) 0x7155b MOV %R9,(%R15,%R8,8) |
(1624) 0x7155f MOV %R8,-0x8(%RDX,%RDI,8) |
(1624) 0x71564 MOV (%RCX,%RDI,8),%R8 |
(1624) 0x71568 LEA 0x7(%R12,%RDI,1),%R9 |
(1624) 0x7156d MOV %R9,(%R15,%R8,8) |
(1624) 0x71571 MOV %R8,(%RDX,%RDI,8) |
(1624) 0x71575 ADD $0x8,%RDI |
(1624) 0x71579 DEC %RSI |
(1624) 0x7157c JNE 714e0 |
0x71582 MOV %R10,%RDX |
0x71585 MOV %R10,%RCX |
0x71588 AND $-0x8,%RCX |
0x7158c CMP %R10,%RCX |
0x7158f MOV -0x50(%RBP),%R10 |
0x71593 MOV -0xa0(%RBP),%RSI |
0x7159a MOV -0x38(%RBP),%R11 |
0x7159e MOV -0x48(%RBP),%R9 |
0x715a2 MOV -0x90(%RBP),%RDX |
0x715a9 MOV -0x30(%RBP),%RDI |
0x715ad MOV -0x40(%RBP),%R8 |
0x715b1 JAE 71630 |
0x715b7 ADD %RCX,%R12 |
0x715ba SAL $0x6,%RAX |
0x715be LEA (%RAX,%R8,8),%RAX |
0x715c2 ADD %RAX,%RDI |
0x715c5 NOPW %CS:(%RAX,%RAX,1) |
(1625) 0x715d0 MOV (%RDI),%RAX |
(1625) 0x715d3 MOV %R12,(%R15,%RAX,8) |
(1625) 0x715d7 MOV %RAX,(%R11,%R12,8) |
(1625) 0x715db INC %R12 |
(1625) 0x715de ADD $0x8,%RDI |
(1625) 0x715e2 CMP %R12,%RDX |
(1625) 0x715e5 JNE 715d0 |
0x715e7 JMP 71630 |
0x71600 LEA (%RAX,%R8,8),%RAX |
0x71604 NOPW %CS:(%RAX,%RAX,1) |
(1623) 0x71610 MOV (%RAX),%RCX |
(1623) 0x71613 MOV %R12,(%R15,%RCX,8) |
(1623) 0x71617 MOVQ $0,(%R13,%R12,8) |
(1623) 0x71620 MOV %RCX,(%R11,%R12,8) |
(1623) 0x71624 INC %R12 |
(1623) 0x71627 ADD $0x8,%RAX |
(1623) 0x7162b CMP %R12,%RDX |
(1623) 0x7162e JNE 71610 |
0x71630 MOV -0xd0(%RBP),%RCX |
0x71637 MOV (%RCX,%R9,8),%RAX |
0x7163b MOV 0x8(%RCX,%R9,8),%RCX |
0x71640 JMP 71683 |
(1622) 0x71680 INC %RAX |
(1622) 0x71683 CMP %RCX,%RAX |
(1622) 0x71686 JGE 716c0 |
(1622) 0x71688 MOV -0x180(%RBP),%RDX |
(1622) 0x7168f MOV (%RDX,%RAX,8),%RDX |
(1622) 0x71693 MOV -0x190(%RBP),%RDI |
(1622) 0x7169a MOV -0x80(%RBP),%R8 |
(1622) 0x7169e CMP %R8,(%RDI,%RDX,8) |
(1622) 0x716a2 JNE 71680 |
(1622) 0x716a4 MOV -0x78(%RBP),%RCX |
(1622) 0x716a8 MOV %R9,(%RCX,%RDX,8) |
(1622) 0x716ac MOV -0xd0(%RBP),%RCX |
(1622) 0x716b3 MOV 0x8(%RCX,%R9,8),%RCX |
(1622) 0x716b8 JMP 71680 |
0x716c0 MOV -0xd8(%RBP),%RCX |
0x716c7 MOV (%RCX,%R9,8),%RAX |
0x716cb MOV 0x8(%RCX,%R9,8),%RCX |
0x716d0 JMP 71703 |
(1621) 0x71700 INC %RAX |
(1621) 0x71703 CMP %RCX,%RAX |
(1621) 0x71706 JGE 71740 |
(1621) 0x71708 MOV -0x188(%RBP),%RDX |
(1621) 0x7170f MOV (%RDX,%RAX,8),%RDX |
(1621) 0x71713 MOV -0x80(%RBP),%RDI |
(1621) 0x71717 CMP %RDI,(%RSI,%RDX,8) |
(1621) 0x7171b JNE 71700 |
(1621) 0x7171d MOV -0x70(%RBP),%RCX |
(1621) 0x71721 MOV %R9,(%RCX,%RDX,8) |
(1621) 0x71725 MOV -0xd8(%RBP),%RCX |
(1621) 0x7172c MOV 0x8(%RCX,%R9,8),%RCX |
(1621) 0x71731 JMP 71700 |
0x71740 MOV -0x110(%RBP),%RAX |
0x71747 MOV (%RAX,%R9,8),%RCX |
0x7174b MOV 0x8(%RAX,%R9,8),%R12 |
0x71750 LEA 0x1(%RCX),%RDX |
0x71754 VXORPD %XMM1,%XMM1,%XMM1 |
0x71758 CMP %R12,%RDX |
0x7175b MOV %RCX,-0x40(%RBP) |
0x7175f VXORPD %XMM0,%XMM0,%XMM0 |
0x71763 JGE 71b40 |
0x71769 MOV %R12,-0x30(%RBP) |
0x7176d JMP 71794 |
(1616) 0x71780 MOV -0x50(%RBP),%R10 |
(1616) 0x71784 MOV -0x48(%RBP),%R9 |
(1616) 0x71788 INC %RDX |
(1616) 0x7178b CMP %R12,%RDX |
(1616) 0x7178e JE 71b40 |
(1616) 0x71794 MOV -0x170(%RBP),%RAX |
(1616) 0x7179b MOV (%RAX,%RDX,8),%RSI |
(1616) 0x7179f MOV -0x78(%RBP),%RAX |
(1616) 0x717a3 CMP %R9,(%RAX,%RSI,8) |
(1616) 0x717a7 JNE 71800 |
(1616) 0x717a9 MOV -0xb0(%RBP),%RAX |
(1616) 0x717b0 MOV (%RAX,%RSI,8),%RDI |
(1616) 0x717b4 MOV 0x8(%RAX,%RSI,8),%R8 |
(1616) 0x717b9 MOV %R8,%R9 |
(1616) 0x717bc SUB %RDI,%R9 |
(1616) 0x717bf JLE 7198c |
(1616) 0x717c5 CMP $0x4,%R9 |
(1616) 0x717c9 JAE 71840 |
(1616) 0x717cb JMP 71919 |
(1616) 0x71800 MOV -0x158(%RBP),%RDI |
(1616) 0x71807 CMPQ $-0x3,(%RDI,%RSI,8) |
(1616) 0x7180c JE 71788 |
(1616) 0x71812 CMPQ $0x1,-0xe0(%RBP) |
(1616) 0x7181a JE 71831 |
(1616) 0x7181c MOV -0xc8(%RBP),%R8 |
(1616) 0x71823 MOV (%R8,%R9,8),%RDI |
(1616) 0x71827 CMP (%R8,%RSI,8),%RDI |
(1616) 0x7182b JNE 71788 |
(1616) 0x71831 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(1616) 0x71836 JMP 71788 |
(1616) 0x71840 MOV %R9,%R10 |
(1616) 0x71843 SHR $0x2,%R10 |
(1616) 0x71847 LEA 0x18(,%RDI,8),%R11 |
(1616) 0x7184f MOV -0x68(%RBP),%RAX |
(1616) 0x71853 MOV -0x58(%RBP),%RCX |
(1616) 0x71857 NOPW (%RAX,%RAX,1) |
(1619) 0x71860 MOV -0x18(%RAX,%R11,1),%R12 |
(1619) 0x71865 VMOVSD -0x18(%R14,%R11,1),%XMM2 |
(1619) 0x7186c VMOVSD (%RBX,%RDX,8),%XMM3 |
(1619) 0x71871 MOV (%RCX,%R12,8),%R12 |
(1619) 0x71875 VMOVSD (%R14,%R12,8),%XMM4 |
(1619) 0x7187b VFMADD231SD %XMM2,%XMM3,%XMM4 |
(1619) 0x71880 VMOVSD %XMM4,(%R14,%R12,8) |
(1619) 0x71886 MOV -0x10(%RAX,%R11,1),%R12 |
(1619) 0x7188b VMOVSD -0x10(%R14,%R11,1),%XMM4 |
(1619) 0x71892 VMOVSD (%RBX,%RDX,8),%XMM5 |
(1619) 0x71897 MOV (%RCX,%R12,8),%R12 |
(1619) 0x7189b VMOVSD (%R14,%R12,8),%XMM6 |
(1619) 0x718a1 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(1619) 0x718a6 VMOVSD %XMM6,(%R14,%R12,8) |
(1619) 0x718ac MOV -0x8(%RAX,%R11,1),%R12 |
(1619) 0x718b1 VMOVSD -0x8(%R14,%R11,1),%XMM6 |
(1619) 0x718b8 VMOVSD (%RBX,%RDX,8),%XMM7 |
(1619) 0x718bd MOV (%RCX,%R12,8),%R12 |
(1619) 0x718c1 VMOVSD (%R14,%R12,8),%XMM8 |
(1619) 0x718c7 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(1619) 0x718cc VMOVSD %XMM8,(%R14,%R12,8) |
(1619) 0x718d2 VMOVSD (%R14,%R11,1),%XMM8 |
(1619) 0x718d8 VMULSD (%RBX,%RDX,8),%XMM8,%XMM18 |
(1619) 0x718df MOV (%RAX,%R11,1),%R12 |
(1619) 0x718e3 MOV (%RCX,%R12,8),%R12 |
(1619) 0x718e7 VADDSD (%R14,%R12,8),%XMM18,%XMM8 |
(1619) 0x718ee VMOVSD %XMM8,(%R14,%R12,8) |
(1619) 0x718f4 VFMADD213SD %XMM18,%XMM5,%XMM4 |
(1619) 0x718fa VFMADD231SD %XMM2,%XMM3,%XMM4 |
(1619) 0x718ff VFMADD231SD %XMM6,%XMM7,%XMM4 |
(1619) 0x71904 VADDSD %XMM1,%XMM4,%XMM1 |
(1619) 0x71908 VADDSD %XMM0,%XMM4,%XMM0 |
(1619) 0x7190c ADD $0x20,%R11 |
(1619) 0x71910 DEC %R10 |
(1619) 0x71913 JNE 71860 |
(1616) 0x71919 MOV %R9,%R10 |
(1616) 0x7191c AND $-0x4,%R10 |
(1616) 0x71920 CMP %R9,%R10 |
(1616) 0x71923 JAE 71980 |
(1616) 0x71925 ADD %R10,%RDI |
(1616) 0x71928 MOV -0x50(%RBP),%R10 |
(1616) 0x7192c MOV -0x38(%RBP),%R11 |
(1616) 0x71930 MOV -0x68(%RBP),%RAX |
(1616) 0x71934 MOV -0x58(%RBP),%RCX |
(1616) 0x71938 MOV -0x30(%RBP),%R12 |
(1616) 0x7193c NOPL (%RAX) |
(1620) 0x71940 MOV (%RAX,%RDI,8),%R9 |
(1620) 0x71944 VMOVSD (%R14,%RDI,8),%XMM2 |
(1620) 0x7194a VMULSD (%RBX,%RDX,8),%XMM2,%XMM18 |
(1620) 0x71951 MOV (%RCX,%R9,8),%R9 |
(1620) 0x71955 VADDSD (%R14,%R9,8),%XMM18,%XMM2 |
(1620) 0x7195c VMOVSD %XMM2,(%R14,%R9,8) |
(1620) 0x71962 VADDSD %XMM1,%XMM18,%XMM1 |
(1620) 0x71968 VADDSD %XMM0,%XMM18,%XMM0 |
(1620) 0x7196e INC %RDI |
(1620) 0x71971 CMP %RDI,%R8 |
(1620) 0x71974 JNE 71940 |
(1616) 0x71976 JMP 7198c |
(1616) 0x71980 MOV -0x50(%RBP),%R10 |
(1616) 0x71984 MOV -0x38(%RBP),%R11 |
(1616) 0x71988 MOV -0x30(%RBP),%R12 |
(1616) 0x7198c MOV -0xb8(%RBP),%R8 |
(1616) 0x71993 MOV (%R8,%RSI,8),%RDI |
(1616) 0x71997 MOV 0x8(%R8,%RSI,8),%RSI |
(1616) 0x7199c MOV %RSI,%R8 |
(1616) 0x7199f SUB %RDI,%R8 |
(1616) 0x719a2 JLE 71784 |
(1616) 0x719a8 CMP $0x4,%R8 |
(1616) 0x719ac JAE 719c0 |
(1616) 0x719ae JMP 71aa6 |
(1616) 0x719c0 MOV %R8,%R9 |
(1616) 0x719c3 SHR $0x2,%R9 |
(1616) 0x719c7 LEA 0x18(,%RDI,8),%R10 |
(1616) 0x719cf NOP |
(1617) 0x719d0 MOV -0x38(%RBP),%R11 |
(1617) 0x719d4 MOV -0x18(%R11,%R10,1),%R11 |
(1617) 0x719d9 VMOVSD -0x18(%R13,%R10,1),%XMM2 |
(1617) 0x719e0 VMOVSD (%RBX,%RDX,8),%XMM3 |
(1617) 0x719e5 MOV (%R15,%R11,8),%R11 |
(1617) 0x719e9 VMOVSD (%R13,%R11,8),%XMM4 |
(1617) 0x719f0 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(1617) 0x719f5 VMOVSD %XMM4,(%R13,%R11,8) |
(1617) 0x719fc MOV -0x38(%RBP),%R11 |
(1617) 0x71a00 MOV -0x10(%R11,%R10,1),%R11 |
(1617) 0x71a05 VMOVSD -0x10(%R13,%R10,1),%XMM4 |
(1617) 0x71a0c VMOVSD (%RBX,%RDX,8),%XMM5 |
(1617) 0x71a11 MOV (%R15,%R11,8),%R11 |
(1617) 0x71a15 VMOVSD (%R13,%R11,8),%XMM6 |
(1617) 0x71a1c VFMADD231SD %XMM4,%XMM5,%XMM6 |
(1617) 0x71a21 VMOVSD %XMM6,(%R13,%R11,8) |
(1617) 0x71a28 MOV -0x38(%RBP),%R11 |
(1617) 0x71a2c MOV -0x8(%R11,%R10,1),%R11 |
(1617) 0x71a31 VMOVSD -0x8(%R13,%R10,1),%XMM6 |
(1617) 0x71a38 VMOVSD (%RBX,%RDX,8),%XMM7 |
(1617) 0x71a3d MOV (%R15,%R11,8),%R11 |
(1617) 0x71a41 VMOVSD (%R13,%R11,8),%XMM8 |
(1617) 0x71a48 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(1617) 0x71a4d VMOVSD %XMM8,(%R13,%R11,8) |
(1617) 0x71a54 VMOVSD (%R13,%R10,1),%XMM8 |
(1617) 0x71a5b VMULSD (%RBX,%RDX,8),%XMM8,%XMM18 |
(1617) 0x71a62 MOV -0x38(%RBP),%R11 |
(1617) 0x71a66 MOV (%R11,%R10,1),%R11 |
(1617) 0x71a6a MOV (%R15,%R11,8),%R11 |
(1617) 0x71a6e VADDSD (%R13,%R11,8),%XMM18,%XMM8 |
(1617) 0x71a76 VMOVSD %XMM8,(%R13,%R11,8) |
(1617) 0x71a7d MOV -0x38(%RBP),%R11 |
(1617) 0x71a81 VFMADD213SD %XMM18,%XMM5,%XMM4 |
(1617) 0x71a87 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(1617) 0x71a8c VFMADD231SD %XMM6,%XMM7,%XMM4 |
(1617) 0x71a91 VADDSD %XMM1,%XMM4,%XMM1 |
(1617) 0x71a95 VADDSD %XMM0,%XMM4,%XMM0 |
(1617) 0x71a99 ADD $0x20,%R10 |
(1617) 0x71a9d DEC %R9 |
(1617) 0x71aa0 JNE 719d0 |
(1616) 0x71aa6 MOV %R8,%R9 |
(1616) 0x71aa9 AND $-0x4,%R9 |
(1616) 0x71aad CMP %R8,%R9 |
(1616) 0x71ab0 JAE 71780 |
(1616) 0x71ab6 ADD %R9,%RDI |
(1616) 0x71ab9 MOV -0x50(%RBP),%R10 |
(1616) 0x71abd MOV -0x48(%RBP),%R9 |
(1616) 0x71ac1 NOPW %CS:(%RAX,%RAX,1) |
(1618) 0x71ad0 MOV (%R11,%RDI,8),%R8 |
(1618) 0x71ad4 VMOVSD (%R13,%RDI,8),%XMM2 |
(1618) 0x71adb VMULSD (%RBX,%RDX,8),%XMM2,%XMM18 |
(1618) 0x71ae2 MOV (%R15,%R8,8),%R8 |
(1618) 0x71ae6 VADDSD (%R13,%R8,8),%XMM18,%XMM2 |
(1618) 0x71aee VMOVSD %XMM2,(%R13,%R8,8) |
(1618) 0x71af5 VADDSD %XMM1,%XMM18,%XMM1 |
(1618) 0x71afb VADDSD %XMM0,%XMM18,%XMM0 |
(1618) 0x71b01 INC %RDI |
(1618) 0x71b04 CMP %RDI,%RSI |
(1618) 0x71b07 JNE 71ad0 |
(1616) 0x71b09 JMP 71788 |
0x71b40 MOV -0x118(%RBP),%RAX |
0x71b47 MOV (%RAX,%R9,8),%RCX |
0x71b4b MOV 0x8(%RAX,%R9,8),%RDX |
0x71b50 CMP %RDX,%RCX |
0x71b53 JL 71c15 |
0x71b59 MOV -0x40(%RBP),%RAX |
0x71b5d VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 |
0x71b62 VUCOMISD %XMM16,%XMM1 |
0x71b68 JE 71b76 |
0x71b6a VXORPD %XMM17,%XMM0,%XMM0 |
0x71b70 VDIVSD %XMM1,%XMM0,%XMM18 |
0x71b76 MOV -0xb0(%RBP),%RAX |
0x71b7d MOV (%RAX,%R9,8),%RDX |
0x71b81 MOV 0x8(%RAX,%R9,8),%RAX |
0x71b86 MOV %RAX,%RSI |
0x71b89 SUB %RDX,%RSI |
0x71b8c JLE 71de5 |
0x71b92 MOV %RSI,%RCX |
0x71b95 AND $-0x4,%RCX |
0x71b99 JE 71dc0 |
0x71b9f LEA -0x1(%RCX),%RDI |
0x71ba3 LEA (%R14,%RDX,8),%R8 |
0x71ba7 VBROADCASTSD %XMM18,%YMM0 |
0x71bad XOR %R9D,%R9D |
(1613) 0x71bb0 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(1613) 0x71bb6 VMOVUPD %YMM1,(%R8,%R9,8) |
(1613) 0x71bbc ADD $0x4,%R9 |
(1613) 0x71bc0 CMP %RDI,%R9 |
(1613) 0x71bc3 JBE 71bb0 |
0x71bc5 CMP %RCX,%RSI |
0x71bc8 MOV -0x48(%RBP),%R9 |
0x71bcc JNE 71dc2 |
0x71bd2 JMP 71de5 |
(1614) 0x71c00 VADDSD (%RAX,%RCX,8),%XMM0,%XMM0 |
(1614) 0x71c05 INC %RCX |
(1614) 0x71c08 CMP %RDX,%RCX |
(1614) 0x71c0b MOV -0x48(%RBP),%R9 |
(1614) 0x71c0f JE 71b59 |
(1614) 0x71c15 MOV -0x178(%RBP),%RAX |
(1614) 0x71c1c LEA (%RAX,%RCX,8),%RSI |
(1614) 0x71c20 CMPQ $0,-0x198(%RBP) |
(1614) 0x71c28 JE 71c38 |
(1614) 0x71c2a MOV (%RSI),%RSI |
(1614) 0x71c2d MOV -0x160(%RBP),%RDI |
(1614) 0x71c34 LEA (%RDI,%RSI,8),%RSI |
(1614) 0x71c38 MOV -0xe8(%RBP),%RAX |
(1614) 0x71c3f MOV (%RSI),%RDI |
(1614) 0x71c42 TEST %RDI,%RDI |
(1614) 0x71c45 JS 71d40 |
(1614) 0x71c4b MOV -0x70(%RBP),%RSI |
(1614) 0x71c4f CMP %R9,(%RSI,%RDI,8) |
(1614) 0x71c53 JNE 71d40 |
(1614) 0x71c59 MOV -0x150(%RBP),%RSI |
(1614) 0x71c60 MOV 0x8(%RSI,%RDI,8),%RSI |
(1614) 0x71c65 TEST %RSI,%RSI |
(1614) 0x71c68 JLE 71c05 |
(1614) 0x71c6a MOV -0x140(%RBP),%R8 |
(1614) 0x71c71 MOV (%R8,%RDI,8),%RDI |
(1614) 0x71c75 ADD %RDI,%RSI |
(1614) 0x71c78 MOV -0x60(%RBP),%R8 |
(1614) 0x71c7c MOV -0x148(%RBP),%R9 |
(1614) 0x71c83 MOV (%R9,%R8,8),%R8 |
(1614) 0x71c87 JMP 71ceb |
(1615) 0x71cc0 MOV (%R15,%R9,8),%R9 |
(1615) 0x71cc4 VADDSD (%R13,%R9,8),%XMM18,%XMM2 |
(1615) 0x71ccc VMOVSD %XMM2,(%R13,%R9,8) |
(1615) 0x71cd3 VADDSD %XMM1,%XMM18,%XMM1 |
(1615) 0x71cd9 VADDSD %XMM0,%XMM18,%XMM0 |
(1615) 0x71cdf INC %RDI |
(1615) 0x71ce2 CMP %RSI,%RDI |
(1615) 0x71ce5 JGE 71c05 |
(1615) 0x71ceb MOV (%R8,%RDI,8),%R9 |
(1615) 0x71cef VMOVSD (%R10,%RDI,8),%XMM2 |
(1615) 0x71cf5 VMULSD (%RAX,%RCX,8),%XMM2,%XMM18 |
(1615) 0x71cfc TEST %R9,%R9 |
(1615) 0x71cff JNS 71cc0 |
(1615) 0x71d01 NOT %R9 |
(1615) 0x71d04 MOV -0x58(%RBP),%R12 |
(1615) 0x71d08 MOV (%R12,%R9,8),%R9 |
(1615) 0x71d0c VADDSD (%R14,%R9,8),%XMM18,%XMM2 |
(1615) 0x71d13 VMOVSD %XMM2,(%R14,%R9,8) |
(1615) 0x71d19 JMP 71cd3 |
(1614) 0x71d40 MOV -0x168(%RBP),%RSI |
(1614) 0x71d47 CMPQ $-0x3,(%RSI,%RDI,8) |
(1614) 0x71d4c JE 71c05 |
(1614) 0x71d52 CMPQ $0x1,-0xe0(%RBP) |
(1614) 0x71d5a JE 71c00 |
(1614) 0x71d60 MOV -0x138(%RBP),%RSI |
(1614) 0x71d67 MOV (%RSI,%RDI,8),%RSI |
(1614) 0x71d6b MOV -0xc8(%RBP),%RDI |
(1614) 0x71d72 CMP (%RDI,%R9,8),%RSI |
(1614) 0x71d76 JE 71c00 |
(1614) 0x71d7c JMP 71c05 |
0x71dc0 XOR %ECX,%ECX |
0x71dc2 ADD %RDX,%RCX |
0x71dc5 NOPW %CS:(%RAX,%RAX,1) |
(1612) 0x71dd0 VMULSD (%R14,%RCX,8),%XMM18,%XMM0 |
(1612) 0x71dd7 VMOVSD %XMM0,(%R14,%RCX,8) |
(1612) 0x71ddd INC %RCX |
(1612) 0x71de0 CMP %RCX,%RAX |
(1612) 0x71de3 JNE 71dd0 |
0x71de5 MOV -0xb8(%RBP),%RAX |
0x71dec MOV (%RAX,%R9,8),%RDX |
0x71df0 MOV 0x8(%RAX,%R9,8),%RAX |
0x71df5 MOV %RAX,%RSI |
0x71df8 SUB %RDX,%RSI |
0x71dfb JLE 711c0 |
0x71e01 MOV %RSI,%RCX |
0x71e04 AND $-0x4,%RCX |
0x71e08 JE 71e40 |
0x71e0a LEA -0x1(%RCX),%RDI |
0x71e0e LEA (%R13,%RDX,8),%R8 |
0x71e13 VBROADCASTSD %XMM18,%YMM0 |
0x71e19 XOR %R9D,%R9D |
0x71e1c NOPL (%RAX) |
(1611) 0x71e20 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(1611) 0x71e26 VMOVUPD %YMM1,(%R8,%R9,8) |
(1611) 0x71e2c ADD $0x4,%R9 |
(1611) 0x71e30 CMP %RDI,%R9 |
(1611) 0x71e33 JBE 71e20 |
0x71e35 CMP %RCX,%RSI |
0x71e38 JE 711c0 |
0x71e3e JMP 71e42 |
0x71e40 XOR %ECX,%ECX |
0x71e42 ADD %RDX,%RCX |
0x71e45 NOPW %CS:(%RAX,%RAX,1) |
(1610) 0x71e50 VMULSD (%R13,%RCX,8),%XMM18,%XMM0 |
(1610) 0x71e58 VMOVSD %XMM0,(%R13,%RCX,8) |
(1610) 0x71e5f INC %RCX |
(1610) 0x71e62 CMP %RCX,%RAX |
(1610) 0x71e65 JNE 71e50 |
0x71e67 JMP 711c0 |
/home/eoseret/qaas_runs_CPU_9468/171-716-5699/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1876 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1774: for (i=thread_start; i < thread_stop; i++) |
1775: { |
1776: i1 = pass_array[i]; |
1777: sum_C = 0; |
1778: sum_N = 0; |
1779: j_start = P_diag_start[i1]; |
1780: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1781: cnt = P_diag_i[i1]; |
1782: for (j=j_start; j < j_end; j++) |
1783: { |
1784: k1 = P_diag_pass[pass][j]; |
1785: tmp_array[k1] = cnt; |
1786: P_diag_data[cnt] = 0; |
1787: P_diag_j[cnt++] = k1; |
1788: } |
1789: j_start = P_offd_start[i1]; |
1790: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1791: cnt_offd = P_offd_i[i1]; |
1792: for (j=j_start; j < j_end; j++) |
1793: { |
1794: k1 = P_offd_pass[pass][j]; |
1795: tmp_array_offd[k1] = cnt_offd; |
1796: P_offd_data[cnt_offd] = 0; |
1797: P_offd_j[cnt_offd++] = k1; |
1798: } |
1799: for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++) |
1800: { |
1801: j1 = S_diag_j[j]; |
1802: if (assigned[j1] == pass-1) |
1803: tmp_marker[j1] = i1; |
1804: } |
1805: for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++) |
1806: { |
1807: j1 = S_offd_j[j]; |
1808: if (assigned_offd[j1] == pass-1) |
1809: tmp_marker_offd[j1] = i1; |
1810: } |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
1838: } |
1839: } |
1840: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1841: { |
1842: if (col_offd_S_to_A) |
1843: j1 = map_A_to_S[A_offd_j[j]]; |
1844: else |
1845: j1 = A_offd_j[j]; |
1846: |
1847: if (j1 > -1 && tmp_marker_offd[j1] == i1) |
1848: { |
1849: j_start = Pext_start[j1]; |
1850: j_end = j_start+Pext_i[j1+1]; |
1851: for (k=j_start; k < j_end; k++) |
1852: { |
1853: k1 = Pext_pass[pass][k]; |
1854: alfa = A_offd_data[j]*Pext_data[k]; |
1855: if (k1 < 0) |
1856: P_diag_data[tmp_array[-k1-1]] += alfa; |
1857: else |
1858: P_offd_data[tmp_array_offd[k1]] += alfa; |
1859: sum_C += alfa; |
1860: sum_N += alfa; |
1861: } |
1862: } |
1863: else |
1864: { |
1865: if (CF_marker_offd[j1] != -3 && |
1866: (num_functions == 1 || dof_func_offd[j1] == dof_func[i1])) |
1867: sum_N += A_offd_data[j]; |
1868: } |
1869: } |
1870: diagonal = A_diag_data[A_diag_i[i1]]; |
1871: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1872: |
1873: for (j=P_diag_i[i1]; j < P_diag_i[i1+1]; j++) |
1874: P_diag_data[j] *= alfa; |
1875: for (j=P_offd_i[i1]; j < P_offd_i[i1+1]; j++) |
1876: P_offd_data[j] *= alfa; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.71 |
CQA speedup if FP arith vectorized | 3.21 |
CQA speedup if fully vectorized | 12.89 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.43 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1805-1805,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1865-1865,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 34.33 |
CQA cycles if no scalar integer | 12.67 |
CQA cycles if FP arith vectorized | 10.71 |
CQA cycles if fully vectorized | 2.66 |
Front-end cycles | 34.33 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 11.00 |
P1 cycles | 11.00 |
P2 cycles | 24.00 |
P3 cycles | 24.00 |
P4 cycles | 8.00 |
P5 cycles | 11.00 |
P6 cycles | 11.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 11.00 |
P11 cycles | 24.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 33.98 - 34.00 |
Stall cycles (UFS) | 0.00 |
Nb insns | 199.00 |
Nb uops | 205.00 |
Nb loads | 72.00 |
Nb stores | 14.00 |
Nb stack references | 27.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.04 |
Bytes prefetched | 0.00 |
Bytes loaded | 576.00 |
Bytes stored | 112.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 9.26 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 18.52 |
Vector-efficiency ratio all | 13.43 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 14.35 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.71 |
CQA speedup if FP arith vectorized | 3.21 |
CQA speedup if fully vectorized | 12.89 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.43 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1805-1805,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1865-1865,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 34.33 |
CQA cycles if no scalar integer | 12.67 |
CQA cycles if FP arith vectorized | 10.71 |
CQA cycles if fully vectorized | 2.66 |
Front-end cycles | 34.33 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 11.00 |
P1 cycles | 11.00 |
P2 cycles | 24.00 |
P3 cycles | 24.00 |
P4 cycles | 8.00 |
P5 cycles | 11.00 |
P6 cycles | 11.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 11.00 |
P11 cycles | 24.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 33.98 - 34.00 |
Stall cycles (UFS) | 0.00 |
Nb insns | 199.00 |
Nb uops | 205.00 |
Nb loads | 72.00 |
Nb stores | 14.00 |
Nb stack references | 27.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.04 |
Bytes prefetched | 0.00 |
Bytes loaded | 576.00 |
Bytes stored | 112.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 9.26 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 18.52 |
Vector-efficiency ratio all | 13.43 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 14.35 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | libparcsr_ls.so |
nb instructions | 199 |
nb uops | 205 |
loop length | 945 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 34.33 cycles |
front end | 34.33 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.00 | 11.00 | 24.00 | 24.00 | 8.00 | 11.00 | 11.00 | 8.00 | 8.00 | 8.00 | 11.00 | 24.00 |
cycles | 11.00 | 11.00 | 24.00 | 24.00 | 8.00 | 11.00 | 11.00 | 8.00 | 8.00 | 8.00 | 11.00 | 24.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 33.98-34.00 |
Stall cycles | 0.00 |
Front-end | 34.33 |
Dispatch | 24.00 |
DIV/SQRT | 4.00 |
Overall L1 | 34.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 37% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 50% |
all | 9% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 18% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 17% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 18% |
all | 13% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0x98(%RBP),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 71e80 <hypre_BoomerAMGBuildMultipass.extracted.28+0x1080> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RCX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%RCX,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0xa0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 7142f <hypre_BoomerAMGBuildMultipass.extracted.28+0x62f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x100(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xc,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 71400 <hypre_BoomerAMGBuildMultipass.extracted.28+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL e4a0 <__intel_avx_rep_memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RCX,%RAX,8),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 7142f <hypre_BoomerAMGBuildMultipass.extracted.28+0x62f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RAX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SALQ $0x6,-0xc0(%RBP) | 3 | 0.50 | 0 | 0.33 | 0.33 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0.33 | 0-2 | 0.50 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 7142f <hypre_BoomerAMGBuildMultipass.extracted.28+0x62f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x130(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 71630 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x108(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xc,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 71600 <hypre_BoomerAMGBuildMultipass.extracted.28+0x800> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL e4a0 <__intel_avx_rep_memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RCX,%RDX,8),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 71630 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 71630 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xd0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 71683 <hypre_BoomerAMGBuildMultipass.extracted.28+0x883> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xd8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 71703 <hypre_BoomerAMGBuildMultipass.extracted.28+0x903> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R12,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 71b40 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R12,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 71794 <hypre_BoomerAMGBuildMultipass.extracted.28+0x994> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 71c15 <hypre_BoomerAMGBuildMultipass.extracted.28+0xe15> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM16,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 71b76 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd76> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM17,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM18 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 71de5 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfe5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 71dc0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfc0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM18,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 71dc2 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfc2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 71de5 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfe5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 711c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 71e40 <hypre_BoomerAMGBuildMultipass.extracted.28+0x1040> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM18,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 711c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 71e42 <hypre_BoomerAMGBuildMultipass.extracted.28+0x1042> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 711c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3c0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | libparcsr_ls.so |
nb instructions | 199 |
nb uops | 205 |
loop length | 945 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 34.33 cycles |
front end | 34.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.00 | 11.00 | 24.00 | 24.00 | 8.00 | 11.00 | 11.00 | 8.00 | 8.00 | 8.00 | 11.00 | 24.00 |
cycles | 11.00 | 11.00 | 24.00 | 24.00 | 8.00 | 11.00 | 11.00 | 8.00 | 8.00 | 8.00 | 11.00 | 24.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 33.98-34.00 |
Stall cycles | 0.00 |
Front-end | 34.33 |
Dispatch | 24.00 |
DIV/SQRT | 4.00 |
Overall L1 | 34.33 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 37% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 50% |
all | 9% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 18% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 17% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 18% |
all | 13% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0x98(%RBP),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 71e80 <hypre_BoomerAMGBuildMultipass.extracted.28+0x1080> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RCX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%RCX,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0xa0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 7142f <hypre_BoomerAMGBuildMultipass.extracted.28+0x62f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x100(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xc,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 71400 <hypre_BoomerAMGBuildMultipass.extracted.28+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL e4a0 <__intel_avx_rep_memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RCX,%RAX,8),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 7142f <hypre_BoomerAMGBuildMultipass.extracted.28+0x62f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RAX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SALQ $0x6,-0xc0(%RBP) | 3 | 0.50 | 0 | 0.33 | 0.33 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0.33 | 0-2 | 0.50 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 7142f <hypre_BoomerAMGBuildMultipass.extracted.28+0x62f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x130(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 71630 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x108(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xc,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 71600 <hypre_BoomerAMGBuildMultipass.extracted.28+0x800> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL e4a0 <__intel_avx_rep_memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RCX,%RDX,8),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 71630 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 71630 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xd0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 71683 <hypre_BoomerAMGBuildMultipass.extracted.28+0x883> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xd8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 71703 <hypre_BoomerAMGBuildMultipass.extracted.28+0x903> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R12,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 71b40 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R12,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 71794 <hypre_BoomerAMGBuildMultipass.extracted.28+0x994> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 71c15 <hypre_BoomerAMGBuildMultipass.extracted.28+0xe15> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM16,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 71b76 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd76> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM17,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM18 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 71de5 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfe5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 71dc0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfc0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM18,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 71dc2 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfc2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 71de5 <hypre_BoomerAMGBuildMultipass.extracted.28+0xfe5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 711c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 71e40 <hypre_BoomerAMGBuildMultipass.extracted.28+0x1040> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM18,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 711c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 71e42 <hypre_BoomerAMGBuildMultipass.extracted.28+0x1042> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 711c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3c0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |