Loop Id: 966 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.11% |
---|
Loop Id: 966 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.11% |
---|
0x4436d0 MOV -0xa8(%RBP),%RDX |
0x4436d7 INC %RDX |
0x4436da CMP -0xa0(%RBP),%RDX |
0x4436e1 JGE 44419b |
0x4436e7 MOV -0x118(%RBP),%RAX |
0x4436ee MOV %RDX,-0xa8(%RBP) |
0x4436f5 MOV (%RAX,%RDX,8),%R9 |
0x4436f9 MOV -0x120(%RBP),%RAX |
0x443700 MOV (%RAX,%R9,8),%R8 |
0x443704 MOV -0xb0(%RBP),%RAX |
0x44370b MOV (%RAX,%R9,8),%R12 |
0x44370f MOV 0x8(%RAX,%R9,8),%RSI |
0x443714 LEA (%RSI,%R8,1),%RAX |
0x443718 SUB %R12,%RAX |
0x44371b CMP %RAX,%R8 |
0x44371e MOV %R9,-0x30(%RBP) |
0x443722 JGE 4438ff |
0x443728 MOV -0x58(%RBP),%RAX |
0x44372c MOV (%RAX),%RCX |
0x44372f MOV %RSI,%RAX |
0x443732 SUB %R12,%RAX |
0x443735 CMP $0xd,%RAX |
0x443739 JB 4438c0 |
0x44373f MOV %RSI,-0x98(%RBP) |
0x443746 VMOVUPD %XMM10,-0x80(%RBP) |
0x44374b LEA (%R14,%R12,8),%RDI |
0x44374f LEA (,%RAX,8),%RDX |
0x443757 XOR %ESI,%ESI |
0x443759 MOV %R8,-0x38(%RBP) |
0x44375d MOV %RAX,-0x88(%RBP) |
0x443764 MOV %RCX,-0x90(%RBP) |
0x44376b VZEROUPPER |
0x44376e CALL 4efe80 <_intel_fast_memset> |
0x443773 MOV -0xc0(%RBP),%RAX |
0x44377a MOV -0x90(%RBP),%RCX |
0x443781 MOV (%RAX,%RCX,8),%RAX |
0x443785 MOV -0x88(%RBP),%RCX |
0x44378c SHR $0x3,%RCX |
0x443790 MOV -0x38(%RBP),%RDX |
0x443794 LEA 0x38(%RAX,%RDX,8),%RDX |
0x443799 MOV -0x100(%RBP),%RSI |
0x4437a0 LEA (%RSI,%R12,8),%RSI |
0x4437a4 MOV %RCX,%RDI |
0x4437a7 XOR %R8D,%R8D |
0x4437aa MOV -0x48(%RBP),%R11 |
0x4437ae XCHG %AX,%AX |
(984) 0x4437b0 MOV -0x38(%RDX,%R8,8),%R9 |
(984) 0x4437b5 LEA (%R12,%R8,1),%R10 |
(984) 0x4437b9 MOV %R10,(%R11,%R9,8) |
(984) 0x4437bd MOV %R9,-0x38(%RSI,%R8,8) |
(984) 0x4437c2 MOV -0x30(%RDX,%R8,8),%R9 |
(984) 0x4437c7 LEA 0x1(%R12,%R8,1),%R10 |
(984) 0x4437cc MOV %R10,(%R11,%R9,8) |
(984) 0x4437d0 MOV %R9,-0x30(%RSI,%R8,8) |
(984) 0x4437d5 MOV -0x28(%RDX,%R8,8),%R9 |
(984) 0x4437da LEA 0x2(%R12,%R8,1),%R10 |
(984) 0x4437df MOV %R10,(%R11,%R9,8) |
(984) 0x4437e3 MOV %R9,-0x28(%RSI,%R8,8) |
(984) 0x4437e8 MOV -0x20(%RDX,%R8,8),%R9 |
(984) 0x4437ed LEA 0x3(%R12,%R8,1),%R10 |
(984) 0x4437f2 MOV %R10,(%R11,%R9,8) |
(984) 0x4437f6 MOV %R9,-0x20(%RSI,%R8,8) |
(984) 0x4437fb MOV -0x18(%RDX,%R8,8),%R9 |
(984) 0x443800 LEA 0x4(%R12,%R8,1),%R10 |
(984) 0x443805 MOV %R10,(%R11,%R9,8) |
(984) 0x443809 MOV %R9,-0x18(%RSI,%R8,8) |
(984) 0x44380e MOV -0x10(%RDX,%R8,8),%R9 |
(984) 0x443813 LEA 0x5(%R12,%R8,1),%R10 |
(984) 0x443818 MOV %R10,(%R11,%R9,8) |
(984) 0x44381c MOV %R9,-0x10(%RSI,%R8,8) |
(984) 0x443821 MOV -0x8(%RDX,%R8,8),%R9 |
(984) 0x443826 LEA 0x6(%R12,%R8,1),%R10 |
(984) 0x44382b MOV %R10,(%R11,%R9,8) |
(984) 0x44382f MOV %R9,-0x8(%RSI,%R8,8) |
(984) 0x443834 MOV (%RDX,%R8,8),%R9 |
(984) 0x443838 LEA 0x7(%R12,%R8,1),%R10 |
(984) 0x44383d MOV %R10,(%R11,%R9,8) |
(984) 0x443841 MOV %R9,(%RSI,%R8,8) |
(984) 0x443845 ADD $0x8,%R8 |
(984) 0x443849 DEC %RDI |
(984) 0x44384c JNE 4437b0 |
0x443852 MOV -0x88(%RBP),%RSI |
0x443859 MOV %RSI,%RDX |
0x44385c AND $-0x8,%RDX |
0x443860 CMP %RSI,%RDX |
0x443863 MOV -0x40(%RBP),%R10 |
0x443867 MOV -0x50(%RBP),%RSI |
0x44386b VXORPD %XMM9,%XMM9,%XMM9 |
0x443870 MOV -0x30(%RBP),%R9 |
0x443874 VMOVUPD -0x80(%RBP),%XMM10 |
0x443879 MOV -0x98(%RBP),%RDI |
0x443880 MOV -0x38(%RBP),%R8 |
0x443884 JAE 4438ff |
0x443886 ADD %RDX,%R12 |
0x443889 SAL $0x6,%RCX |
0x44388d LEA (%RCX,%R8,8),%RCX |
0x443891 ADD %RCX,%RAX |
0x443894 NOPW %CS:(%RAX,%RAX,1) |
(985) 0x4438a0 MOV (%RAX),%RCX |
(985) 0x4438a3 MOV %R12,(%R11,%RCX,8) |
(985) 0x4438a7 MOV %RCX,(%RSI,%R12,8) |
(985) 0x4438ab INC %R12 |
(985) 0x4438ae ADD $0x8,%RAX |
(985) 0x4438b2 CMP %R12,%RDI |
(985) 0x4438b5 JNE 4438a0 |
0x4438b7 JMP 4438ff |
0x4438c0 SAL $0x3,%R8 |
0x4438c4 MOV -0xc0(%RBP),%RAX |
0x4438cb ADD (%RAX,%RCX,8),%R8 |
0x4438cf MOV -0x50(%RBP),%RCX |
0x4438d3 MOV -0x48(%RBP),%RDX |
0x4438d7 NOPW (%RAX,%RAX,1) |
(983) 0x4438e0 MOV (%R8),%RAX |
(983) 0x4438e3 MOV %R12,(%RDX,%RAX,8) |
(983) 0x4438e7 MOVQ $0,(%R14,%R12,8) |
(983) 0x4438ef MOV %RAX,(%RCX,%R12,8) |
(983) 0x4438f3 INC %R12 |
(983) 0x4438f6 ADD $0x8,%R8 |
(983) 0x4438fa CMP %R12,%RSI |
(983) 0x4438fd JNE 4438e0 |
0x4438ff MOV -0x128(%RBP),%RAX |
0x443906 MOV (%RAX,%R9,8),%R8 |
0x44390a MOV -0xb8(%RBP),%RAX |
0x443911 MOV (%RAX,%R9,8),%R12 |
0x443915 MOV 0x8(%RAX,%R9,8),%RCX |
0x44391a LEA (%RCX,%R8,1),%RAX |
0x44391e SUB %R12,%RAX |
0x443921 CMP %RAX,%R8 |
0x443924 JGE 443ae0 |
0x44392a MOV -0x58(%RBP),%RAX |
0x44392e MOV (%RAX),%RDX |
0x443931 MOV %RCX,%RAX |
0x443934 SUB %R12,%RAX |
0x443937 CMP $0xd,%RAX |
0x44393b JB 443ab0 |
0x443941 MOV %RCX,-0x88(%RBP) |
0x443948 VMOVUPD %XMM10,-0x80(%RBP) |
0x44394d LEA (%R13,%R12,8),%RDI |
0x443952 MOV %RDX,-0x90(%RBP) |
0x443959 LEA (,%RAX,8),%RDX |
0x443961 XOR %ESI,%ESI |
0x443963 MOV %R8,-0x98(%RBP) |
0x44396a MOV %RAX,-0x38(%RBP) |
0x44396e VZEROUPPER |
0x443971 CALL 4efe80 <_intel_fast_memset> |
0x443976 MOV -0x98(%RBP),%R11 |
0x44397d MOV -0xc8(%RBP),%RAX |
0x443984 MOV -0x90(%RBP),%RCX |
0x44398b MOV (%RAX,%RCX,8),%RAX |
0x44398f MOV -0x38(%RBP),%RCX |
0x443993 SHR $0x3,%RCX |
0x443997 LEA 0x38(%RAX,%R11,8),%RDX |
0x44399c MOV -0xf8(%RBP),%RSI |
0x4439a3 LEA (%RSI,%R12,8),%RSI |
0x4439a7 MOV %RCX,%RDI |
0x4439aa XOR %R8D,%R8D |
0x4439ad NOPL (%RAX) |
(981) 0x4439b0 MOV -0x38(%RDX,%R8,8),%R9 |
(981) 0x4439b5 LEA (%R12,%R8,1),%R10 |
(981) 0x4439b9 MOV %R10,(%R15,%R9,8) |
(981) 0x4439bd MOV %R9,-0x38(%RSI,%R8,8) |
(981) 0x4439c2 MOV -0x30(%RDX,%R8,8),%R9 |
(981) 0x4439c7 LEA 0x1(%R12,%R8,1),%R10 |
(981) 0x4439cc MOV %R10,(%R15,%R9,8) |
(981) 0x4439d0 MOV %R9,-0x30(%RSI,%R8,8) |
(981) 0x4439d5 MOV -0x28(%RDX,%R8,8),%R9 |
(981) 0x4439da LEA 0x2(%R12,%R8,1),%R10 |
(981) 0x4439df MOV %R10,(%R15,%R9,8) |
(981) 0x4439e3 MOV %R9,-0x28(%RSI,%R8,8) |
(981) 0x4439e8 MOV -0x20(%RDX,%R8,8),%R9 |
(981) 0x4439ed LEA 0x3(%R12,%R8,1),%R10 |
(981) 0x4439f2 MOV %R10,(%R15,%R9,8) |
(981) 0x4439f6 MOV %R9,-0x20(%RSI,%R8,8) |
(981) 0x4439fb MOV -0x18(%RDX,%R8,8),%R9 |
(981) 0x443a00 LEA 0x4(%R12,%R8,1),%R10 |
(981) 0x443a05 MOV %R10,(%R15,%R9,8) |
(981) 0x443a09 MOV %R9,-0x18(%RSI,%R8,8) |
(981) 0x443a0e MOV -0x10(%RDX,%R8,8),%R9 |
(981) 0x443a13 LEA 0x5(%R12,%R8,1),%R10 |
(981) 0x443a18 MOV %R10,(%R15,%R9,8) |
(981) 0x443a1c MOV %R9,-0x10(%RSI,%R8,8) |
(981) 0x443a21 MOV -0x8(%RDX,%R8,8),%R9 |
(981) 0x443a26 LEA 0x6(%R12,%R8,1),%R10 |
(981) 0x443a2b MOV %R10,(%R15,%R9,8) |
(981) 0x443a2f MOV %R9,-0x8(%RSI,%R8,8) |
(981) 0x443a34 MOV (%RDX,%R8,8),%R9 |
(981) 0x443a38 LEA 0x7(%R12,%R8,1),%R10 |
(981) 0x443a3d MOV %R10,(%R15,%R9,8) |
(981) 0x443a41 MOV %R9,(%RSI,%R8,8) |
(981) 0x443a45 ADD $0x8,%R8 |
(981) 0x443a49 DEC %RDI |
(981) 0x443a4c JNE 4439b0 |
0x443a52 MOV -0x38(%RBP),%RSI |
0x443a56 MOV %RSI,%RDX |
0x443a59 AND $-0x8,%RDX |
0x443a5d CMP %RSI,%RDX |
0x443a60 MOV -0x40(%RBP),%R10 |
0x443a64 VXORPD %XMM9,%XMM9,%XMM9 |
0x443a69 MOV -0x30(%RBP),%R9 |
0x443a6d VMOVUPD -0x80(%RBP),%XMM10 |
0x443a72 MOV -0x88(%RBP),%RSI |
0x443a79 JAE 443ae0 |
0x443a7b ADD %RDX,%R12 |
0x443a7e SAL $0x6,%RCX |
0x443a82 LEA (%RCX,%R11,8),%RCX |
0x443a86 ADD %RCX,%RAX |
0x443a89 NOPL (%RAX) |
(982) 0x443a90 MOV (%RAX),%RCX |
(982) 0x443a93 MOV %R12,(%R15,%RCX,8) |
(982) 0x443a97 MOV %RCX,(%R10,%R12,8) |
(982) 0x443a9b INC %R12 |
(982) 0x443a9e ADD $0x8,%RAX |
(982) 0x443aa2 CMP %R12,%RSI |
(982) 0x443aa5 JNE 443a90 |
0x443aa7 JMP 443ae0 |
0x443ab0 SAL $0x3,%R8 |
0x443ab4 MOV -0xc8(%RBP),%RAX |
0x443abb ADD (%RAX,%RDX,8),%R8 |
0x443abf NOP |
(980) 0x443ac0 MOV (%R8),%RAX |
(980) 0x443ac3 MOV %R12,(%R15,%RAX,8) |
(980) 0x443ac7 MOVQ $0,(%R13,%R12,8) |
(980) 0x443ad0 MOV %RAX,(%R10,%R12,8) |
(980) 0x443ad4 INC %R12 |
(980) 0x443ad7 ADD $0x8,%R8 |
(980) 0x443adb CMP %R12,%RCX |
(980) 0x443ade JNE 443ac0 |
0x443ae0 MOV -0xe0(%RBP),%RCX |
0x443ae7 MOV (%RCX,%R9,8),%RAX |
0x443aeb MOV 0x8(%RCX,%R9,8),%RCX |
0x443af0 CMP %RCX,%RAX |
0x443af3 MOV -0x48(%RBP),%R12 |
0x443af7 JGE 443b50 |
0x443af9 MOV -0x58(%RBP),%RDX |
0x443afd MOV (%RDX),%RDX |
0x443b00 DEC %RDX |
0x443b03 JMP 443b18 |
(979) 0x443b10 INC %RAX |
(979) 0x443b13 CMP %RCX,%RAX |
(979) 0x443b16 JGE 443b50 |
(979) 0x443b18 MOV -0x180(%RBP),%RSI |
(979) 0x443b1f MOV (%RSI,%RAX,8),%RSI |
(979) 0x443b23 MOV -0x190(%RBP),%RDI |
(979) 0x443b2a CMP %RDX,(%RDI,%RSI,8) |
(979) 0x443b2e JNE 443b10 |
(979) 0x443b30 MOV -0x68(%RBP),%RCX |
(979) 0x443b34 MOV %R9,(%RCX,%RSI,8) |
(979) 0x443b38 MOV -0xe0(%RBP),%RCX |
(979) 0x443b3f MOV 0x8(%RCX,%R9,8),%RCX |
(979) 0x443b44 JMP 443b10 |
0x443b50 MOV -0xe8(%RBP),%RCX |
0x443b57 MOV (%RCX,%R9,8),%RAX |
0x443b5b MOV 0x8(%RCX,%R9,8),%RCX |
0x443b60 CMP %RCX,%RAX |
0x443b63 JGE 443bc0 |
0x443b65 MOV -0x58(%RBP),%RDX |
0x443b69 MOV (%RDX),%RDX |
0x443b6c DEC %RDX |
0x443b6f JMP 443b88 |
(978) 0x443b80 INC %RAX |
(978) 0x443b83 CMP %RCX,%RAX |
(978) 0x443b86 JGE 443bc0 |
(978) 0x443b88 MOV -0x188(%RBP),%RSI |
(978) 0x443b8f MOV (%RSI,%RAX,8),%RSI |
(978) 0x443b93 MOV -0x198(%RBP),%RDI |
(978) 0x443b9a CMP %RDX,(%RDI,%RSI,8) |
(978) 0x443b9e JNE 443b80 |
(978) 0x443ba0 MOV -0x60(%RBP),%RCX |
(978) 0x443ba4 MOV %R9,(%RCX,%RSI,8) |
(978) 0x443ba8 MOV -0xe8(%RBP),%RCX |
(978) 0x443baf MOV 0x8(%RCX,%R9,8),%RCX |
(978) 0x443bb4 JMP 443b80 |
0x443bc0 MOV -0x108(%RBP),%RAX |
0x443bc7 MOV (%RAX,%R9,8),%RCX |
0x443bcb MOV 0x8(%RAX,%R9,8),%R11 |
0x443bd0 LEA 0x1(%RCX),%RDX |
0x443bd4 VXORPD %XMM1,%XMM1,%XMM1 |
0x443bd8 CMP %R11,%RDX |
0x443bdb MOV %RCX,-0x38(%RBP) |
0x443bdf VXORPD %XMM0,%XMM0,%XMM0 |
0x443be3 JGE 443f20 |
0x443be9 MOV -0x50(%RBP),%RAX |
0x443bed MOV %R11,-0x80(%RBP) |
0x443bf1 JMP 443c14 |
(973) 0x443c00 MOV -0x50(%RBP),%RAX |
(973) 0x443c04 MOV -0x30(%RBP),%R9 |
(973) 0x443c08 INC %RDX |
(973) 0x443c0b CMP %R11,%RDX |
(973) 0x443c0e JE 443f20 |
(973) 0x443c14 MOV -0x170(%RBP),%RSI |
(973) 0x443c1b MOV (%RSI,%RDX,8),%RSI |
(973) 0x443c1f MOV -0x68(%RBP),%RDI |
(973) 0x443c23 CMP %R9,(%RDI,%RSI,8) |
(973) 0x443c27 JNE 443c50 |
(973) 0x443c29 MOV -0xb0(%RBP),%R8 |
(973) 0x443c30 MOV (%R8,%RSI,8),%RDI |
(973) 0x443c34 MOV 0x8(%R8,%RSI,8),%R8 |
(973) 0x443c39 MOV %R8,%R9 |
(973) 0x443c3c SUB %RDI,%R9 |
(973) 0x443c3f JLE 443da9 |
(973) 0x443c45 CMP $0x4,%R9 |
(973) 0x443c49 JAE 443c80 |
(973) 0x443c4b JMP 443d55 |
(973) 0x443c50 MOV -0x158(%RBP),%RDI |
(973) 0x443c57 CMPQ $-0x3,(%RDI,%RSI,8) |
(973) 0x443c5c JE 443c08 |
(973) 0x443c5e CMPQ $0x1,-0xf0(%RBP) |
(973) 0x443c66 JE 443c79 |
(973) 0x443c68 MOV -0xd0(%RBP),%R8 |
(973) 0x443c6f MOV (%R8,%R9,8),%RDI |
(973) 0x443c73 CMP (%R8,%RSI,8),%RDI |
(973) 0x443c77 JNE 443c08 |
(973) 0x443c79 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(973) 0x443c7e JMP 443c08 |
(973) 0x443c80 MOV %R9,%R10 |
(973) 0x443c83 SHR $0x2,%R10 |
(973) 0x443c87 LEA 0x18(,%RDI,8),%R11 |
(973) 0x443c8f MOV %R12,%RCX |
(973) 0x443c92 NOPW %CS:(%RAX,%RAX,1) |
(976) 0x443ca0 MOV -0x18(%RAX,%R11,1),%R12 |
(976) 0x443ca5 VMOVSD -0x18(%R14,%R11,1),%XMM2 |
(976) 0x443cac VMOVSD (%RBX,%RDX,8),%XMM3 |
(976) 0x443cb1 MOV (%RCX,%R12,8),%R12 |
(976) 0x443cb5 VMOVSD (%R14,%R12,8),%XMM4 |
(976) 0x443cbb VFMADD231SD %XMM2,%XMM3,%XMM4 |
(976) 0x443cc0 VMOVSD %XMM4,(%R14,%R12,8) |
(976) 0x443cc6 MOV -0x10(%RAX,%R11,1),%R12 |
(976) 0x443ccb VMOVSD -0x10(%R14,%R11,1),%XMM4 |
(976) 0x443cd2 VMOVSD (%RBX,%RDX,8),%XMM5 |
(976) 0x443cd7 MOV (%RCX,%R12,8),%R12 |
(976) 0x443cdb VMOVSD (%R14,%R12,8),%XMM6 |
(976) 0x443ce1 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(976) 0x443ce6 VMOVSD %XMM6,(%R14,%R12,8) |
(976) 0x443cec MOV -0x8(%RAX,%R11,1),%R12 |
(976) 0x443cf1 VMOVSD -0x8(%R14,%R11,1),%XMM6 |
(976) 0x443cf8 VMOVSD (%RBX,%RDX,8),%XMM7 |
(976) 0x443cfd MOV (%RCX,%R12,8),%R12 |
(976) 0x443d01 VMOVSD (%R14,%R12,8),%XMM8 |
(976) 0x443d07 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(976) 0x443d0c VMOVSD %XMM8,(%R14,%R12,8) |
(976) 0x443d12 VMOVSD (%R14,%R11,1),%XMM8 |
(976) 0x443d18 VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(976) 0x443d1d MOV (%RAX,%R11,1),%R12 |
(976) 0x443d21 MOV (%RCX,%R12,8),%R12 |
(976) 0x443d25 VADDSD (%R14,%R12,8),%XMM10,%XMM8 |
(976) 0x443d2b VMOVSD %XMM8,(%R14,%R12,8) |
(976) 0x443d31 VFMADD213SD %XMM10,%XMM5,%XMM4 |
(976) 0x443d36 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(976) 0x443d3b VFMADD231SD %XMM6,%XMM7,%XMM4 |
(976) 0x443d40 VADDSD %XMM1,%XMM4,%XMM1 |
(976) 0x443d44 VADDSD %XMM0,%XMM4,%XMM0 |
(976) 0x443d48 ADD $0x20,%R11 |
(976) 0x443d4c DEC %R10 |
(976) 0x443d4f JNE 443ca0 |
(973) 0x443d55 MOV %R9,%R10 |
(973) 0x443d58 AND $-0x4,%R10 |
(973) 0x443d5c CMP %R9,%R10 |
(973) 0x443d5f MOV -0x80(%RBP),%R11 |
(973) 0x443d63 JAE 443da1 |
(973) 0x443d65 ADD %R10,%RDI |
(973) 0x443d68 MOV -0x40(%RBP),%R10 |
(973) 0x443d6c MOV -0x48(%RBP),%R12 |
(977) 0x443d70 MOV (%RAX,%RDI,8),%R9 |
(977) 0x443d74 VMOVSD (%R14,%RDI,8),%XMM2 |
(977) 0x443d7a VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(977) 0x443d7f MOV (%R12,%R9,8),%R9 |
(977) 0x443d83 VADDSD (%R14,%R9,8),%XMM10,%XMM2 |
(977) 0x443d89 VMOVSD %XMM2,(%R14,%R9,8) |
(977) 0x443d8f VADDSD %XMM1,%XMM10,%XMM1 |
(977) 0x443d93 VADDSD %XMM0,%XMM10,%XMM0 |
(977) 0x443d97 INC %RDI |
(977) 0x443d9a CMP %RDI,%R8 |
(977) 0x443d9d JNE 443d70 |
(973) 0x443d9f JMP 443da9 |
(973) 0x443da1 MOV -0x40(%RBP),%R10 |
(973) 0x443da5 MOV -0x48(%RBP),%R12 |
(973) 0x443da9 MOV -0xb8(%RBP),%RAX |
(973) 0x443db0 MOV (%RAX,%RSI,8),%RDI |
(973) 0x443db4 MOV 0x8(%RAX,%RSI,8),%RSI |
(973) 0x443db9 MOV %RSI,%R8 |
(973) 0x443dbc SUB %RDI,%R8 |
(973) 0x443dbf JLE 443c00 |
(973) 0x443dc5 CMP $0x4,%R8 |
(973) 0x443dc9 JAE 443dd0 |
(973) 0x443dcb JMP 443eae |
(973) 0x443dd0 MOV %R8,%R9 |
(973) 0x443dd3 SHR $0x2,%R9 |
(973) 0x443dd7 MOV %R10,%RAX |
(973) 0x443dda LEA 0x18(,%RDI,8),%R10 |
(973) 0x443de2 NOPW %CS:(%RAX,%RAX,1) |
(974) 0x443df0 MOV -0x18(%RAX,%R10,1),%R11 |
(974) 0x443df5 VMOVSD -0x18(%R13,%R10,1),%XMM2 |
(974) 0x443dfc VMOVSD (%RBX,%RDX,8),%XMM3 |
(974) 0x443e01 MOV (%R15,%R11,8),%R11 |
(974) 0x443e05 VMOVSD (%R13,%R11,8),%XMM4 |
(974) 0x443e0c VFMADD231SD %XMM2,%XMM3,%XMM4 |
(974) 0x443e11 VMOVSD %XMM4,(%R13,%R11,8) |
(974) 0x443e18 MOV -0x10(%RAX,%R10,1),%R11 |
(974) 0x443e1d VMOVSD -0x10(%R13,%R10,1),%XMM4 |
(974) 0x443e24 VMOVSD (%RBX,%RDX,8),%XMM5 |
(974) 0x443e29 MOV (%R15,%R11,8),%R11 |
(974) 0x443e2d VMOVSD (%R13,%R11,8),%XMM6 |
(974) 0x443e34 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(974) 0x443e39 VMOVSD %XMM6,(%R13,%R11,8) |
(974) 0x443e40 MOV -0x8(%RAX,%R10,1),%R11 |
(974) 0x443e45 VMOVSD -0x8(%R13,%R10,1),%XMM6 |
(974) 0x443e4c VMOVSD (%RBX,%RDX,8),%XMM7 |
(974) 0x443e51 MOV (%R15,%R11,8),%R11 |
(974) 0x443e55 VMOVSD (%R13,%R11,8),%XMM8 |
(974) 0x443e5c VFMADD231SD %XMM6,%XMM7,%XMM8 |
(974) 0x443e61 VMOVSD %XMM8,(%R13,%R11,8) |
(974) 0x443e68 VMOVSD (%R13,%R10,1),%XMM8 |
(974) 0x443e6f VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(974) 0x443e74 MOV (%RAX,%R10,1),%R11 |
(974) 0x443e78 MOV (%R15,%R11,8),%R11 |
(974) 0x443e7c VADDSD (%R13,%R11,8),%XMM10,%XMM8 |
(974) 0x443e83 VMOVSD %XMM8,(%R13,%R11,8) |
(974) 0x443e8a VFMADD213SD %XMM10,%XMM5,%XMM4 |
(974) 0x443e8f VFMADD231SD %XMM2,%XMM3,%XMM4 |
(974) 0x443e94 VFMADD231SD %XMM6,%XMM7,%XMM4 |
(974) 0x443e99 VADDSD %XMM1,%XMM4,%XMM1 |
(974) 0x443e9d VADDSD %XMM0,%XMM4,%XMM0 |
(974) 0x443ea1 ADD $0x20,%R10 |
(974) 0x443ea5 DEC %R9 |
(974) 0x443ea8 JNE 443df0 |
(973) 0x443eae MOV %R8,%R9 |
(973) 0x443eb1 AND $-0x4,%R9 |
(973) 0x443eb5 CMP %R8,%R9 |
(973) 0x443eb8 JAE 443f07 |
(973) 0x443eba ADD %R9,%RDI |
(973) 0x443ebd MOV -0x40(%RBP),%R10 |
(973) 0x443ec1 MOV -0x50(%RBP),%RAX |
(973) 0x443ec5 MOV -0x30(%RBP),%R9 |
(973) 0x443ec9 MOV -0x80(%RBP),%R11 |
(973) 0x443ecd NOPL (%RAX) |
(975) 0x443ed0 MOV (%R10,%RDI,8),%R8 |
(975) 0x443ed4 VMOVSD (%R13,%RDI,8),%XMM2 |
(975) 0x443edb VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(975) 0x443ee0 MOV (%R15,%R8,8),%R8 |
(975) 0x443ee4 VADDSD (%R13,%R8,8),%XMM10,%XMM2 |
(975) 0x443eeb VMOVSD %XMM2,(%R13,%R8,8) |
(975) 0x443ef2 VADDSD %XMM1,%XMM10,%XMM1 |
(975) 0x443ef6 VADDSD %XMM0,%XMM10,%XMM0 |
(975) 0x443efa INC %RDI |
(975) 0x443efd CMP %RDI,%RSI |
(975) 0x443f00 JNE 443ed0 |
(973) 0x443f02 JMP 443c08 |
(973) 0x443f07 MOV -0x40(%RBP),%R10 |
(973) 0x443f0b MOV -0x50(%RBP),%RAX |
(973) 0x443f0f MOV -0x30(%RBP),%R9 |
(973) 0x443f13 MOV -0x80(%RBP),%R11 |
(973) 0x443f17 JMP 443c08 |
0x443f20 MOV -0x110(%RBP),%RAX |
0x443f27 MOV (%RAX,%R9,8),%RCX |
0x443f2b MOV 0x8(%RAX,%R9,8),%RDX |
0x443f30 CMP %RDX,%RCX |
0x443f33 JL 443fe0 |
0x443f39 MOV -0x38(%RBP),%RAX |
0x443f3d VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 |
0x443f42 VUCOMISD %XMM9,%XMM1 |
0x443f47 JE 443f57 |
0x443f49 VXORPD 0xbb26d(%RIP){1to2},%XMM0,%XMM0 |
0x443f53 VDIVSD %XMM1,%XMM0,%XMM10 |
0x443f57 MOV -0xb0(%RBP),%RAX |
0x443f5e MOV (%RAX,%R9,8),%RDX |
0x443f62 MOV 0x8(%RAX,%R9,8),%RAX |
0x443f67 MOV %RAX,%RSI |
0x443f6a SUB %RDX,%RSI |
0x443f6d JLE 444114 |
0x443f73 MOV %RSI,%RCX |
0x443f76 AND $-0x4,%RCX |
0x443f7a JE 4440f8 |
0x443f80 LEA -0x1(%RCX),%RDI |
0x443f84 VBROADCASTSD %XMM10,%YMM0 |
0x443f89 LEA (%R14,%RDX,8),%R8 |
0x443f8d XOR %R9D,%R9D |
(970) 0x443f90 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(970) 0x443f96 VMOVUPD %YMM1,(%R8,%R9,8) |
(970) 0x443f9c ADD $0x4,%R9 |
(970) 0x443fa0 CMP %RDI,%R9 |
(970) 0x443fa3 JBE 443f90 |
0x443fa5 CMP %RCX,%RSI |
0x443fa8 MOV -0x30(%RBP),%R9 |
0x443fac JNE 4440fa |
0x443fb2 JMP 444114 |
(971) 0x443fc0 VADDSD (%R8,%RCX,8),%XMM0,%XMM0 |
(971) 0x443fc6 MOV %R12,%R11 |
(971) 0x443fc9 INC %RCX |
(971) 0x443fcc CMP %RDX,%RCX |
(971) 0x443fcf MOV -0x40(%RBP),%R10 |
(971) 0x443fd3 MOV %R11,%R12 |
(971) 0x443fd6 MOV -0x30(%RBP),%R9 |
(971) 0x443fda JE 443f39 |
(971) 0x443fe0 MOV -0x178(%RBP),%RAX |
(971) 0x443fe7 LEA (%RAX,%RCX,8),%RSI |
(971) 0x443feb CMPQ $0,-0x1a0(%RBP) |
(971) 0x443ff3 JE 444003 |
(971) 0x443ff5 MOV (%RSI),%RSI |
(971) 0x443ff8 MOV -0x160(%RBP),%RDI |
(971) 0x443fff LEA (%RDI,%RSI,8),%RSI |
(971) 0x444003 MOV (%RSI),%RDI |
(971) 0x444006 TEST %RDI,%RDI |
(971) 0x444009 JS 4440b0 |
(971) 0x44400f MOV -0x60(%RBP),%RSI |
(971) 0x444013 CMP %R9,(%RSI,%RDI,8) |
(971) 0x444017 JNE 4440b0 |
(971) 0x44401d MOV -0x150(%RBP),%RSI |
(971) 0x444024 MOV 0x8(%RSI,%RDI,8),%RSI |
(971) 0x444029 TEST %RSI,%RSI |
(971) 0x44402c JLE 443fc6 |
(971) 0x44402e MOV -0x138(%RBP),%R8 |
(971) 0x444035 MOV (%R8,%RDI,8),%RDI |
(971) 0x444039 ADD %RDI,%RSI |
(971) 0x44403c MOV -0x58(%RBP),%R8 |
(971) 0x444040 MOV (%R8),%R8 |
(971) 0x444043 MOV -0x140(%RBP),%R9 |
(971) 0x44404a MOV (%R9,%R8,8),%R8 |
(971) 0x44404e MOV %R12,%R11 |
(971) 0x444051 MOV -0xd8(%RBP),%R12 |
(971) 0x444058 MOV -0x148(%RBP),%RAX |
(971) 0x44405f NOP |
(972) 0x444060 MOV (%R8,%RDI,8),%R9 |
(972) 0x444064 VMOVSD (%RAX,%RDI,8),%XMM2 |
(972) 0x444069 VMULSD (%R12,%RCX,8),%XMM2,%XMM10 |
(972) 0x44406f TEST %R9,%R9 |
(972) 0x444072 LEA (%R15,%R9,8),%R10 |
(972) 0x444076 NOT %R9 |
(972) 0x444079 LEA (%R11,%R9,8),%R9 |
(972) 0x44407d CMOVNS %R10,%R9 |
(972) 0x444081 MOV %R13,%R10 |
(972) 0x444084 CMOVS %R14,%R10 |
(972) 0x444088 MOV (%R9),%R9 |
(972) 0x44408b VADDSD (%R10,%R9,8),%XMM10,%XMM2 |
(972) 0x444091 VMOVSD %XMM2,(%R10,%R9,8) |
(972) 0x444097 VADDSD %XMM1,%XMM10,%XMM1 |
(972) 0x44409b VADDSD %XMM0,%XMM10,%XMM0 |
(972) 0x44409f INC %RDI |
(972) 0x4440a2 CMP %RSI,%RDI |
(972) 0x4440a5 JL 444060 |
(971) 0x4440a7 JMP 443fc9 |
(971) 0x4440b0 MOV -0x168(%RBP),%RSI |
(971) 0x4440b7 CMPQ $-0x3,(%RSI,%RDI,8) |
(971) 0x4440bc JE 443fc6 |
(971) 0x4440c2 CMPQ $0x1,-0xf0(%RBP) |
(971) 0x4440ca MOV -0xd8(%RBP),%R8 |
(971) 0x4440d1 JE 443fc0 |
(971) 0x4440d7 MOV -0x130(%RBP),%RSI |
(971) 0x4440de MOV (%RSI,%RDI,8),%RSI |
(971) 0x4440e2 MOV -0xd0(%RBP),%RDI |
(971) 0x4440e9 CMP (%RDI,%R9,8),%RSI |
(971) 0x4440ed JE 443fc0 |
(971) 0x4440f3 JMP 443fc6 |
0x4440f8 XOR %ECX,%ECX |
0x4440fa ADD %RDX,%RCX |
0x4440fd NOPL (%RAX) |
(969) 0x444100 VMULSD (%R14,%RCX,8),%XMM10,%XMM0 |
(969) 0x444106 VMOVSD %XMM0,(%R14,%RCX,8) |
(969) 0x44410c INC %RCX |
(969) 0x44410f CMP %RCX,%RAX |
(969) 0x444112 JNE 444100 |
0x444114 MOV -0xb8(%RBP),%RAX |
0x44411b MOV (%RAX,%R9,8),%RDX |
0x44411f MOV 0x8(%RAX,%R9,8),%RAX |
0x444124 MOV %RAX,%RSI |
0x444127 SUB %RDX,%RSI |
0x44412a JLE 4436d0 |
0x444130 MOV %RSI,%RCX |
0x444133 AND $-0x4,%RCX |
0x444137 JE 444170 |
0x444139 LEA -0x1(%RCX),%RDI |
0x44413d VBROADCASTSD %XMM10,%YMM0 |
0x444142 LEA (%R13,%RDX,8),%R8 |
0x444147 XOR %R9D,%R9D |
0x44414a NOPW (%RAX,%RAX,1) |
(968) 0x444150 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(968) 0x444156 VMOVUPD %YMM1,(%R8,%R9,8) |
(968) 0x44415c ADD $0x4,%R9 |
(968) 0x444160 CMP %RDI,%R9 |
(968) 0x444163 JBE 444150 |
0x444165 CMP %RCX,%RSI |
0x444168 JE 4436d0 |
0x44416e JMP 444172 |
0x444170 XOR %ECX,%ECX |
0x444172 ADD %RDX,%RCX |
0x444175 NOPW %CS:(%RAX,%RAX,1) |
(967) 0x444180 VMULSD (%R13,%RCX,8),%XMM10,%XMM0 |
(967) 0x444187 VMOVSD %XMM0,(%R13,%RCX,8) |
(967) 0x44418e INC %RCX |
(967) 0x444191 CMP %RCX,%RAX |
(967) 0x444194 JNE 444180 |
0x444196 JMP 4436d0 |
/scratch_na/users/xoserete/qaas_runs/171-172-8218/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1876 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1774: for (i=thread_start; i < thread_stop; i++) |
1775: { |
1776: i1 = pass_array[i]; |
1777: sum_C = 0; |
1778: sum_N = 0; |
1779: j_start = P_diag_start[i1]; |
1780: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1781: cnt = P_diag_i[i1]; |
1782: for (j=j_start; j < j_end; j++) |
1783: { |
1784: k1 = P_diag_pass[pass][j]; |
1785: tmp_array[k1] = cnt; |
1786: P_diag_data[cnt] = 0; |
1787: P_diag_j[cnt++] = k1; |
1788: } |
1789: j_start = P_offd_start[i1]; |
1790: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1791: cnt_offd = P_offd_i[i1]; |
1792: for (j=j_start; j < j_end; j++) |
1793: { |
1794: k1 = P_offd_pass[pass][j]; |
1795: tmp_array_offd[k1] = cnt_offd; |
1796: P_offd_data[cnt_offd] = 0; |
1797: P_offd_j[cnt_offd++] = k1; |
1798: } |
1799: for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++) |
1800: { |
1801: j1 = S_diag_j[j]; |
1802: if (assigned[j1] == pass-1) |
1803: tmp_marker[j1] = i1; |
1804: } |
1805: for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++) |
1806: { |
1807: j1 = S_offd_j[j]; |
1808: if (assigned_offd[j1] == pass-1) |
1809: tmp_marker_offd[j1] = i1; |
1810: } |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
1838: } |
1839: } |
1840: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1841: { |
1842: if (col_offd_S_to_A) |
1843: j1 = map_A_to_S[A_offd_j[j]]; |
1844: else |
1845: j1 = A_offd_j[j]; |
1846: |
1847: if (j1 > -1 && tmp_marker_offd[j1] == i1) |
1848: { |
1849: j_start = Pext_start[j1]; |
1850: j_end = j_start+Pext_i[j1+1]; |
1851: for (k=j_start; k < j_end; k++) |
1852: { |
1853: k1 = Pext_pass[pass][k]; |
1854: alfa = A_offd_data[j]*Pext_data[k]; |
1855: if (k1 < 0) |
1856: P_diag_data[tmp_array[-k1-1]] += alfa; |
1857: else |
1858: P_offd_data[tmp_array_offd[k1]] += alfa; |
1859: sum_C += alfa; |
1860: sum_N += alfa; |
1861: } |
1862: } |
1863: else |
1864: { |
1865: if (CF_marker_offd[j1] != -3 && |
1866: (num_functions == 1 || dof_func_offd[j1] == dof_func[i1])) |
1867: sum_N += A_offd_data[j]; |
1868: } |
1869: } |
1870: diagonal = A_diag_data[A_diag_i[i1]]; |
1871: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1872: |
1873: for (j=P_diag_i[i1]; j < P_diag_i[i1+1]; j++) |
1874: P_diag_data[j] *= alfa; |
1875: for (j=P_offd_i[i1]; j < P_offd_i[i1+1]; j++) |
1876: P_offd_data[j] *= alfa; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.55 |
CQA speedup if FP arith vectorized | 3.50 |
CQA speedup if fully vectorized | 7.74 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.41 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1865-1865,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 36.17 |
CQA cycles if no scalar integer | 14.17 |
CQA cycles if FP arith vectorized | 10.32 |
CQA cycles if fully vectorized | 4.67 |
Front-end cycles | 36.17 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 13.00 |
P1 cycles | 12.40 |
P2 cycles | 25.67 |
P3 cycles | 25.67 |
P4 cycles | 8.00 |
P5 cycles | 12.40 |
P6 cycles | 13.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 12.20 |
P11 cycles | 25.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 35.85 - 35.84 |
Stall cycles (UFS) | 0.00 |
Nb insns | 210.00 |
Nb uops | 214.00 |
Nb loads | 77.00 |
Nb stores | 14.00 |
Nb stack references | 25.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.01 |
Bytes prefetched | 0.00 |
Bytes loaded | 632.00 |
Bytes stored | 128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 23.91 |
Vectorization ratio load | 37.50 |
Vectorization ratio store | 14.29 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 28.00 |
Vector-efficiency ratio all | 15.49 |
Vector-efficiency ratio load | 17.19 |
Vector-efficiency ratio store | 14.29 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 16.00 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.55 |
CQA speedup if FP arith vectorized | 3.50 |
CQA speedup if fully vectorized | 7.74 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.41 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1865-1865,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 36.17 |
CQA cycles if no scalar integer | 14.17 |
CQA cycles if FP arith vectorized | 10.32 |
CQA cycles if fully vectorized | 4.67 |
Front-end cycles | 36.17 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 13.00 |
P1 cycles | 12.40 |
P2 cycles | 25.67 |
P3 cycles | 25.67 |
P4 cycles | 8.00 |
P5 cycles | 12.40 |
P6 cycles | 13.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 12.20 |
P11 cycles | 25.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 35.85 - 35.84 |
Stall cycles (UFS) | 0.00 |
Nb insns | 210.00 |
Nb uops | 214.00 |
Nb loads | 77.00 |
Nb stores | 14.00 |
Nb stack references | 25.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.01 |
Bytes prefetched | 0.00 |
Bytes loaded | 632.00 |
Bytes stored | 128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 23.91 |
Vectorization ratio load | 37.50 |
Vectorization ratio store | 14.29 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 28.00 |
Vector-efficiency ratio all | 15.49 |
Vector-efficiency ratio load | 17.19 |
Vector-efficiency ratio store | 14.29 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 16.00 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 210 |
nb uops | 214 |
loop length | 944 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 25 |
micro-operation queue | 36.17 cycles |
front end | 36.17 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.00 | 12.40 | 25.67 | 25.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 25.67 |
cycles | 13.00 | 12.40 | 25.67 | 25.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 25.67 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 35.85-35.84 |
Stall cycles | 0.00 |
Front-end | 36.17 |
Dispatch | 25.67 |
DIV/SQRT | 4.00 |
Overall L1 | 36.17 |
Vectorization ratios, integer instructions (INT):
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
Vectorization ratios, floating-point instructions (FP):
all | 64% |
load | 75% |
store | 100% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 62% |
Vectorization ratios, all instructions (INT+FP):
all | 23% |
load | 37% |
store | 14% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 28% |
Vector-efficiency ratios, integer instructions (INT):
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
Vector-efficiency ratios, floating-point instructions (FP):
all | 20% |
load | 21% |
store | 25% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 20% |
Vector-efficiency ratios, all instructions (INT+FP):
all | 15% |
load | 17% |
store | 14% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0xa0(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 44419b <hypre_BoomerAMGBuildMultipass.extracted.28+0xe0b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4438ff <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4438c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x530> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4efe80 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RAX,%RDX,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x100(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x80(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x98(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4438ff <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R8,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4438ff <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RCX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443ae0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443ab0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x720> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4efe80 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x98(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%RAX,%R11,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x80(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 443ae0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443ae0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RDX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 443b50 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 443b18 <hypre_BoomerAMGBuildMultipass.extracted.28+0x788> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xe8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443bc0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 443b88 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 443f20 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb90> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 443c14 <hypre_BoomerAMGBuildMultipass.extracted.28+0x884> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 443fe0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xc50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM9,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 443f57 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbc7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD 0xbb26d(%RIP){1to2},%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 444114 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd84> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4440f8 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd68> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4440fa <hypre_BoomerAMGBuildMultipass.extracted.28+0xd6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 444114 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4436d0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 444170 <hypre_BoomerAMGBuildMultipass.extracted.28+0xde0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4436d0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 444172 <hypre_BoomerAMGBuildMultipass.extracted.28+0xde2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4436d0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 210 |
nb uops | 214 |
loop length | 944 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 25 |
micro-operation queue | 36.17 cycles |
front end | 36.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.00 | 12.40 | 25.67 | 25.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 25.67 |
cycles | 13.00 | 12.40 | 25.67 | 25.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 25.67 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 35.85-35.84 |
Stall cycles | 0.00 |
Front-end | 36.17 |
Dispatch | 25.67 |
DIV/SQRT | 4.00 |
Overall L1 | 36.17 |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 64% |
load | 75% |
store | 100% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 62% |
all | 23% |
load | 37% |
store | 14% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 28% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 20% |
load | 21% |
store | 25% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 20% |
all | 15% |
load | 17% |
store | 14% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0xa0(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 44419b <hypre_BoomerAMGBuildMultipass.extracted.28+0xe0b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4438ff <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4438c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x530> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4efe80 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RAX,%RDX,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x100(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x80(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x98(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4438ff <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R8,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4438ff <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RCX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443ae0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443ab0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x720> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4efe80 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x98(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%RAX,%R11,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x80(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 443ae0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443ae0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RDX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 443b50 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 443b18 <hypre_BoomerAMGBuildMultipass.extracted.28+0x788> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xe8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443bc0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 443b88 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 443f20 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb90> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 443c14 <hypre_BoomerAMGBuildMultipass.extracted.28+0x884> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 443fe0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xc50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM9,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 443f57 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbc7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD 0xbb26d(%RIP){1to2},%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 444114 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd84> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4440f8 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd68> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4440fa <hypre_BoomerAMGBuildMultipass.extracted.28+0xd6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 444114 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4436d0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 444170 <hypre_BoomerAMGBuildMultipass.extracted.28+0xde0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4436d0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 444172 <hypre_BoomerAMGBuildMultipass.extracted.28+0xde2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4436d0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |