Loop Id: 771 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.1% |
---|
Loop Id: 771 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.1% |
---|
0x4443c3 MOV -0x90(%RBP),%RDX |
0x4443ca INC %RDX |
0x4443cd CMP -0xa8(%RBP),%RDX |
0x4443d4 JGE 444f84 |
0x4443da MOV -0x118(%RBP),%RAX |
0x4443e1 MOV %RDX,-0x90(%RBP) |
0x4443e8 MOV (%RAX,%RDX,8),%R9 |
0x4443ec MOV -0x120(%RBP),%RAX |
0x4443f3 MOV (%RAX,%R9,8),%R8 |
0x4443f7 MOV -0x98(%RBP),%RAX |
0x4443fe MOV (%RAX,%R9,8),%R12 |
0x444402 MOV 0x8(%RAX,%R9,8),%RDI |
0x444407 LEA (%RDI,%R8,1),%RAX |
0x44440b SUB %R12,%RAX |
0x44440e CMP %RAX,%R8 |
0x444411 MOV %R9,-0x30(%RBP) |
0x444415 JGE 4445df |
0x44441b MOV -0x60(%RBP),%RAX |
0x44441f MOV (%RAX),%RAX |
0x444422 MOV -0xf8(%RBP),%RCX |
0x444429 MOV (%RCX,%RAX,8),%RAX |
0x44442d MOV %RDI,%RCX |
0x444430 SUB %R12,%RCX |
0x444433 CMP $0xd,%RCX |
0x444437 JB 4445b0 |
0x44443d MOV %RDI,-0x88(%RBP) |
0x444444 VMOVSD %XMM10,-0x40(%RBP) |
0x444449 LEA (%R14,%R12,8),%RDI |
0x44444d MOV %RCX,-0x80(%RBP) |
0x444451 LEA (,%RCX,8),%RDX |
0x444459 XOR %ESI,%ESI |
0x44445b MOV %RAX,-0x38(%RBP) |
0x44445f MOV %R8,-0x78(%RBP) |
0x444463 CALL 5011c0 <_intel_fast_memset> |
0x444468 MOV -0x80(%RBP),%RAX |
0x44446c MOV %RAX,%R10 |
0x44446f SHR $0x3,%RAX |
0x444473 MOV -0x78(%RBP),%RDX |
0x444477 MOV -0x38(%RBP),%RCX |
0x44447b LEA (%RCX,%RDX,8),%RCX |
0x44447f ADD $0x38,%RCX |
0x444483 MOV -0xf0(%RBP),%RDX |
0x44448a LEA (%RDX,%R12,8),%RDX |
0x44448e MOV %RAX,%RSI |
0x444491 XOR %EDI,%EDI |
0x444493 MOV -0x50(%RBP),%R11 |
0x444497 NOPW (%RAX,%RAX,1) |
(787) 0x4444a0 MOV -0x38(%RCX,%RDI,8),%R8 |
(787) 0x4444a5 LEA (%R12,%RDI,1),%R9 |
(787) 0x4444a9 MOV %R9,(%R11,%R8,8) |
(787) 0x4444ad MOV %R8,-0x38(%RDX,%RDI,8) |
(787) 0x4444b2 MOV -0x30(%RCX,%RDI,8),%R8 |
(787) 0x4444b7 LEA 0x1(%R12,%RDI,1),%R9 |
(787) 0x4444bc MOV %R9,(%R11,%R8,8) |
(787) 0x4444c0 MOV %R8,-0x30(%RDX,%RDI,8) |
(787) 0x4444c5 MOV -0x28(%RCX,%RDI,8),%R8 |
(787) 0x4444ca LEA 0x2(%R12,%RDI,1),%R9 |
(787) 0x4444cf MOV %R9,(%R11,%R8,8) |
(787) 0x4444d3 MOV %R8,-0x28(%RDX,%RDI,8) |
(787) 0x4444d8 MOV -0x20(%RCX,%RDI,8),%R8 |
(787) 0x4444dd LEA 0x3(%R12,%RDI,1),%R9 |
(787) 0x4444e2 MOV %R9,(%R11,%R8,8) |
(787) 0x4444e6 MOV %R8,-0x20(%RDX,%RDI,8) |
(787) 0x4444eb MOV -0x18(%RCX,%RDI,8),%R8 |
(787) 0x4444f0 LEA 0x4(%R12,%RDI,1),%R9 |
(787) 0x4444f5 MOV %R9,(%R11,%R8,8) |
(787) 0x4444f9 MOV %R8,-0x18(%RDX,%RDI,8) |
(787) 0x4444fe MOV -0x10(%RCX,%RDI,8),%R8 |
(787) 0x444503 LEA 0x5(%R12,%RDI,1),%R9 |
(787) 0x444508 MOV %R9,(%R11,%R8,8) |
(787) 0x44450c MOV %R8,-0x10(%RDX,%RDI,8) |
(787) 0x444511 MOV -0x8(%RCX,%RDI,8),%R8 |
(787) 0x444516 LEA 0x6(%R12,%RDI,1),%R9 |
(787) 0x44451b MOV %R9,(%R11,%R8,8) |
(787) 0x44451f MOV %R8,-0x8(%RDX,%RDI,8) |
(787) 0x444524 MOV (%RCX,%RDI,8),%R8 |
(787) 0x444528 LEA (%R12,%RDI,1),%R9 |
(787) 0x44452c ADD $0x7,%R9 |
(787) 0x444530 MOV %R9,(%R11,%R8,8) |
(787) 0x444534 MOV %R8,(%RDX,%RDI,8) |
(787) 0x444538 ADD $0x8,%RDI |
(787) 0x44453c DEC %RSI |
(787) 0x44453f JNE 4444a0 |
0x444545 MOV %R10,%RDX |
0x444548 MOV %R10,%RCX |
0x44454b AND $-0x8,%RCX |
0x44454f CMP %R10,%RCX |
0x444552 MOV -0x48(%RBP),%R10 |
0x444556 MOV -0x58(%RBP),%RDX |
0x44455a VXORPD %XMM9,%XMM9,%XMM9 |
0x44455f VMOVSD -0x40(%RBP),%XMM10 |
0x444564 MOV -0x30(%RBP),%R9 |
0x444568 MOV -0x88(%RBP),%RSI |
0x44456f MOV -0x38(%RBP),%RDI |
0x444573 MOV -0x78(%RBP),%R8 |
0x444577 JAE 4445df |
0x444579 ADD %RCX,%R12 |
0x44457c SAL $0x6,%RAX |
0x444580 LEA (%RAX,%R8,8),%RAX |
0x444584 ADD %RAX,%RDI |
0x444587 NOPW (%RAX,%RAX,1) |
(788) 0x444590 MOV (%RDI),%RAX |
(788) 0x444593 MOV %R12,(%R11,%RAX,8) |
(788) 0x444597 MOV %RAX,(%RDX,%R12,8) |
(788) 0x44459b INC %R12 |
(788) 0x44459e ADD $0x8,%RDI |
(788) 0x4445a2 CMP %R12,%RSI |
(788) 0x4445a5 JNE 444590 |
0x4445a7 JMP 4445df |
0x4445b0 LEA (%RAX,%R8,8),%RAX |
0x4445b4 MOV -0x58(%RBP),%RDX |
0x4445b8 MOV -0x50(%RBP),%RSI |
0x4445bc NOPL (%RAX) |
(786) 0x4445c0 MOV (%RAX),%RCX |
(786) 0x4445c3 MOV %R12,(%RSI,%RCX,8) |
(786) 0x4445c7 MOVQ $0,(%R14,%R12,8) |
(786) 0x4445cf MOV %RCX,(%RDX,%R12,8) |
(786) 0x4445d3 INC %R12 |
(786) 0x4445d6 ADD $0x8,%RAX |
(786) 0x4445da CMP %R12,%RDI |
(786) 0x4445dd JNE 4445c0 |
0x4445df MOV -0x128(%RBP),%RAX |
0x4445e6 MOV (%RAX,%R9,8),%RSI |
0x4445ea MOV -0xa0(%RBP),%RAX |
0x4445f1 MOV (%RAX,%R9,8),%R12 |
0x4445f5 MOV 0x8(%RAX,%R9,8),%RDX |
0x4445fa LEA (%RDX,%RSI,1),%RAX |
0x4445fe SUB %R12,%RAX |
0x444601 CMP %RAX,%RSI |
0x444604 JGE 4447c0 |
0x44460a MOV -0x60(%RBP),%RAX |
0x44460e MOV (%RAX),%RAX |
0x444611 MOV -0x100(%RBP),%RCX |
0x444618 MOV (%RCX,%RAX,8),%RAX |
0x44461c MOV %RDX,%RCX |
0x44461f SUB %R12,%RCX |
0x444622 CMP $0xd,%RCX |
0x444626 JB 444790 |
0x44462c MOV %RDX,-0x78(%RBP) |
0x444630 VMOVSD %XMM10,-0x40(%RBP) |
0x444635 LEA (,%R12,8),%RDI |
0x44463d ADD %R13,%RDI |
0x444640 LEA (,%RCX,8),%RDX |
0x444648 MOV %RSI,-0x38(%RBP) |
0x44464c XOR %ESI,%ESI |
0x44464e MOV %RAX,-0x88(%RBP) |
0x444655 MOV %RCX,-0x80(%RBP) |
0x444659 CALL 5011c0 <_intel_fast_memset> |
0x44465e MOV -0x88(%RBP),%R11 |
0x444665 MOV -0x80(%RBP),%RAX |
0x444669 MOV %RAX,%R10 |
0x44466c SHR $0x3,%RAX |
0x444670 MOV -0x38(%RBP),%RCX |
0x444674 LEA (%R11,%RCX,8),%RCX |
0x444678 ADD $0x38,%RCX |
0x44467c MOV -0xe8(%RBP),%RDX |
0x444683 LEA (%RDX,%R12,8),%RDX |
0x444687 MOV %RAX,%RSI |
0x44468a XOR %EDI,%EDI |
0x44468c NOPL (%RAX) |
(784) 0x444690 MOV -0x38(%RCX,%RDI,8),%R8 |
(784) 0x444695 LEA (%R12,%RDI,1),%R9 |
(784) 0x444699 MOV %R9,(%R15,%R8,8) |
(784) 0x44469d MOV %R8,-0x38(%RDX,%RDI,8) |
(784) 0x4446a2 MOV -0x30(%RCX,%RDI,8),%R8 |
(784) 0x4446a7 LEA 0x1(%R12,%RDI,1),%R9 |
(784) 0x4446ac MOV %R9,(%R15,%R8,8) |
(784) 0x4446b0 MOV %R8,-0x30(%RDX,%RDI,8) |
(784) 0x4446b5 MOV -0x28(%RCX,%RDI,8),%R8 |
(784) 0x4446ba LEA 0x2(%R12,%RDI,1),%R9 |
(784) 0x4446bf MOV %R9,(%R15,%R8,8) |
(784) 0x4446c3 MOV %R8,-0x28(%RDX,%RDI,8) |
(784) 0x4446c8 MOV -0x20(%RCX,%RDI,8),%R8 |
(784) 0x4446cd LEA 0x3(%R12,%RDI,1),%R9 |
(784) 0x4446d2 MOV %R9,(%R15,%R8,8) |
(784) 0x4446d6 MOV %R8,-0x20(%RDX,%RDI,8) |
(784) 0x4446db MOV -0x18(%RCX,%RDI,8),%R8 |
(784) 0x4446e0 LEA 0x4(%R12,%RDI,1),%R9 |
(784) 0x4446e5 MOV %R9,(%R15,%R8,8) |
(784) 0x4446e9 MOV %R8,-0x18(%RDX,%RDI,8) |
(784) 0x4446ee MOV -0x10(%RCX,%RDI,8),%R8 |
(784) 0x4446f3 LEA 0x5(%R12,%RDI,1),%R9 |
(784) 0x4446f8 MOV %R9,(%R15,%R8,8) |
(784) 0x4446fc MOV %R8,-0x10(%RDX,%RDI,8) |
(784) 0x444701 MOV -0x8(%RCX,%RDI,8),%R8 |
(784) 0x444706 LEA 0x6(%R12,%RDI,1),%R9 |
(784) 0x44470b MOV %R9,(%R15,%R8,8) |
(784) 0x44470f MOV %R8,-0x8(%RDX,%RDI,8) |
(784) 0x444714 MOV (%RCX,%RDI,8),%R8 |
(784) 0x444718 LEA (%R12,%RDI,1),%R9 |
(784) 0x44471c ADD $0x7,%R9 |
(784) 0x444720 MOV %R9,(%R15,%R8,8) |
(784) 0x444724 MOV %R8,(%RDX,%RDI,8) |
(784) 0x444728 ADD $0x8,%RDI |
(784) 0x44472c DEC %RSI |
(784) 0x44472f JNE 444690 |
0x444735 MOV %R10,%RDX |
0x444738 MOV %R10,%RCX |
0x44473b AND $-0x8,%RCX |
0x44473f CMP %R10,%RCX |
0x444742 MOV -0x48(%RBP),%R10 |
0x444746 VXORPD %XMM9,%XMM9,%XMM9 |
0x44474b VMOVSD -0x40(%RBP),%XMM10 |
0x444750 MOV -0x30(%RBP),%R9 |
0x444754 MOV -0x78(%RBP),%RDX |
0x444758 MOV -0x38(%RBP),%RSI |
0x44475c JAE 4447c0 |
0x44475e ADD %RCX,%R12 |
0x444761 SAL $0x6,%RAX |
0x444765 LEA (%RAX,%RSI,8),%RAX |
0x444769 ADD %RAX,%R11 |
0x44476c NOPL (%RAX) |
(785) 0x444770 MOV (%R11),%RAX |
(785) 0x444773 MOV %R12,(%R15,%RAX,8) |
(785) 0x444777 MOV %RAX,(%R10,%R12,8) |
(785) 0x44477b INC %R12 |
(785) 0x44477e ADD $0x8,%R11 |
(785) 0x444782 CMP %R12,%RDX |
(785) 0x444785 JNE 444770 |
0x444787 JMP 4447c0 |
0x444790 LEA (%RAX,%RSI,8),%RAX |
0x444794 NOPW %CS:(%RAX,%RAX,1) |
(783) 0x4447a0 MOV (%RAX),%RCX |
(783) 0x4447a3 MOV %R12,(%R15,%RCX,8) |
(783) 0x4447a7 MOVQ $0,(%R13,%R12,8) |
(783) 0x4447b0 MOV %RCX,(%R10,%R12,8) |
(783) 0x4447b4 INC %R12 |
(783) 0x4447b7 ADD $0x8,%RAX |
(783) 0x4447bb CMP %R12,%RDX |
(783) 0x4447be JNE 4447a0 |
0x4447c0 MOV -0xc0(%RBP),%RCX |
0x4447c7 MOV (%RCX,%R9,8),%RAX |
0x4447cb MOV 0x8(%RCX,%R9,8),%RCX |
0x4447d0 CMP %RCX,%RAX |
0x4447d3 MOV -0x50(%RBP),%R12 |
0x4447d7 JGE 444830 |
0x4447d9 MOV -0x60(%RBP),%RDX |
0x4447dd MOV (%RDX),%RDX |
0x4447e0 DEC %RDX |
0x4447e3 JMP 4447f8 |
(782) 0x4447f0 INC %RAX |
(782) 0x4447f3 CMP %RCX,%RAX |
(782) 0x4447f6 JGE 444830 |
(782) 0x4447f8 MOV -0x180(%RBP),%RSI |
(782) 0x4447ff MOV (%RSI,%RAX,8),%RSI |
(782) 0x444803 MOV -0x190(%RBP),%RDI |
(782) 0x44480a CMP %RDX,(%RDI,%RSI,8) |
(782) 0x44480e JNE 4447f0 |
(782) 0x444810 MOV -0x70(%RBP),%RCX |
(782) 0x444814 MOV %R9,(%RCX,%RSI,8) |
(782) 0x444818 MOV -0xc0(%RBP),%RCX |
(782) 0x44481f MOV 0x8(%RCX,%R9,8),%RCX |
(782) 0x444824 JMP 4447f0 |
0x444830 MOV -0xc8(%RBP),%RCX |
0x444837 MOV (%RCX,%R9,8),%RAX |
0x44483b MOV 0x8(%RCX,%R9,8),%RCX |
0x444840 CMP %RCX,%RAX |
0x444843 JGE 4448a0 |
0x444845 MOV -0x60(%RBP),%RDX |
0x444849 MOV (%RDX),%RDX |
0x44484c DEC %RDX |
0x44484f JMP 444868 |
(781) 0x444860 INC %RAX |
(781) 0x444863 CMP %RCX,%RAX |
(781) 0x444866 JGE 4448a0 |
(781) 0x444868 MOV -0x188(%RBP),%RSI |
(781) 0x44486f MOV (%RSI,%RAX,8),%RSI |
(781) 0x444873 MOV -0x198(%RBP),%RDI |
(781) 0x44487a CMP %RDX,(%RDI,%RSI,8) |
(781) 0x44487e JNE 444860 |
(781) 0x444880 MOV -0x68(%RBP),%RCX |
(781) 0x444884 MOV %R9,(%RCX,%RSI,8) |
(781) 0x444888 MOV -0xc8(%RBP),%RCX |
(781) 0x44488f MOV 0x8(%RCX,%R9,8),%RCX |
(781) 0x444894 JMP 444860 |
0x4448a0 MOV -0x108(%RBP),%RAX |
0x4448a7 MOV (%RAX,%R9,8),%RCX |
0x4448ab MOV 0x8(%RAX,%R9,8),%R11 |
0x4448b0 LEA 0x1(%RCX),%RDX |
0x4448b4 VXORPD %XMM1,%XMM1,%XMM1 |
0x4448b8 CMP %R11,%RDX |
0x4448bb MOV %RCX,-0x38(%RBP) |
0x4448bf VXORPD %XMM0,%XMM0,%XMM0 |
0x4448c3 JGE 444c00 |
0x4448c9 MOV -0x58(%RBP),%RAX |
0x4448cd MOV %R11,-0x40(%RBP) |
0x4448d1 JMP 4448f4 |
(776) 0x4448e0 MOV -0x58(%RBP),%RAX |
(776) 0x4448e4 MOV -0x30(%RBP),%R9 |
(776) 0x4448e8 INC %RDX |
(776) 0x4448eb CMP %R11,%RDX |
(776) 0x4448ee JE 444c00 |
(776) 0x4448f4 MOV -0x170(%RBP),%RSI |
(776) 0x4448fb MOV (%RSI,%RDX,8),%RSI |
(776) 0x4448ff MOV -0x70(%RBP),%RDI |
(776) 0x444903 CMP %R9,(%RDI,%RSI,8) |
(776) 0x444907 JNE 444930 |
(776) 0x444909 MOV -0x98(%RBP),%R8 |
(776) 0x444910 MOV (%R8,%RSI,8),%RDI |
(776) 0x444914 MOV 0x8(%R8,%RSI,8),%R8 |
(776) 0x444919 MOV %R8,%R9 |
(776) 0x44491c SUB %RDI,%R9 |
(776) 0x44491f JLE 444a89 |
(776) 0x444925 CMP $0x4,%R9 |
(776) 0x444929 JAE 444960 |
(776) 0x44492b JMP 444a35 |
(776) 0x444930 MOV -0x158(%RBP),%RDI |
(776) 0x444937 CMPQ $-0x3,(%RDI,%RSI,8) |
(776) 0x44493c JE 4448e8 |
(776) 0x44493e CMPQ $0x1,-0xd0(%RBP) |
(776) 0x444946 JE 444959 |
(776) 0x444948 MOV -0xb0(%RBP),%R8 |
(776) 0x44494f MOV (%R8,%R9,8),%RDI |
(776) 0x444953 CMP (%R8,%RSI,8),%RDI |
(776) 0x444957 JNE 4448e8 |
(776) 0x444959 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(776) 0x44495e JMP 4448e8 |
(776) 0x444960 MOV %R9,%R10 |
(776) 0x444963 SHR $0x2,%R10 |
(776) 0x444967 LEA 0x18(,%RDI,8),%R11 |
(776) 0x44496f MOV %R12,%RCX |
(776) 0x444972 NOPW %CS:(%RAX,%RAX,1) |
(779) 0x444980 MOV -0x18(%RAX,%R11,1),%R12 |
(779) 0x444985 VMOVSD -0x18(%R14,%R11,1),%XMM2 |
(779) 0x44498c VMOVSD (%RBX,%RDX,8),%XMM3 |
(779) 0x444991 MOV (%RCX,%R12,8),%R12 |
(779) 0x444995 VMOVSD (%R14,%R12,8),%XMM4 |
(779) 0x44499b VFMADD231SD %XMM2,%XMM3,%XMM4 |
(779) 0x4449a0 VMOVSD %XMM4,(%R14,%R12,8) |
(779) 0x4449a6 MOV -0x10(%RAX,%R11,1),%R12 |
(779) 0x4449ab VMOVSD -0x10(%R14,%R11,1),%XMM4 |
(779) 0x4449b2 VMOVSD (%RBX,%RDX,8),%XMM5 |
(779) 0x4449b7 MOV (%RCX,%R12,8),%R12 |
(779) 0x4449bb VMOVSD (%R14,%R12,8),%XMM6 |
(779) 0x4449c1 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(779) 0x4449c6 VMOVSD %XMM6,(%R14,%R12,8) |
(779) 0x4449cc MOV -0x8(%RAX,%R11,1),%R12 |
(779) 0x4449d1 VMOVSD -0x8(%R14,%R11,1),%XMM6 |
(779) 0x4449d8 VMOVSD (%RBX,%RDX,8),%XMM7 |
(779) 0x4449dd MOV (%RCX,%R12,8),%R12 |
(779) 0x4449e1 VMOVSD (%R14,%R12,8),%XMM8 |
(779) 0x4449e7 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(779) 0x4449ec VMOVSD %XMM8,(%R14,%R12,8) |
(779) 0x4449f2 MOV (%RAX,%R11,1),%R12 |
(779) 0x4449f6 VMOVSD (%R14,%R11,1),%XMM8 |
(779) 0x4449fc VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(779) 0x444a01 MOV (%RCX,%R12,8),%R12 |
(779) 0x444a05 VADDSD (%R14,%R12,8),%XMM10,%XMM8 |
(779) 0x444a0b VMOVSD %XMM8,(%R14,%R12,8) |
(779) 0x444a11 VFMADD213SD %XMM10,%XMM5,%XMM4 |
(779) 0x444a16 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(779) 0x444a1b VFMADD231SD %XMM6,%XMM7,%XMM4 |
(779) 0x444a20 VADDSD %XMM1,%XMM4,%XMM1 |
(779) 0x444a24 VADDSD %XMM0,%XMM4,%XMM0 |
(779) 0x444a28 ADD $0x20,%R11 |
(779) 0x444a2c DEC %R10 |
(779) 0x444a2f JNE 444980 |
(776) 0x444a35 MOV %R9,%R10 |
(776) 0x444a38 AND $-0x4,%R10 |
(776) 0x444a3c CMP %R9,%R10 |
(776) 0x444a3f MOV -0x40(%RBP),%R11 |
(776) 0x444a43 JAE 444a81 |
(776) 0x444a45 ADD %R10,%RDI |
(776) 0x444a48 MOV -0x48(%RBP),%R10 |
(776) 0x444a4c MOV -0x50(%RBP),%R12 |
(780) 0x444a50 MOV (%RAX,%RDI,8),%R9 |
(780) 0x444a54 VMOVSD (%R14,%RDI,8),%XMM2 |
(780) 0x444a5a VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(780) 0x444a5f MOV (%R12,%R9,8),%R9 |
(780) 0x444a63 VADDSD (%R14,%R9,8),%XMM10,%XMM2 |
(780) 0x444a69 VMOVSD %XMM2,(%R14,%R9,8) |
(780) 0x444a6f VADDSD %XMM1,%XMM10,%XMM1 |
(780) 0x444a73 VADDSD %XMM0,%XMM10,%XMM0 |
(780) 0x444a77 INC %RDI |
(780) 0x444a7a CMP %RDI,%R8 |
(780) 0x444a7d JNE 444a50 |
(776) 0x444a7f JMP 444a89 |
(776) 0x444a81 MOV -0x48(%RBP),%R10 |
(776) 0x444a85 MOV -0x50(%RBP),%R12 |
(776) 0x444a89 MOV -0xa0(%RBP),%RAX |
(776) 0x444a90 MOV (%RAX,%RSI,8),%RDI |
(776) 0x444a94 MOV 0x8(%RAX,%RSI,8),%RSI |
(776) 0x444a99 MOV %RSI,%R8 |
(776) 0x444a9c SUB %RDI,%R8 |
(776) 0x444a9f JLE 4448e0 |
(776) 0x444aa5 CMP $0x4,%R8 |
(776) 0x444aa9 JAE 444ab0 |
(776) 0x444aab JMP 444b8e |
(776) 0x444ab0 MOV %R8,%R9 |
(776) 0x444ab3 SHR $0x2,%R9 |
(776) 0x444ab7 MOV %R10,%RAX |
(776) 0x444aba LEA 0x18(,%RDI,8),%R10 |
(776) 0x444ac2 NOPW %CS:(%RAX,%RAX,1) |
(777) 0x444ad0 MOV -0x18(%RAX,%R10,1),%R11 |
(777) 0x444ad5 VMOVSD -0x18(%R13,%R10,1),%XMM2 |
(777) 0x444adc VMOVSD (%RBX,%RDX,8),%XMM3 |
(777) 0x444ae1 MOV (%R15,%R11,8),%R11 |
(777) 0x444ae5 VMOVSD (%R13,%R11,8),%XMM4 |
(777) 0x444aec VFMADD231SD %XMM2,%XMM3,%XMM4 |
(777) 0x444af1 VMOVSD %XMM4,(%R13,%R11,8) |
(777) 0x444af8 MOV -0x10(%RAX,%R10,1),%R11 |
(777) 0x444afd VMOVSD -0x10(%R13,%R10,1),%XMM4 |
(777) 0x444b04 VMOVSD (%RBX,%RDX,8),%XMM5 |
(777) 0x444b09 MOV (%R15,%R11,8),%R11 |
(777) 0x444b0d VMOVSD (%R13,%R11,8),%XMM6 |
(777) 0x444b14 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(777) 0x444b19 VMOVSD %XMM6,(%R13,%R11,8) |
(777) 0x444b20 MOV -0x8(%RAX,%R10,1),%R11 |
(777) 0x444b25 VMOVSD -0x8(%R13,%R10,1),%XMM6 |
(777) 0x444b2c VMOVSD (%RBX,%RDX,8),%XMM7 |
(777) 0x444b31 MOV (%R15,%R11,8),%R11 |
(777) 0x444b35 VMOVSD (%R13,%R11,8),%XMM8 |
(777) 0x444b3c VFMADD231SD %XMM6,%XMM7,%XMM8 |
(777) 0x444b41 VMOVSD %XMM8,(%R13,%R11,8) |
(777) 0x444b48 MOV (%RAX,%R10,1),%R11 |
(777) 0x444b4c VMOVSD (%R13,%R10,1),%XMM8 |
(777) 0x444b53 VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(777) 0x444b58 MOV (%R15,%R11,8),%R11 |
(777) 0x444b5c VADDSD (%R13,%R11,8),%XMM10,%XMM8 |
(777) 0x444b63 VMOVSD %XMM8,(%R13,%R11,8) |
(777) 0x444b6a VFMADD213SD %XMM10,%XMM5,%XMM4 |
(777) 0x444b6f VFMADD231SD %XMM2,%XMM3,%XMM4 |
(777) 0x444b74 VFMADD231SD %XMM6,%XMM7,%XMM4 |
(777) 0x444b79 VADDSD %XMM1,%XMM4,%XMM1 |
(777) 0x444b7d VADDSD %XMM0,%XMM4,%XMM0 |
(777) 0x444b81 ADD $0x20,%R10 |
(777) 0x444b85 DEC %R9 |
(777) 0x444b88 JNE 444ad0 |
(776) 0x444b8e MOV %R8,%R9 |
(776) 0x444b91 AND $-0x4,%R9 |
(776) 0x444b95 CMP %R8,%R9 |
(776) 0x444b98 JAE 444be7 |
(776) 0x444b9a ADD %R9,%RDI |
(776) 0x444b9d MOV -0x48(%RBP),%R10 |
(776) 0x444ba1 MOV -0x58(%RBP),%RAX |
(776) 0x444ba5 MOV -0x30(%RBP),%R9 |
(776) 0x444ba9 MOV -0x40(%RBP),%R11 |
(776) 0x444bad NOPL (%RAX) |
(778) 0x444bb0 MOV (%R10,%RDI,8),%R8 |
(778) 0x444bb4 VMOVSD (%R13,%RDI,8),%XMM2 |
(778) 0x444bbb VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(778) 0x444bc0 MOV (%R15,%R8,8),%R8 |
(778) 0x444bc4 VADDSD (%R13,%R8,8),%XMM10,%XMM2 |
(778) 0x444bcb VMOVSD %XMM2,(%R13,%R8,8) |
(778) 0x444bd2 VADDSD %XMM1,%XMM10,%XMM1 |
(778) 0x444bd6 VADDSD %XMM0,%XMM10,%XMM0 |
(778) 0x444bda INC %RDI |
(778) 0x444bdd CMP %RDI,%RSI |
(778) 0x444be0 JNE 444bb0 |
(776) 0x444be2 JMP 4448e8 |
(776) 0x444be7 MOV -0x48(%RBP),%R10 |
(776) 0x444beb MOV -0x58(%RBP),%RAX |
(776) 0x444bef MOV -0x30(%RBP),%R9 |
(776) 0x444bf3 MOV -0x40(%RBP),%R11 |
(776) 0x444bf7 JMP 4448e8 |
0x444c00 MOV -0x110(%RBP),%RAX |
0x444c07 MOV (%RAX,%R9,8),%RCX |
0x444c0b MOV 0x8(%RAX,%R9,8),%RDX |
0x444c10 CMP %RDX,%RCX |
0x444c13 JL 444d10 |
0x444c19 MOV -0x38(%RBP),%RAX |
0x444c1d VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 |
0x444c22 VUCOMISD %XMM9,%XMM1 |
0x444c27 JE 444c35 |
0x444c29 VXORPD 0xcc03f(%RIP),%XMM0,%XMM0 |
0x444c31 VDIVSD %XMM1,%XMM0,%XMM10 |
0x444c35 MOV -0x98(%RBP),%RCX |
0x444c3c MOV (%RCX,%R9,8),%RAX |
0x444c40 MOV 0x8(%RCX,%R9,8),%RCX |
0x444c45 SUB %RAX,%RCX |
0x444c48 JLE 444ea4 |
0x444c4e CMP $0x8,%RCX |
0x444c52 JB 444cc7 |
0x444c54 MOV %RCX,%RDX |
0x444c57 SHR $0x3,%RDX |
0x444c5b MOV -0xe0(%RBP),%RSI |
0x444c62 LEA (%RSI,%RAX,8),%RSI |
0x444c66 NOPW %CS:(%RAX,%RAX,1) |
(773) 0x444c70 VMULSD -0x38(%RSI),%XMM10,%XMM0 |
(773) 0x444c75 VMOVSD %XMM0,-0x38(%RSI) |
(773) 0x444c7a VMULSD -0x30(%RSI),%XMM10,%XMM0 |
(773) 0x444c7f VMOVSD %XMM0,-0x30(%RSI) |
(773) 0x444c84 VMULSD -0x28(%RSI),%XMM10,%XMM0 |
(773) 0x444c89 VMOVSD %XMM0,-0x28(%RSI) |
(773) 0x444c8e VMULSD -0x20(%RSI),%XMM10,%XMM0 |
(773) 0x444c93 VMOVSD %XMM0,-0x20(%RSI) |
(773) 0x444c98 VMULSD -0x18(%RSI),%XMM10,%XMM0 |
(773) 0x444c9d VMOVSD %XMM0,-0x18(%RSI) |
(773) 0x444ca2 VMULSD -0x10(%RSI),%XMM10,%XMM0 |
(773) 0x444ca7 VMOVSD %XMM0,-0x10(%RSI) |
(773) 0x444cac VMULSD -0x8(%RSI),%XMM10,%XMM0 |
(773) 0x444cb1 VMOVSD %XMM0,-0x8(%RSI) |
(773) 0x444cb6 VMULSD (%RSI),%XMM10,%XMM0 |
(773) 0x444cba VMOVSD %XMM0,(%RSI) |
(773) 0x444cbe ADD $0x40,%RSI |
(773) 0x444cc2 DEC %RDX |
(773) 0x444cc5 JNE 444c70 |
0x444cc7 MOV %ECX,%EDX |
0x444cc9 AND $0x7,%EDX |
0x444ccc DEC %RDX |
0x444ccf CMP $0x6,%RDX |
0x444cd3 JA 444ea4 |
(774) 0x444cf0 VADDSD (%R8,%RCX,8),%XMM0,%XMM0 |
(774) 0x444cf6 MOV %R12,%R11 |
(774) 0x444cf9 INC %RCX |
(774) 0x444cfc CMP %RDX,%RCX |
(774) 0x444cff MOV -0x48(%RBP),%R10 |
(774) 0x444d03 MOV %R11,%R12 |
(774) 0x444d06 MOV -0x30(%RBP),%R9 |
(774) 0x444d0a JE 444c19 |
(774) 0x444d10 MOV -0x178(%RBP),%RAX |
(774) 0x444d17 LEA (%RAX,%RCX,8),%RSI |
(774) 0x444d1b CMPQ $0,-0x1a0(%RBP) |
(774) 0x444d23 JE 444d33 |
(774) 0x444d25 MOV (%RSI),%RSI |
(774) 0x444d28 MOV -0x160(%RBP),%RDI |
(774) 0x444d2f LEA (%RDI,%RSI,8),%RSI |
(774) 0x444d33 MOV (%RSI),%RDI |
(774) 0x444d36 TEST %RDI,%RDI |
(774) 0x444d39 JS 444de0 |
(774) 0x444d3f MOV -0x68(%RBP),%RSI |
(774) 0x444d43 CMP %R9,(%RSI,%RDI,8) |
(774) 0x444d47 JNE 444de0 |
(774) 0x444d4d MOV -0x150(%RBP),%RSI |
(774) 0x444d54 MOV 0x8(%RSI,%RDI,8),%RSI |
(774) 0x444d59 TEST %RSI,%RSI |
(774) 0x444d5c JLE 444cf6 |
(774) 0x444d5e MOV -0x138(%RBP),%R8 |
(774) 0x444d65 MOV (%R8,%RDI,8),%RDI |
(774) 0x444d69 ADD %RDI,%RSI |
(774) 0x444d6c MOV -0x60(%RBP),%R8 |
(774) 0x444d70 MOV (%R8),%R8 |
(774) 0x444d73 MOV -0x140(%RBP),%R9 |
(774) 0x444d7a MOV (%R9,%R8,8),%R8 |
(774) 0x444d7e MOV %R12,%R11 |
(774) 0x444d81 MOV -0xb8(%RBP),%R12 |
(774) 0x444d88 MOV -0x148(%RBP),%RAX |
(774) 0x444d8f NOP |
(775) 0x444d90 MOV (%R8,%RDI,8),%R9 |
(775) 0x444d94 VMOVSD (%RAX,%RDI,8),%XMM2 |
(775) 0x444d99 VMULSD (%R12,%RCX,8),%XMM2,%XMM10 |
(775) 0x444d9f TEST %R9,%R9 |
(775) 0x444da2 LEA (%R15,%R9,8),%R10 |
(775) 0x444da6 NOT %R9 |
(775) 0x444da9 LEA (%R11,%R9,8),%R9 |
(775) 0x444dad CMOVNS %R10,%R9 |
(775) 0x444db1 MOV %R13,%R10 |
(775) 0x444db4 CMOVS %R14,%R10 |
(775) 0x444db8 MOV (%R9),%R9 |
(775) 0x444dbb VADDSD (%R10,%R9,8),%XMM10,%XMM2 |
(775) 0x444dc1 VMOVSD %XMM2,(%R10,%R9,8) |
(775) 0x444dc7 VADDSD %XMM1,%XMM10,%XMM1 |
(775) 0x444dcb VADDSD %XMM0,%XMM10,%XMM0 |
(775) 0x444dcf INC %RDI |
(775) 0x444dd2 CMP %RSI,%RDI |
(775) 0x444dd5 JL 444d90 |
(774) 0x444dd7 JMP 444cf9 |
(774) 0x444de0 MOV -0x168(%RBP),%RSI |
(774) 0x444de7 CMPQ $-0x3,(%RSI,%RDI,8) |
(774) 0x444dec JE 444cf6 |
(774) 0x444df2 CMPQ $0x1,-0xd0(%RBP) |
(774) 0x444dfa MOV -0xb8(%RBP),%R8 |
(774) 0x444e01 JE 444cf0 |
(774) 0x444e07 MOV -0x130(%RBP),%RSI |
(774) 0x444e0e MOV (%RSI,%RDI,8),%RSI |
(774) 0x444e12 MOV -0xb0(%RBP),%RDI |
(774) 0x444e19 CMP (%RDI,%R9,8),%RSI |
(774) 0x444e1d JE 444cf0 |
(774) 0x444e23 JMP 444cf6 |
0x444ea4 MOV -0xa0(%RBP),%RCX |
0x444eab MOV (%RCX,%R9,8),%RAX |
0x444eaf MOV 0x8(%RCX,%R9,8),%RCX |
0x444eb4 SUB %RAX,%RCX |
0x444eb7 JLE 4443c3 |
0x444ebd CMP $0x8,%RCX |
0x444ec1 JB 444f37 |
0x444ec3 MOV %RCX,%RDX |
0x444ec6 SHR $0x3,%RDX |
0x444eca MOV -0xd8(%RBP),%RSI |
0x444ed1 LEA (%RSI,%RAX,8),%RSI |
0x444ed5 NOPW %CS:(%RAX,%RAX,1) |
(772) 0x444ee0 VMULSD -0x38(%RSI),%XMM10,%XMM0 |
(772) 0x444ee5 VMOVSD %XMM0,-0x38(%RSI) |
(772) 0x444eea VMULSD -0x30(%RSI),%XMM10,%XMM0 |
(772) 0x444eef VMOVSD %XMM0,-0x30(%RSI) |
(772) 0x444ef4 VMULSD -0x28(%RSI),%XMM10,%XMM0 |
(772) 0x444ef9 VMOVSD %XMM0,-0x28(%RSI) |
(772) 0x444efe VMULSD -0x20(%RSI),%XMM10,%XMM0 |
(772) 0x444f03 VMOVSD %XMM0,-0x20(%RSI) |
(772) 0x444f08 VMULSD -0x18(%RSI),%XMM10,%XMM0 |
(772) 0x444f0d VMOVSD %XMM0,-0x18(%RSI) |
(772) 0x444f12 VMULSD -0x10(%RSI),%XMM10,%XMM0 |
(772) 0x444f17 VMOVSD %XMM0,-0x10(%RSI) |
(772) 0x444f1c VMULSD -0x8(%RSI),%XMM10,%XMM0 |
(772) 0x444f21 VMOVSD %XMM0,-0x8(%RSI) |
(772) 0x444f26 VMULSD (%RSI),%XMM10,%XMM0 |
(772) 0x444f2a VMOVSD %XMM0,(%RSI) |
(772) 0x444f2e ADD $0x40,%RSI |
(772) 0x444f32 DEC %RDX |
(772) 0x444f35 JNE 444ee0 |
0x444f37 MOV %ECX,%EDX |
0x444f39 AND $0x7,%EDX |
0x444f3c DEC %RDX |
0x444f3f CMP $0x6,%RDX |
0x444f43 JA 4443c3 |
/scratch_na/users/xoserete/qaas_runs/171-172-8218/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1876 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1774: for (i=thread_start; i < thread_stop; i++) |
1775: { |
1776: i1 = pass_array[i]; |
1777: sum_C = 0; |
1778: sum_N = 0; |
1779: j_start = P_diag_start[i1]; |
1780: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1781: cnt = P_diag_i[i1]; |
1782: for (j=j_start; j < j_end; j++) |
1783: { |
1784: k1 = P_diag_pass[pass][j]; |
1785: tmp_array[k1] = cnt; |
1786: P_diag_data[cnt] = 0; |
1787: P_diag_j[cnt++] = k1; |
1788: } |
1789: j_start = P_offd_start[i1]; |
1790: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1791: cnt_offd = P_offd_i[i1]; |
1792: for (j=j_start; j < j_end; j++) |
1793: { |
1794: k1 = P_offd_pass[pass][j]; |
1795: tmp_array_offd[k1] = cnt_offd; |
1796: P_offd_data[cnt_offd] = 0; |
1797: P_offd_j[cnt_offd++] = k1; |
1798: } |
1799: for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++) |
1800: { |
1801: j1 = S_diag_j[j]; |
1802: if (assigned[j1] == pass-1) |
1803: tmp_marker[j1] = i1; |
1804: } |
1805: for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++) |
1806: { |
1807: j1 = S_offd_j[j]; |
1808: if (assigned_offd[j1] == pass-1) |
1809: tmp_marker_offd[j1] = i1; |
1810: } |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
1838: } |
1839: } |
1840: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1841: { |
1842: if (col_offd_S_to_A) |
1843: j1 = map_A_to_S[A_offd_j[j]]; |
1844: else |
1845: j1 = A_offd_j[j]; |
1846: |
1847: if (j1 > -1 && tmp_marker_offd[j1] == i1) |
1848: { |
1849: j_start = Pext_start[j1]; |
1850: j_end = j_start+Pext_i[j1+1]; |
1851: for (k=j_start; k < j_end; k++) |
1852: { |
1853: k1 = Pext_pass[pass][k]; |
1854: alfa = A_offd_data[j]*Pext_data[k]; |
1855: if (k1 < 0) |
1856: P_diag_data[tmp_array[-k1-1]] += alfa; |
1857: else |
1858: P_offd_data[tmp_array_offd[k1]] += alfa; |
1859: sum_C += alfa; |
1860: sum_N += alfa; |
1861: } |
1862: } |
1863: else |
1864: { |
1865: if (CF_marker_offd[j1] != -3 && |
1866: (num_functions == 1 || dof_func_offd[j1] == dof_func[i1])) |
1867: sum_N += A_offd_data[j]; |
1868: } |
1869: } |
1870: diagonal = A_diag_data[A_diag_i[i1]]; |
1871: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1872: |
1873: for (j=P_diag_i[i1]; j < P_diag_i[i1+1]; j++) |
1874: P_diag_data[j] *= alfa; |
1875: for (j=P_offd_i[i1]; j < P_offd_i[i1+1]; j++) |
1876: P_offd_data[j] *= alfa; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 3.12 |
CQA speedup if FP arith vectorized | 3.66 |
CQA speedup if fully vectorized | 7.96 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.39 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 34.33 |
CQA cycles if no scalar integer | 11.00 |
CQA cycles if FP arith vectorized | 9.37 |
CQA cycles if fully vectorized | 4.31 |
Front-end cycles | 34.33 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 13.00 |
P1 cycles | 10.40 |
P2 cycles | 24.67 |
P3 cycles | 24.67 |
P4 cycles | 8.00 |
P5 cycles | 10.40 |
P6 cycles | 13.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 10.20 |
P11 cycles | 24.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 34.32 - 34.34 |
Stall cycles (UFS) | 0.00 |
Nb insns | 204.00 |
Nb uops | 205.00 |
Nb loads | 74.00 |
Nb stores | 14.00 |
Nb stack references | 27.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.74 |
Bytes prefetched | 0.00 |
Bytes loaded | 600.00 |
Bytes stored | 112.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 11.90 |
Vectorization ratio load | 14.29 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 22.73 |
Vector-efficiency ratio all | 13.99 |
Vector-efficiency ratio load | 14.29 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 15.34 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 3.12 |
CQA speedup if FP arith vectorized | 3.66 |
CQA speedup if fully vectorized | 7.96 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.39 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 34.33 |
CQA cycles if no scalar integer | 11.00 |
CQA cycles if FP arith vectorized | 9.37 |
CQA cycles if fully vectorized | 4.31 |
Front-end cycles | 34.33 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 13.00 |
P1 cycles | 10.40 |
P2 cycles | 24.67 |
P3 cycles | 24.67 |
P4 cycles | 8.00 |
P5 cycles | 10.40 |
P6 cycles | 13.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 10.20 |
P11 cycles | 24.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 34.32 - 34.34 |
Stall cycles (UFS) | 0.00 |
Nb insns | 204.00 |
Nb uops | 205.00 |
Nb loads | 74.00 |
Nb stores | 14.00 |
Nb stack references | 27.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.74 |
Bytes prefetched | 0.00 |
Bytes loaded | 600.00 |
Bytes stored | 112.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 11.90 |
Vectorization ratio load | 14.29 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 22.73 |
Vector-efficiency ratio all | 13.99 |
Vector-efficiency ratio load | 14.29 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 15.34 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 204 |
nb uops | 205 |
loop length | 897 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 34.33 cycles |
front end | 34.33 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.00 | 10.40 | 24.67 | 24.67 | 8.00 | 10.40 | 13.00 | 8.00 | 8.00 | 8.00 | 10.20 | 24.67 |
cycles | 13.00 | 10.40 | 24.67 | 24.67 | 8.00 | 10.40 | 13.00 | 8.00 | 8.00 | 8.00 | 10.20 | 24.67 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 34.32-34.34 |
Stall cycles | 0.00 |
Front-end | 34.33 |
Dispatch | 24.67 |
DIV/SQRT | 4.00 |
Overall L1 | 34.33 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 41% |
load | 25% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 83% |
all | 11% |
load | 14% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 22% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 17% |
load | 15% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 22% |
all | 13% |
load | 14% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x90(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0xa8(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 444f84 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf74> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4445df <hypre_BoomerAMGBuildMultipass.extracted.28+0x5cf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xf8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4445b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x5a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM10,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 5011c0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x78(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0xf0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x40(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4445df <hypre_BoomerAMGBuildMultipass.extracted.28+0x5cf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4445df <hypre_BoomerAMGBuildMultipass.extracted.28+0x5cf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4447c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x100(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 444790 <hypre_BoomerAMGBuildMultipass.extracted.28+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM10,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R13,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 5011c0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x88(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R11,%RCX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0xe8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x40(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4447c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4447c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7b0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xc0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 444830 <hypre_BoomerAMGBuildMultipass.extracted.28+0x820> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4447f8 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7e8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xc8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4448a0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x890> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 444868 <hypre_BoomerAMGBuildMultipass.extracted.28+0x858> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 444c00 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbf0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4448f4 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8e4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 444d10 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd00> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM9,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 444c35 <hypre_BoomerAMGBuildMultipass.extracted.28+0xc25> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD 0xcc03f(%RIP),%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0x98(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 444ea4 <hypre_BoomerAMGBuildMultipass.extracted.28+0xe94> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 444cc7 <hypre_BoomerAMGBuildMultipass.extracted.28+0xcb7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xe0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%RAX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP $0x6,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 444ea4 <hypre_BoomerAMGBuildMultipass.extracted.28+0xe94> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xa0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4443c3 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3b3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 444f37 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf27> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xd8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%RAX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP $0x6,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 4443c3 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3b3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 204 |
nb uops | 205 |
loop length | 897 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 34.33 cycles |
front end | 34.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.00 | 10.40 | 24.67 | 24.67 | 8.00 | 10.40 | 13.00 | 8.00 | 8.00 | 8.00 | 10.20 | 24.67 |
cycles | 13.00 | 10.40 | 24.67 | 24.67 | 8.00 | 10.40 | 13.00 | 8.00 | 8.00 | 8.00 | 10.20 | 24.67 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 34.32-34.34 |
Stall cycles | 0.00 |
Front-end | 34.33 |
Dispatch | 24.67 |
DIV/SQRT | 4.00 |
Overall L1 | 34.33 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 41% |
load | 25% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 83% |
all | 11% |
load | 14% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 22% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 17% |
load | 15% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 22% |
all | 13% |
load | 14% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x90(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0xa8(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 444f84 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf74> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4445df <hypre_BoomerAMGBuildMultipass.extracted.28+0x5cf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xf8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4445b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x5a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM10,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 5011c0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x78(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0xf0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x40(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4445df <hypre_BoomerAMGBuildMultipass.extracted.28+0x5cf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4445df <hypre_BoomerAMGBuildMultipass.extracted.28+0x5cf> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%RSI,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4447c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x100(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 444790 <hypre_BoomerAMGBuildMultipass.extracted.28+0x780> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM10,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R13,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 5011c0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x88(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R11,%RCX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0xe8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x40(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 4447c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4447c0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7b0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xc0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x50(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 444830 <hypre_BoomerAMGBuildMultipass.extracted.28+0x820> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4447f8 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7e8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xc8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4448a0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x890> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 444868 <hypre_BoomerAMGBuildMultipass.extracted.28+0x858> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 444c00 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbf0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4448f4 <hypre_BoomerAMGBuildMultipass.extracted.28+0x8e4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 444d10 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd00> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM9,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 444c35 <hypre_BoomerAMGBuildMultipass.extracted.28+0xc25> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD 0xcc03f(%RIP),%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0x98(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 444ea4 <hypre_BoomerAMGBuildMultipass.extracted.28+0xe94> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 444cc7 <hypre_BoomerAMGBuildMultipass.extracted.28+0xcb7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xe0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%RAX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP $0x6,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 444ea4 <hypre_BoomerAMGBuildMultipass.extracted.28+0xe94> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xa0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 4443c3 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3b3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 444f37 <hypre_BoomerAMGBuildMultipass.extracted.28+0xf27> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xd8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%RAX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP $0x6,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 4443c3 <hypre_BoomerAMGBuildMultipass.extracted.28+0x3b3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |