Loop Id: 966 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.24% |
---|
Loop Id: 966 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.24% |
---|
0x443c10 MOV -0xa8(%RBP),%RDX |
0x443c17 INC %RDX |
0x443c1a CMP -0xa0(%RBP),%RDX |
0x443c21 JGE 4446db |
0x443c27 MOV -0x118(%RBP),%RAX |
0x443c2e MOV %RDX,-0xa8(%RBP) |
0x443c35 MOV (%RAX,%RDX,8),%R9 |
0x443c39 MOV -0x120(%RBP),%RAX |
0x443c40 MOV (%RAX,%R9,8),%R8 |
0x443c44 MOV -0xb0(%RBP),%RAX |
0x443c4b MOV (%RAX,%R9,8),%R12 |
0x443c4f MOV 0x8(%RAX,%R9,8),%RSI |
0x443c54 LEA (%RSI,%R8,1),%RAX |
0x443c58 SUB %R12,%RAX |
0x443c5b CMP %RAX,%R8 |
0x443c5e MOV %R9,-0x30(%RBP) |
0x443c62 JGE 443e3f |
0x443c68 MOV -0x58(%RBP),%RAX |
0x443c6c MOV (%RAX),%RCX |
0x443c6f MOV %RSI,%RAX |
0x443c72 SUB %R12,%RAX |
0x443c75 CMP $0xd,%RAX |
0x443c79 JB 443e00 |
0x443c7f MOV %RSI,-0x98(%RBP) |
0x443c86 VMOVUPD %XMM10,-0x80(%RBP) |
0x443c8b LEA (%R14,%R12,8),%RDI |
0x443c8f LEA (,%RAX,8),%RDX |
0x443c97 XOR %ESI,%ESI |
0x443c99 MOV %R8,-0x38(%RBP) |
0x443c9d MOV %RAX,-0x88(%RBP) |
0x443ca4 MOV %RCX,-0x90(%RBP) |
0x443cab VZEROUPPER |
0x443cae CALL 4f03c0 <_intel_fast_memset> |
0x443cb3 MOV -0xc0(%RBP),%RAX |
0x443cba MOV -0x90(%RBP),%RCX |
0x443cc1 MOV (%RAX,%RCX,8),%RAX |
0x443cc5 MOV -0x88(%RBP),%RCX |
0x443ccc SHR $0x3,%RCX |
0x443cd0 MOV -0x38(%RBP),%RDX |
0x443cd4 LEA 0x38(%RAX,%RDX,8),%RDX |
0x443cd9 MOV -0x100(%RBP),%RSI |
0x443ce0 LEA (%RSI,%R12,8),%RSI |
0x443ce4 MOV %RCX,%RDI |
0x443ce7 XOR %R8D,%R8D |
0x443cea MOV -0x48(%RBP),%R11 |
0x443cee XCHG %AX,%AX |
(984) 0x443cf0 MOV -0x38(%RDX,%R8,8),%R9 |
(984) 0x443cf5 LEA (%R12,%R8,1),%R10 |
(984) 0x443cf9 MOV %R10,(%R11,%R9,8) |
(984) 0x443cfd MOV %R9,-0x38(%RSI,%R8,8) |
(984) 0x443d02 MOV -0x30(%RDX,%R8,8),%R9 |
(984) 0x443d07 LEA 0x1(%R12,%R8,1),%R10 |
(984) 0x443d0c MOV %R10,(%R11,%R9,8) |
(984) 0x443d10 MOV %R9,-0x30(%RSI,%R8,8) |
(984) 0x443d15 MOV -0x28(%RDX,%R8,8),%R9 |
(984) 0x443d1a LEA 0x2(%R12,%R8,1),%R10 |
(984) 0x443d1f MOV %R10,(%R11,%R9,8) |
(984) 0x443d23 MOV %R9,-0x28(%RSI,%R8,8) |
(984) 0x443d28 MOV -0x20(%RDX,%R8,8),%R9 |
(984) 0x443d2d LEA 0x3(%R12,%R8,1),%R10 |
(984) 0x443d32 MOV %R10,(%R11,%R9,8) |
(984) 0x443d36 MOV %R9,-0x20(%RSI,%R8,8) |
(984) 0x443d3b MOV -0x18(%RDX,%R8,8),%R9 |
(984) 0x443d40 LEA 0x4(%R12,%R8,1),%R10 |
(984) 0x443d45 MOV %R10,(%R11,%R9,8) |
(984) 0x443d49 MOV %R9,-0x18(%RSI,%R8,8) |
(984) 0x443d4e MOV -0x10(%RDX,%R8,8),%R9 |
(984) 0x443d53 LEA 0x5(%R12,%R8,1),%R10 |
(984) 0x443d58 MOV %R10,(%R11,%R9,8) |
(984) 0x443d5c MOV %R9,-0x10(%RSI,%R8,8) |
(984) 0x443d61 MOV -0x8(%RDX,%R8,8),%R9 |
(984) 0x443d66 LEA 0x6(%R12,%R8,1),%R10 |
(984) 0x443d6b MOV %R10,(%R11,%R9,8) |
(984) 0x443d6f MOV %R9,-0x8(%RSI,%R8,8) |
(984) 0x443d74 MOV (%RDX,%R8,8),%R9 |
(984) 0x443d78 LEA 0x7(%R12,%R8,1),%R10 |
(984) 0x443d7d MOV %R10,(%R11,%R9,8) |
(984) 0x443d81 MOV %R9,(%RSI,%R8,8) |
(984) 0x443d85 ADD $0x8,%R8 |
(984) 0x443d89 DEC %RDI |
(984) 0x443d8c JNE 443cf0 |
0x443d92 MOV -0x88(%RBP),%RSI |
0x443d99 MOV %RSI,%RDX |
0x443d9c AND $-0x8,%RDX |
0x443da0 CMP %RSI,%RDX |
0x443da3 MOV -0x40(%RBP),%R10 |
0x443da7 MOV -0x50(%RBP),%RSI |
0x443dab VXORPD %XMM9,%XMM9,%XMM9 |
0x443db0 MOV -0x30(%RBP),%R9 |
0x443db4 VMOVUPD -0x80(%RBP),%XMM10 |
0x443db9 MOV -0x98(%RBP),%RDI |
0x443dc0 MOV -0x38(%RBP),%R8 |
0x443dc4 JAE 443e3f |
0x443dc6 ADD %RDX,%R12 |
0x443dc9 SAL $0x6,%RCX |
0x443dcd LEA (%RCX,%R8,8),%RCX |
0x443dd1 ADD %RCX,%RAX |
0x443dd4 NOPW %CS:(%RAX,%RAX,1) |
(985) 0x443de0 MOV (%RAX),%RCX |
(985) 0x443de3 MOV %R12,(%R11,%RCX,8) |
(985) 0x443de7 MOV %RCX,(%RSI,%R12,8) |
(985) 0x443deb INC %R12 |
(985) 0x443dee ADD $0x8,%RAX |
(985) 0x443df2 CMP %R12,%RDI |
(985) 0x443df5 JNE 443de0 |
0x443df7 JMP 443e3f |
0x443e00 SAL $0x3,%R8 |
0x443e04 MOV -0xc0(%RBP),%RAX |
0x443e0b ADD (%RAX,%RCX,8),%R8 |
0x443e0f MOV -0x50(%RBP),%RCX |
0x443e13 MOV -0x48(%RBP),%RDX |
0x443e17 NOPW (%RAX,%RAX,1) |
(983) 0x443e20 MOV (%R8),%RAX |
(983) 0x443e23 MOV %R12,(%RDX,%RAX,8) |
(983) 0x443e27 MOVQ $0,(%R14,%R12,8) |
(983) 0x443e2f MOV %RAX,(%RCX,%R12,8) |
(983) 0x443e33 INC %R12 |
(983) 0x443e36 ADD $0x8,%R8 |
(983) 0x443e3a CMP %R12,%RSI |
(983) 0x443e3d JNE 443e20 |
0x443e3f MOV -0x128(%RBP),%RAX |
0x443e46 MOV (%RAX,%R9,8),%R8 |
0x443e4a MOV -0xb8(%RBP),%RAX |
0x443e51 MOV (%RAX,%R9,8),%R12 |
0x443e55 MOV 0x8(%RAX,%R9,8),%RCX |
0x443e5a LEA (%RCX,%R8,1),%RAX |
0x443e5e SUB %R12,%RAX |
0x443e61 CMP %RAX,%R8 |
0x443e64 JGE 444020 |
0x443e6a MOV -0x58(%RBP),%RAX |
0x443e6e MOV (%RAX),%RDX |
0x443e71 MOV %RCX,%RAX |
0x443e74 SUB %R12,%RAX |
0x443e77 CMP $0xd,%RAX |
0x443e7b JB 443ff0 |
0x443e81 MOV %RCX,-0x88(%RBP) |
0x443e88 VMOVUPD %XMM10,-0x80(%RBP) |
0x443e8d LEA (%R13,%R12,8),%RDI |
0x443e92 MOV %RDX,-0x90(%RBP) |
0x443e99 LEA (,%RAX,8),%RDX |
0x443ea1 XOR %ESI,%ESI |
0x443ea3 MOV %R8,-0x98(%RBP) |
0x443eaa MOV %RAX,-0x38(%RBP) |
0x443eae VZEROUPPER |
0x443eb1 CALL 4f03c0 <_intel_fast_memset> |
0x443eb6 MOV -0x98(%RBP),%R11 |
0x443ebd MOV -0xc8(%RBP),%RAX |
0x443ec4 MOV -0x90(%RBP),%RCX |
0x443ecb MOV (%RAX,%RCX,8),%RAX |
0x443ecf MOV -0x38(%RBP),%RCX |
0x443ed3 SHR $0x3,%RCX |
0x443ed7 LEA 0x38(%RAX,%R11,8),%RDX |
0x443edc MOV -0xf8(%RBP),%RSI |
0x443ee3 LEA (%RSI,%R12,8),%RSI |
0x443ee7 MOV %RCX,%RDI |
0x443eea XOR %R8D,%R8D |
0x443eed NOPL (%RAX) |
(981) 0x443ef0 MOV -0x38(%RDX,%R8,8),%R9 |
(981) 0x443ef5 LEA (%R12,%R8,1),%R10 |
(981) 0x443ef9 MOV %R10,(%R15,%R9,8) |
(981) 0x443efd MOV %R9,-0x38(%RSI,%R8,8) |
(981) 0x443f02 MOV -0x30(%RDX,%R8,8),%R9 |
(981) 0x443f07 LEA 0x1(%R12,%R8,1),%R10 |
(981) 0x443f0c MOV %R10,(%R15,%R9,8) |
(981) 0x443f10 MOV %R9,-0x30(%RSI,%R8,8) |
(981) 0x443f15 MOV -0x28(%RDX,%R8,8),%R9 |
(981) 0x443f1a LEA 0x2(%R12,%R8,1),%R10 |
(981) 0x443f1f MOV %R10,(%R15,%R9,8) |
(981) 0x443f23 MOV %R9,-0x28(%RSI,%R8,8) |
(981) 0x443f28 MOV -0x20(%RDX,%R8,8),%R9 |
(981) 0x443f2d LEA 0x3(%R12,%R8,1),%R10 |
(981) 0x443f32 MOV %R10,(%R15,%R9,8) |
(981) 0x443f36 MOV %R9,-0x20(%RSI,%R8,8) |
(981) 0x443f3b MOV -0x18(%RDX,%R8,8),%R9 |
(981) 0x443f40 LEA 0x4(%R12,%R8,1),%R10 |
(981) 0x443f45 MOV %R10,(%R15,%R9,8) |
(981) 0x443f49 MOV %R9,-0x18(%RSI,%R8,8) |
(981) 0x443f4e MOV -0x10(%RDX,%R8,8),%R9 |
(981) 0x443f53 LEA 0x5(%R12,%R8,1),%R10 |
(981) 0x443f58 MOV %R10,(%R15,%R9,8) |
(981) 0x443f5c MOV %R9,-0x10(%RSI,%R8,8) |
(981) 0x443f61 MOV -0x8(%RDX,%R8,8),%R9 |
(981) 0x443f66 LEA 0x6(%R12,%R8,1),%R10 |
(981) 0x443f6b MOV %R10,(%R15,%R9,8) |
(981) 0x443f6f MOV %R9,-0x8(%RSI,%R8,8) |
(981) 0x443f74 MOV (%RDX,%R8,8),%R9 |
(981) 0x443f78 LEA 0x7(%R12,%R8,1),%R10 |
(981) 0x443f7d MOV %R10,(%R15,%R9,8) |
(981) 0x443f81 MOV %R9,(%RSI,%R8,8) |
(981) 0x443f85 ADD $0x8,%R8 |
(981) 0x443f89 DEC %RDI |
(981) 0x443f8c JNE 443ef0 |
0x443f92 MOV -0x38(%RBP),%RSI |
0x443f96 MOV %RSI,%RDX |
0x443f99 AND $-0x8,%RDX |
0x443f9d CMP %RSI,%RDX |
0x443fa0 MOV -0x40(%RBP),%R10 |
0x443fa4 VXORPD %XMM9,%XMM9,%XMM9 |
0x443fa9 MOV -0x30(%RBP),%R9 |
0x443fad VMOVUPD -0x80(%RBP),%XMM10 |
0x443fb2 MOV -0x88(%RBP),%RSI |
0x443fb9 JAE 444020 |
0x443fbb ADD %RDX,%R12 |
0x443fbe SAL $0x6,%RCX |
0x443fc2 LEA (%RCX,%R11,8),%RCX |
0x443fc6 ADD %RCX,%RAX |
0x443fc9 NOPL (%RAX) |
(982) 0x443fd0 MOV (%RAX),%RCX |
(982) 0x443fd3 MOV %R12,(%R15,%RCX,8) |
(982) 0x443fd7 MOV %RCX,(%R10,%R12,8) |
(982) 0x443fdb INC %R12 |
(982) 0x443fde ADD $0x8,%RAX |
(982) 0x443fe2 CMP %R12,%RSI |
(982) 0x443fe5 JNE 443fd0 |
0x443fe7 JMP 444020 |
0x443ff0 SAL $0x3,%R8 |
0x443ff4 MOV -0xc8(%RBP),%RAX |
0x443ffb ADD (%RAX,%RDX,8),%R8 |
0x443fff NOP |
(980) 0x444000 MOV (%R8),%RAX |
(980) 0x444003 MOV %R12,(%R15,%RAX,8) |
(980) 0x444007 MOVQ $0,(%R13,%R12,8) |
(980) 0x444010 MOV %RAX,(%R10,%R12,8) |
(980) 0x444014 INC %R12 |
(980) 0x444017 ADD $0x8,%R8 |
(980) 0x44401b CMP %R12,%RCX |
(980) 0x44401e JNE 444000 |
0x444020 MOV -0xe0(%RBP),%RCX |
0x444027 MOV (%RCX,%R9,8),%RAX |
0x44402b MOV 0x8(%RCX,%R9,8),%RCX |
0x444030 CMP %RCX,%RAX |
0x444033 MOV -0x48(%RBP),%R12 |
0x444037 JGE 444090 |
0x444039 MOV -0x58(%RBP),%RDX |
0x44403d MOV (%RDX),%RDX |
0x444040 DEC %RDX |
0x444043 JMP 444058 |
(979) 0x444050 INC %RAX |
(979) 0x444053 CMP %RCX,%RAX |
(979) 0x444056 JGE 444090 |
(979) 0x444058 MOV -0x180(%RBP),%RSI |
(979) 0x44405f MOV (%RSI,%RAX,8),%RSI |
(979) 0x444063 MOV -0x190(%RBP),%RDI |
(979) 0x44406a CMP %RDX,(%RDI,%RSI,8) |
(979) 0x44406e JNE 444050 |
(979) 0x444070 MOV -0x68(%RBP),%RCX |
(979) 0x444074 MOV %R9,(%RCX,%RSI,8) |
(979) 0x444078 MOV -0xe0(%RBP),%RCX |
(979) 0x44407f MOV 0x8(%RCX,%R9,8),%RCX |
(979) 0x444084 JMP 444050 |
0x444090 MOV -0xe8(%RBP),%RCX |
0x444097 MOV (%RCX,%R9,8),%RAX |
0x44409b MOV 0x8(%RCX,%R9,8),%RCX |
0x4440a0 CMP %RCX,%RAX |
0x4440a3 JGE 444100 |
0x4440a5 MOV -0x58(%RBP),%RDX |
0x4440a9 MOV (%RDX),%RDX |
0x4440ac DEC %RDX |
0x4440af JMP 4440c8 |
(978) 0x4440c0 INC %RAX |
(978) 0x4440c3 CMP %RCX,%RAX |
(978) 0x4440c6 JGE 444100 |
(978) 0x4440c8 MOV -0x188(%RBP),%RSI |
(978) 0x4440cf MOV (%RSI,%RAX,8),%RSI |
(978) 0x4440d3 MOV -0x198(%RBP),%RDI |
(978) 0x4440da CMP %RDX,(%RDI,%RSI,8) |
(978) 0x4440de JNE 4440c0 |
(978) 0x4440e0 MOV -0x60(%RBP),%RCX |
(978) 0x4440e4 MOV %R9,(%RCX,%RSI,8) |
(978) 0x4440e8 MOV -0xe8(%RBP),%RCX |
(978) 0x4440ef MOV 0x8(%RCX,%R9,8),%RCX |
(978) 0x4440f4 JMP 4440c0 |
0x444100 MOV -0x108(%RBP),%RAX |
0x444107 MOV (%RAX,%R9,8),%RCX |
0x44410b MOV 0x8(%RAX,%R9,8),%R11 |
0x444110 LEA 0x1(%RCX),%RDX |
0x444114 VXORPD %XMM1,%XMM1,%XMM1 |
0x444118 CMP %R11,%RDX |
0x44411b MOV %RCX,-0x38(%RBP) |
0x44411f VXORPD %XMM0,%XMM0,%XMM0 |
0x444123 JGE 444460 |
0x444129 MOV -0x50(%RBP),%RAX |
0x44412d MOV %R11,-0x80(%RBP) |
0x444131 JMP 444154 |
(973) 0x444140 MOV -0x50(%RBP),%RAX |
(973) 0x444144 MOV -0x30(%RBP),%R9 |
(973) 0x444148 INC %RDX |
(973) 0x44414b CMP %R11,%RDX |
(973) 0x44414e JE 444460 |
(973) 0x444154 MOV -0x170(%RBP),%RSI |
(973) 0x44415b MOV (%RSI,%RDX,8),%RSI |
(973) 0x44415f MOV -0x68(%RBP),%RDI |
(973) 0x444163 CMP %R9,(%RDI,%RSI,8) |
(973) 0x444167 JNE 444190 |
(973) 0x444169 MOV -0xb0(%RBP),%R8 |
(973) 0x444170 MOV (%R8,%RSI,8),%RDI |
(973) 0x444174 MOV 0x8(%R8,%RSI,8),%R8 |
(973) 0x444179 MOV %R8,%R9 |
(973) 0x44417c SUB %RDI,%R9 |
(973) 0x44417f JLE 4442e9 |
(973) 0x444185 CMP $0x4,%R9 |
(973) 0x444189 JAE 4441c0 |
(973) 0x44418b JMP 444295 |
(973) 0x444190 MOV -0x158(%RBP),%RDI |
(973) 0x444197 CMPQ $-0x3,(%RDI,%RSI,8) |
(973) 0x44419c JE 444148 |
(973) 0x44419e CMPQ $0x1,-0xf0(%RBP) |
(973) 0x4441a6 JE 4441b9 |
(973) 0x4441a8 MOV -0xd0(%RBP),%R8 |
(973) 0x4441af MOV (%R8,%R9,8),%RDI |
(973) 0x4441b3 CMP (%R8,%RSI,8),%RDI |
(973) 0x4441b7 JNE 444148 |
(973) 0x4441b9 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(973) 0x4441be JMP 444148 |
(973) 0x4441c0 MOV %R9,%R10 |
(973) 0x4441c3 SHR $0x2,%R10 |
(973) 0x4441c7 LEA 0x18(,%RDI,8),%R11 |
(973) 0x4441cf MOV %R12,%RCX |
(973) 0x4441d2 NOPW %CS:(%RAX,%RAX,1) |
(976) 0x4441e0 MOV -0x18(%RAX,%R11,1),%R12 |
(976) 0x4441e5 VMOVSD -0x18(%R14,%R11,1),%XMM2 |
(976) 0x4441ec VMOVSD (%RBX,%RDX,8),%XMM3 |
(976) 0x4441f1 MOV (%RCX,%R12,8),%R12 |
(976) 0x4441f5 VMOVSD (%R14,%R12,8),%XMM4 |
(976) 0x4441fb VFMADD231SD %XMM2,%XMM3,%XMM4 |
(976) 0x444200 VMOVSD %XMM4,(%R14,%R12,8) |
(976) 0x444206 MOV -0x10(%RAX,%R11,1),%R12 |
(976) 0x44420b VMOVSD -0x10(%R14,%R11,1),%XMM4 |
(976) 0x444212 VMOVSD (%RBX,%RDX,8),%XMM5 |
(976) 0x444217 MOV (%RCX,%R12,8),%R12 |
(976) 0x44421b VMOVSD (%R14,%R12,8),%XMM6 |
(976) 0x444221 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(976) 0x444226 VMOVSD %XMM6,(%R14,%R12,8) |
(976) 0x44422c MOV -0x8(%RAX,%R11,1),%R12 |
(976) 0x444231 VMOVSD -0x8(%R14,%R11,1),%XMM6 |
(976) 0x444238 VMOVSD (%RBX,%RDX,8),%XMM7 |
(976) 0x44423d MOV (%RCX,%R12,8),%R12 |
(976) 0x444241 VMOVSD (%R14,%R12,8),%XMM8 |
(976) 0x444247 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(976) 0x44424c VMOVSD %XMM8,(%R14,%R12,8) |
(976) 0x444252 VMOVSD (%R14,%R11,1),%XMM8 |
(976) 0x444258 VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(976) 0x44425d MOV (%RAX,%R11,1),%R12 |
(976) 0x444261 MOV (%RCX,%R12,8),%R12 |
(976) 0x444265 VADDSD (%R14,%R12,8),%XMM10,%XMM8 |
(976) 0x44426b VMOVSD %XMM8,(%R14,%R12,8) |
(976) 0x444271 VFMADD213SD %XMM10,%XMM5,%XMM4 |
(976) 0x444276 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(976) 0x44427b VFMADD231SD %XMM6,%XMM7,%XMM4 |
(976) 0x444280 VADDSD %XMM1,%XMM4,%XMM1 |
(976) 0x444284 VADDSD %XMM0,%XMM4,%XMM0 |
(976) 0x444288 ADD $0x20,%R11 |
(976) 0x44428c DEC %R10 |
(976) 0x44428f JNE 4441e0 |
(973) 0x444295 MOV %R9,%R10 |
(973) 0x444298 AND $-0x4,%R10 |
(973) 0x44429c CMP %R9,%R10 |
(973) 0x44429f MOV -0x80(%RBP),%R11 |
(973) 0x4442a3 JAE 4442e1 |
(973) 0x4442a5 ADD %R10,%RDI |
(973) 0x4442a8 MOV -0x40(%RBP),%R10 |
(973) 0x4442ac MOV -0x48(%RBP),%R12 |
(977) 0x4442b0 MOV (%RAX,%RDI,8),%R9 |
(977) 0x4442b4 VMOVSD (%R14,%RDI,8),%XMM2 |
(977) 0x4442ba VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(977) 0x4442bf MOV (%R12,%R9,8),%R9 |
(977) 0x4442c3 VADDSD (%R14,%R9,8),%XMM10,%XMM2 |
(977) 0x4442c9 VMOVSD %XMM2,(%R14,%R9,8) |
(977) 0x4442cf VADDSD %XMM1,%XMM10,%XMM1 |
(977) 0x4442d3 VADDSD %XMM0,%XMM10,%XMM0 |
(977) 0x4442d7 INC %RDI |
(977) 0x4442da CMP %RDI,%R8 |
(977) 0x4442dd JNE 4442b0 |
(973) 0x4442df JMP 4442e9 |
(973) 0x4442e1 MOV -0x40(%RBP),%R10 |
(973) 0x4442e5 MOV -0x48(%RBP),%R12 |
(973) 0x4442e9 MOV -0xb8(%RBP),%RAX |
(973) 0x4442f0 MOV (%RAX,%RSI,8),%RDI |
(973) 0x4442f4 MOV 0x8(%RAX,%RSI,8),%RSI |
(973) 0x4442f9 MOV %RSI,%R8 |
(973) 0x4442fc SUB %RDI,%R8 |
(973) 0x4442ff JLE 444140 |
(973) 0x444305 CMP $0x4,%R8 |
(973) 0x444309 JAE 444310 |
(973) 0x44430b JMP 4443ee |
(973) 0x444310 MOV %R8,%R9 |
(973) 0x444313 SHR $0x2,%R9 |
(973) 0x444317 MOV %R10,%RAX |
(973) 0x44431a LEA 0x18(,%RDI,8),%R10 |
(973) 0x444322 NOPW %CS:(%RAX,%RAX,1) |
(974) 0x444330 MOV -0x18(%RAX,%R10,1),%R11 |
(974) 0x444335 VMOVSD -0x18(%R13,%R10,1),%XMM2 |
(974) 0x44433c VMOVSD (%RBX,%RDX,8),%XMM3 |
(974) 0x444341 MOV (%R15,%R11,8),%R11 |
(974) 0x444345 VMOVSD (%R13,%R11,8),%XMM4 |
(974) 0x44434c VFMADD231SD %XMM2,%XMM3,%XMM4 |
(974) 0x444351 VMOVSD %XMM4,(%R13,%R11,8) |
(974) 0x444358 MOV -0x10(%RAX,%R10,1),%R11 |
(974) 0x44435d VMOVSD -0x10(%R13,%R10,1),%XMM4 |
(974) 0x444364 VMOVSD (%RBX,%RDX,8),%XMM5 |
(974) 0x444369 MOV (%R15,%R11,8),%R11 |
(974) 0x44436d VMOVSD (%R13,%R11,8),%XMM6 |
(974) 0x444374 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(974) 0x444379 VMOVSD %XMM6,(%R13,%R11,8) |
(974) 0x444380 MOV -0x8(%RAX,%R10,1),%R11 |
(974) 0x444385 VMOVSD -0x8(%R13,%R10,1),%XMM6 |
(974) 0x44438c VMOVSD (%RBX,%RDX,8),%XMM7 |
(974) 0x444391 MOV (%R15,%R11,8),%R11 |
(974) 0x444395 VMOVSD (%R13,%R11,8),%XMM8 |
(974) 0x44439c VFMADD231SD %XMM6,%XMM7,%XMM8 |
(974) 0x4443a1 VMOVSD %XMM8,(%R13,%R11,8) |
(974) 0x4443a8 VMOVSD (%R13,%R10,1),%XMM8 |
(974) 0x4443af VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(974) 0x4443b4 MOV (%RAX,%R10,1),%R11 |
(974) 0x4443b8 MOV (%R15,%R11,8),%R11 |
(974) 0x4443bc VADDSD (%R13,%R11,8),%XMM10,%XMM8 |
(974) 0x4443c3 VMOVSD %XMM8,(%R13,%R11,8) |
(974) 0x4443ca VFMADD213SD %XMM10,%XMM5,%XMM4 |
(974) 0x4443cf VFMADD231SD %XMM2,%XMM3,%XMM4 |
(974) 0x4443d4 VFMADD231SD %XMM6,%XMM7,%XMM4 |
(974) 0x4443d9 VADDSD %XMM1,%XMM4,%XMM1 |
(974) 0x4443dd VADDSD %XMM0,%XMM4,%XMM0 |
(974) 0x4443e1 ADD $0x20,%R10 |
(974) 0x4443e5 DEC %R9 |
(974) 0x4443e8 JNE 444330 |
(973) 0x4443ee MOV %R8,%R9 |
(973) 0x4443f1 AND $-0x4,%R9 |
(973) 0x4443f5 CMP %R8,%R9 |
(973) 0x4443f8 JAE 444447 |
(973) 0x4443fa ADD %R9,%RDI |
(973) 0x4443fd MOV -0x40(%RBP),%R10 |
(973) 0x444401 MOV -0x50(%RBP),%RAX |
(973) 0x444405 MOV -0x30(%RBP),%R9 |
(973) 0x444409 MOV -0x80(%RBP),%R11 |
(973) 0x44440d NOPL (%RAX) |
(975) 0x444410 MOV (%R10,%RDI,8),%R8 |
(975) 0x444414 VMOVSD (%R13,%RDI,8),%XMM2 |
(975) 0x44441b VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(975) 0x444420 MOV (%R15,%R8,8),%R8 |
(975) 0x444424 VADDSD (%R13,%R8,8),%XMM10,%XMM2 |
(975) 0x44442b VMOVSD %XMM2,(%R13,%R8,8) |
(975) 0x444432 VADDSD %XMM1,%XMM10,%XMM1 |
(975) 0x444436 VADDSD %XMM0,%XMM10,%XMM0 |
(975) 0x44443a INC %RDI |
(975) 0x44443d CMP %RDI,%RSI |
(975) 0x444440 JNE 444410 |
(973) 0x444442 JMP 444148 |
(973) 0x444447 MOV -0x40(%RBP),%R10 |
(973) 0x44444b MOV -0x50(%RBP),%RAX |
(973) 0x44444f MOV -0x30(%RBP),%R9 |
(973) 0x444453 MOV -0x80(%RBP),%R11 |
(973) 0x444457 JMP 444148 |
0x444460 MOV -0x110(%RBP),%RAX |
0x444467 MOV (%RAX,%R9,8),%RCX |
0x44446b MOV 0x8(%RAX,%R9,8),%RDX |
0x444470 CMP %RDX,%RCX |
0x444473 JL 444520 |
0x444479 MOV -0x38(%RBP),%RAX |
0x44447d VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 |
0x444482 VUCOMISD %XMM9,%XMM1 |
0x444487 JE 444497 |
0x444489 VXORPD 0xbb7cd(%RIP){1to2},%XMM0,%XMM0 |
0x444493 VDIVSD %XMM1,%XMM0,%XMM10 |
0x444497 MOV -0xb0(%RBP),%RAX |
0x44449e MOV (%RAX,%R9,8),%RDX |
0x4444a2 MOV 0x8(%RAX,%R9,8),%RAX |
0x4444a7 MOV %RAX,%RSI |
0x4444aa SUB %RDX,%RSI |
0x4444ad JLE 444654 |
0x4444b3 MOV %RSI,%RCX |
0x4444b6 AND $-0x4,%RCX |
0x4444ba JE 444638 |
0x4444c0 LEA -0x1(%RCX),%RDI |
0x4444c4 VBROADCASTSD %XMM10,%YMM0 |
0x4444c9 LEA (%R14,%RDX,8),%R8 |
0x4444cd XOR %R9D,%R9D |
(970) 0x4444d0 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(970) 0x4444d6 VMOVUPD %YMM1,(%R8,%R9,8) |
(970) 0x4444dc ADD $0x4,%R9 |
(970) 0x4444e0 CMP %RDI,%R9 |
(970) 0x4444e3 JBE 4444d0 |
0x4444e5 CMP %RCX,%RSI |
0x4444e8 MOV -0x30(%RBP),%R9 |
0x4444ec JNE 44463a |
0x4444f2 JMP 444654 |
(971) 0x444500 VADDSD (%R8,%RCX,8),%XMM0,%XMM0 |
(971) 0x444506 MOV %R12,%R11 |
(971) 0x444509 INC %RCX |
(971) 0x44450c CMP %RDX,%RCX |
(971) 0x44450f MOV -0x40(%RBP),%R10 |
(971) 0x444513 MOV %R11,%R12 |
(971) 0x444516 MOV -0x30(%RBP),%R9 |
(971) 0x44451a JE 444479 |
(971) 0x444520 MOV -0x178(%RBP),%RAX |
(971) 0x444527 LEA (%RAX,%RCX,8),%RSI |
(971) 0x44452b CMPQ $0,-0x1a0(%RBP) |
(971) 0x444533 JE 444543 |
(971) 0x444535 MOV (%RSI),%RSI |
(971) 0x444538 MOV -0x160(%RBP),%RDI |
(971) 0x44453f LEA (%RDI,%RSI,8),%RSI |
(971) 0x444543 MOV (%RSI),%RDI |
(971) 0x444546 TEST %RDI,%RDI |
(971) 0x444549 JS 4445f0 |
(971) 0x44454f MOV -0x60(%RBP),%RSI |
(971) 0x444553 CMP %R9,(%RSI,%RDI,8) |
(971) 0x444557 JNE 4445f0 |
(971) 0x44455d MOV -0x150(%RBP),%RSI |
(971) 0x444564 MOV 0x8(%RSI,%RDI,8),%RSI |
(971) 0x444569 TEST %RSI,%RSI |
(971) 0x44456c JLE 444506 |
(971) 0x44456e MOV -0x138(%RBP),%R8 |
(971) 0x444575 MOV (%R8,%RDI,8),%RDI |
(971) 0x444579 ADD %RDI,%RSI |
(971) 0x44457c MOV -0x58(%RBP),%R8 |
(971) 0x444580 MOV (%R8),%R8 |
(971) 0x444583 MOV -0x140(%RBP),%R9 |
(971) 0x44458a MOV (%R9,%R8,8),%R8 |
(971) 0x44458e MOV %R12,%R11 |
(971) 0x444591 MOV -0xd8(%RBP),%R12 |
(971) 0x444598 MOV -0x148(%RBP),%RAX |
(971) 0x44459f NOP |
(972) 0x4445a0 MOV (%R8,%RDI,8),%R9 |
(972) 0x4445a4 VMOVSD (%RAX,%RDI,8),%XMM2 |
(972) 0x4445a9 VMULSD (%R12,%RCX,8),%XMM2,%XMM10 |
(972) 0x4445af TEST %R9,%R9 |
(972) 0x4445b2 LEA (%R15,%R9,8),%R10 |
(972) 0x4445b6 NOT %R9 |
(972) 0x4445b9 LEA (%R11,%R9,8),%R9 |
(972) 0x4445bd CMOVNS %R10,%R9 |
(972) 0x4445c1 MOV %R13,%R10 |
(972) 0x4445c4 CMOVS %R14,%R10 |
(972) 0x4445c8 MOV (%R9),%R9 |
(972) 0x4445cb VADDSD (%R10,%R9,8),%XMM10,%XMM2 |
(972) 0x4445d1 VMOVSD %XMM2,(%R10,%R9,8) |
(972) 0x4445d7 VADDSD %XMM1,%XMM10,%XMM1 |
(972) 0x4445db VADDSD %XMM0,%XMM10,%XMM0 |
(972) 0x4445df INC %RDI |
(972) 0x4445e2 CMP %RSI,%RDI |
(972) 0x4445e5 JL 4445a0 |
(971) 0x4445e7 JMP 444509 |
(971) 0x4445f0 MOV -0x168(%RBP),%RSI |
(971) 0x4445f7 CMPQ $-0x3,(%RSI,%RDI,8) |
(971) 0x4445fc JE 444506 |
(971) 0x444602 CMPQ $0x1,-0xf0(%RBP) |
(971) 0x44460a MOV -0xd8(%RBP),%R8 |
(971) 0x444611 JE 444500 |
(971) 0x444617 MOV -0x130(%RBP),%RSI |
(971) 0x44461e MOV (%RSI,%RDI,8),%RSI |
(971) 0x444622 MOV -0xd0(%RBP),%RDI |
(971) 0x444629 CMP (%RDI,%R9,8),%RSI |
(971) 0x44462d JE 444500 |
(971) 0x444633 JMP 444506 |
0x444638 XOR %ECX,%ECX |
0x44463a ADD %RDX,%RCX |
0x44463d NOPL (%RAX) |
(969) 0x444640 VMULSD (%R14,%RCX,8),%XMM10,%XMM0 |
(969) 0x444646 VMOVSD %XMM0,(%R14,%RCX,8) |
(969) 0x44464c INC %RCX |
(969) 0x44464f CMP %RCX,%RAX |
(969) 0x444652 JNE 444640 |
0x444654 MOV -0xb8(%RBP),%RAX |
0x44465b MOV (%RAX,%R9,8),%RDX |
0x44465f MOV 0x8(%RAX,%R9,8),%RAX |
0x444664 MOV %RAX,%RSI |
0x444667 SUB %RDX,%RSI |
0x44466a JLE 443c10 |
0x444670 MOV %RSI,%RCX |
0x444673 AND $-0x4,%RCX |
0x444677 JE 4446b0 |
0x444679 LEA -0x1(%RCX),%RDI |
0x44467d VBROADCASTSD %XMM10,%YMM0 |
0x444682 LEA (%R13,%RDX,8),%R8 |
0x444687 XOR %R9D,%R9D |
0x44468a NOPW (%RAX,%RAX,1) |
(968) 0x444690 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(968) 0x444696 VMOVUPD %YMM1,(%R8,%R9,8) |
(968) 0x44469c ADD $0x4,%R9 |
(968) 0x4446a0 CMP %RDI,%R9 |
(968) 0x4446a3 JBE 444690 |
0x4446a5 CMP %RCX,%RSI |
0x4446a8 JE 443c10 |
0x4446ae JMP 4446b2 |
0x4446b0 XOR %ECX,%ECX |
0x4446b2 ADD %RDX,%RCX |
0x4446b5 NOPW %CS:(%RAX,%RAX,1) |
(967) 0x4446c0 VMULSD (%R13,%RCX,8),%XMM10,%XMM0 |
(967) 0x4446c7 VMOVSD %XMM0,(%R13,%RCX,8) |
(967) 0x4446ce INC %RCX |
(967) 0x4446d1 CMP %RCX,%RAX |
(967) 0x4446d4 JNE 4446c0 |
0x4446d6 JMP 443c10 |
/home/eoseret/qaas_runs_CPU_9468/171-716-5699/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1876 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1774: for (i=thread_start; i < thread_stop; i++) |
1775: { |
1776: i1 = pass_array[i]; |
1777: sum_C = 0; |
1778: sum_N = 0; |
1779: j_start = P_diag_start[i1]; |
1780: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1781: cnt = P_diag_i[i1]; |
1782: for (j=j_start; j < j_end; j++) |
1783: { |
1784: k1 = P_diag_pass[pass][j]; |
1785: tmp_array[k1] = cnt; |
1786: P_diag_data[cnt] = 0; |
1787: P_diag_j[cnt++] = k1; |
1788: } |
1789: j_start = P_offd_start[i1]; |
1790: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1791: cnt_offd = P_offd_i[i1]; |
1792: for (j=j_start; j < j_end; j++) |
1793: { |
1794: k1 = P_offd_pass[pass][j]; |
1795: tmp_array_offd[k1] = cnt_offd; |
1796: P_offd_data[cnt_offd] = 0; |
1797: P_offd_j[cnt_offd++] = k1; |
1798: } |
1799: for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++) |
1800: { |
1801: j1 = S_diag_j[j]; |
1802: if (assigned[j1] == pass-1) |
1803: tmp_marker[j1] = i1; |
1804: } |
1805: for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++) |
1806: { |
1807: j1 = S_offd_j[j]; |
1808: if (assigned_offd[j1] == pass-1) |
1809: tmp_marker_offd[j1] = i1; |
1810: } |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
1838: } |
1839: } |
1840: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1841: { |
1842: if (col_offd_S_to_A) |
1843: j1 = map_A_to_S[A_offd_j[j]]; |
1844: else |
1845: j1 = A_offd_j[j]; |
1846: |
1847: if (j1 > -1 && tmp_marker_offd[j1] == i1) |
1848: { |
1849: j_start = Pext_start[j1]; |
1850: j_end = j_start+Pext_i[j1+1]; |
1851: for (k=j_start; k < j_end; k++) |
1852: { |
1853: k1 = Pext_pass[pass][k]; |
1854: alfa = A_offd_data[j]*Pext_data[k]; |
1855: if (k1 < 0) |
1856: P_diag_data[tmp_array[-k1-1]] += alfa; |
1857: else |
1858: P_offd_data[tmp_array_offd[k1]] += alfa; |
1859: sum_C += alfa; |
1860: sum_N += alfa; |
1861: } |
1862: } |
1863: else |
1864: { |
1865: if (CF_marker_offd[j1] != -3 && |
1866: (num_functions == 1 || dof_func_offd[j1] == dof_func[i1])) |
1867: sum_N += A_offd_data[j]; |
1868: } |
1869: } |
1870: diagonal = A_diag_data[A_diag_i[i1]]; |
1871: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1872: |
1873: for (j=P_diag_i[i1]; j < P_diag_i[i1+1]; j++) |
1874: P_diag_data[j] *= alfa; |
1875: for (j=P_offd_i[i1]; j < P_offd_i[i1+1]; j++) |
1876: P_offd_data[j] *= alfa; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.55 |
CQA speedup if FP arith vectorized | 3.50 |
CQA speedup if fully vectorized | 7.74 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.41 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1865-1865,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 36.17 |
CQA cycles if no scalar integer | 14.17 |
CQA cycles if FP arith vectorized | 10.32 |
CQA cycles if fully vectorized | 4.67 |
Front-end cycles | 36.17 |
P0 cycles | 13.00 |
P1 cycles | 12.40 |
P2 cycles | 25.67 |
P3 cycles | 25.67 |
P4 cycles | 8.00 |
P5 cycles | 12.40 |
P6 cycles | 13.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 12.20 |
P11 cycles | 25.67 |
DIV/SQRT cycles | 4.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 35.85 - 35.84 |
Stall cycles (UFS) | 0.00 |
Nb insns | 210.00 |
Nb uops | 214.00 |
Nb loads | 77.00 |
Nb stores | 14.00 |
Nb stack references | 25.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.01 |
Bytes prefetched | 0.00 |
Bytes loaded | 632.00 |
Bytes stored | 128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 23.91 |
Vectorization ratio load | 37.50 |
Vectorization ratio store | 14.29 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 28.00 |
Vector-efficiency ratio all | 15.49 |
Vector-efficiency ratio load | 17.19 |
Vector-efficiency ratio store | 14.29 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 16.00 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.55 |
CQA speedup if FP arith vectorized | 3.50 |
CQA speedup if fully vectorized | 7.74 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.41 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1865-1865,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 36.17 |
CQA cycles if no scalar integer | 14.17 |
CQA cycles if FP arith vectorized | 10.32 |
CQA cycles if fully vectorized | 4.67 |
Front-end cycles | 36.17 |
P0 cycles | 13.00 |
P1 cycles | 12.40 |
P2 cycles | 25.67 |
P3 cycles | 25.67 |
P4 cycles | 8.00 |
P5 cycles | 12.40 |
P6 cycles | 13.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 12.20 |
P11 cycles | 25.67 |
DIV/SQRT cycles | 4.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 35.85 - 35.84 |
Stall cycles (UFS) | 0.00 |
Nb insns | 210.00 |
Nb uops | 214.00 |
Nb loads | 77.00 |
Nb stores | 14.00 |
Nb stack references | 25.00 |
FLOP/cycle | 0.06 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.01 |
Bytes prefetched | 0.00 |
Bytes loaded | 632.00 |
Bytes stored | 128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 23.91 |
Vectorization ratio load | 37.50 |
Vectorization ratio store | 14.29 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 28.00 |
Vector-efficiency ratio all | 15.49 |
Vector-efficiency ratio load | 17.19 |
Vector-efficiency ratio store | 14.29 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 16.00 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 210 |
nb uops | 214 |
loop length | 944 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 25 |
micro-operation queue | 36.17 cycles |
front end | 36.17 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.00 | 12.40 | 25.67 | 25.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 25.67 |
cycles | 13.00 | 12.40 | 25.67 | 25.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 25.67 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 35.85-35.84 |
Stall cycles | 0.00 |
Front-end | 36.17 |
Dispatch | 25.67 |
DIV/SQRT | 4.00 |
Overall L1 | 36.17 |
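Vectorization ratios, INT instructions (section label lost in extraction; presumed INT because this group has no div/sqrt row, to be confirmed against the original report)
all | 6% |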
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
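Vectorization ratios, FP instructions (section label lost in extraction; presumed FP because this group has a div/sqrt row, to be confirmed against the original report)
all | 64% |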
load | 75% |
store | 100% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 62% |
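Vectorization ratios, INT+FP (truncated values of the "Vectorization ratio" metrics: 23.91 / 37.50 / 14.29 / 28.00)
all | 23% |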
load | 37% |
store | 14% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 28% |
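Vector-efficiency ratios, INT instructions (section label lost in extraction; presumed INT because this group has no div/sqrt row, to be confirmed against the original report)
all | 13% |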
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
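Vector-efficiency ratios, FP instructions (section label lost in extraction; presumed FP because this group has a div/sqrt row, to be confirmed against the original report)
all | 20% |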
load | 21% |
store | 25% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 20% |
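Vector-efficiency ratios, INT+FP (truncated values of the "Vector-efficiency ratio" metrics: 15.49 / 17.19 / 14.29 / 12.50 / 16.00)
all | 15% |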
load | 17% |
store | 14% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0xa0(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 4446db <hypre_BoomerAMGBuildMultipass.extracted.28+0xe0b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 443e3f <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443e00 <hypre_BoomerAMGBuildMultipass.extracted.28+0x530> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4f03c0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RAX,%RDX,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x100(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x80(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x98(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 443e3f <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R8,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443e3f <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RCX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 444020 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443ff0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x720> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4f03c0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x98(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%RAX,%R11,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x80(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 444020 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 444020 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RDX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 444090 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 444058 <hypre_BoomerAMGBuildMultipass.extracted.28+0x788> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xe8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 444100 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4440c8 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 444460 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb90> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 444154 <hypre_BoomerAMGBuildMultipass.extracted.28+0x884> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 444520 <hypre_BoomerAMGBuildMultipass.extracted.28+0xc50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM9,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 444497 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbc7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD 0xbb7cd(%RIP){1to2},%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 444654 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd84> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 444638 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd68> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 44463a <hypre_BoomerAMGBuildMultipass.extracted.28+0xd6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 444654 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 443c10 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4446b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xde0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 443c10 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4446b2 <hypre_BoomerAMGBuildMultipass.extracted.28+0xde2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443c10 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 210 |
nb uops | 214 |
loop length | 944 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 25 |
micro-operation queue | 36.17 cycles |
front end | 36.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.00 | 12.40 | 25.67 | 25.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 25.67 |
cycles | 13.00 | 12.40 | 25.67 | 25.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 25.67 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 35.85-35.84 |
Stall cycles | 0.00 |
Front-end | 36.17 |
Dispatch | 25.67 |
DIV/SQRT | 4.00 |
Overall L1 | 36.17 |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 64% |
load | 75% |
store | 100% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 62% |
all | 23% |
load | 37% |
store | 14% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 28% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 20% |
load | 21% |
store | 25% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 20% |
all | 15% |
load | 17% |
store | 14% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0xa0(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 4446db <hypre_BoomerAMGBuildMultipass.extracted.28+0xe0b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 443e3f <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443e00 <hypre_BoomerAMGBuildMultipass.extracted.28+0x530> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4f03c0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RAX,%RDX,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x100(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x80(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x98(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 443e3f <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R8,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443e3f <hypre_BoomerAMGBuildMultipass.extracted.28+0x56f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RCX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 444020 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443ff0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x720> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4f03c0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x98(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%RAX,%R11,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x80(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 444020 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 444020 <hypre_BoomerAMGBuildMultipass.extracted.28+0x750> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RDX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 444090 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 444058 <hypre_BoomerAMGBuildMultipass.extracted.28+0x788> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xe8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 444100 <hypre_BoomerAMGBuildMultipass.extracted.28+0x830> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4440c8 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 444460 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb90> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 444154 <hypre_BoomerAMGBuildMultipass.extracted.28+0x884> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 444520 <hypre_BoomerAMGBuildMultipass.extracted.28+0xc50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM9,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 444497 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbc7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD 0xbb7cd(%RIP){1to2},%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 444654 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd84> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 444638 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd68> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 44463a <hypre_BoomerAMGBuildMultipass.extracted.28+0xd6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 444654 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 443c10 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4446b0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xde0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 443c10 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4446b2 <hypre_BoomerAMGBuildMultipass.extracted.28+0xde2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443c10 <hypre_BoomerAMGBuildMultipass.extracted.28+0x340> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |