Loop Id: 955 | Module: exec | Source: par_multi_interp.c:1585-1660 [...] | Coverage: 0.03% |
---|
Loop Id: 955 | Module: exec | Source: par_multi_interp.c:1585-1660 [...] | Coverage: 0.03% |
---|
0x442e70 MOV -0x50(%RBP),%RDX |
0x442e74 INC %RDX |
0x442e77 CMP -0x38(%RBP),%RDX |
0x442e7b JGE 443367 |
0x442e81 MOV -0xd8(%RBP),%RAX |
0x442e88 MOV %RDX,-0x50(%RBP) |
0x442e8c MOV (%RAX,%RDX,8),%RAX |
0x442e90 MOV -0xe0(%RBP),%RCX |
0x442e97 MOV (%RCX,%RAX,8),%RDI |
0x442e9b MOV -0x58(%RBP),%RCX |
0x442e9f MOV (%RCX,%RAX,8),%RSI |
0x442ea3 MOV 0x8(%RCX,%RAX,8),%RCX |
0x442ea8 LEA (%RCX,%RDI,1),%RDX |
0x442eac SUB %RSI,%RDX |
0x442eaf CMP %RDX,%RDI |
0x442eb2 JGE 442f85 |
0x442eb8 MOV -0xc0(%RBP),%RDX |
0x442ebf MOV 0x8(%RDX),%R8 |
0x442ec3 SUB %RSI,%RCX |
0x442ec6 CMP $0x8,%RCX |
0x442eca JB 442f50 |
0x442ed0 MOV %RCX,%R9 |
0x442ed3 SHR $0x3,%R9 |
0x442ed7 LEA 0x38(%R8,%RDI,8),%R10 |
0x442edc NOPL (%RAX) |
(965) 0x442ee0 MOV -0x38(%R10),%RDX |
(965) 0x442ee4 MOV (%R15,%RDX,8),%RDX |
(965) 0x442ee8 MOV %RAX,(%R13,%RDX,8) |
(965) 0x442eed MOV -0x30(%R10),%RDX |
(965) 0x442ef1 MOV (%R15,%RDX,8),%RDX |
(965) 0x442ef5 MOV %RAX,(%R13,%RDX,8) |
(965) 0x442efa MOV -0x28(%R10),%RDX |
(965) 0x442efe MOV (%R15,%RDX,8),%RDX |
(965) 0x442f02 MOV %RAX,(%R13,%RDX,8) |
(965) 0x442f07 MOV -0x20(%R10),%RDX |
(965) 0x442f0b MOV (%R15,%RDX,8),%RDX |
(965) 0x442f0f MOV %RAX,(%R13,%RDX,8) |
(965) 0x442f14 MOV -0x18(%R10),%RDX |
(965) 0x442f18 MOV (%R15,%RDX,8),%RDX |
(965) 0x442f1c MOV %RAX,(%R13,%RDX,8) |
(965) 0x442f21 MOV -0x10(%R10),%RDX |
(965) 0x442f25 MOV (%R15,%RDX,8),%RDX |
(965) 0x442f29 MOV %RAX,(%R13,%RDX,8) |
(965) 0x442f2e MOV -0x8(%R10),%RDX |
(965) 0x442f32 MOV (%R15,%RDX,8),%RDX |
(965) 0x442f36 MOV %RAX,(%R13,%RDX,8) |
(965) 0x442f3b MOV (%R10),%RDX |
(965) 0x442f3e MOV (%R15,%RDX,8),%RDX |
(965) 0x442f42 MOV %RAX,(%R13,%RDX,8) |
(965) 0x442f47 ADD $0x40,%R10 |
(965) 0x442f4b DEC %R9 |
(965) 0x442f4e JNE 442ee0 |
0x442f50 MOV %RCX,%R9 |
0x442f53 AND $-0x8,%R9 |
0x442f57 CMP %RCX,%R9 |
0x442f5a MOV -0x40(%RBP),%R10 |
0x442f5e JAE 442f85 |
0x442f60 LEA (%R8,%RDI,8),%RDI |
0x442f64 NOPW %CS:(%RAX,%RAX,1) |
(964) 0x442f70 MOV (%RDI,%R9,8),%RDX |
(964) 0x442f74 MOV (%R15,%RDX,8),%RDX |
(964) 0x442f78 MOV %RAX,(%R13,%RDX,8) |
(964) 0x442f7d INC %R9 |
(964) 0x442f80 CMP %R9,%RCX |
(964) 0x442f83 JNE 442f70 |
0x442f85 MOV -0x58(%RBP),%RCX |
0x442f89 MOV (%RCX,%RAX,8),%RDX |
0x442f8d MOV -0x68(%RBP),%RCX |
0x442f91 MOV (%RCX,%RAX,8),%RDI |
0x442f95 MOV 0x8(%RCX,%RAX,8),%R8 |
0x442f9a INC %RDI |
0x442f9d VXORPD %XMM4,%XMM4,%XMM4 |
0x442fa1 CMP %R8,%RDI |
0x442fa4 MOV %RDX,-0x30(%RBP) |
0x442fa8 VXORPD %XMM3,%XMM3,%XMM3 |
0x442fac JGE 443060 |
0x442fb2 MOV -0xb8(%RBP),%RCX |
0x442fb9 MOV -0x78(%RBP),%RSI |
0x442fbd JMP 442fcc |
(963) 0x442fc0 INC %RDI |
(963) 0x442fc3 CMP %R8,%RDI |
(963) 0x442fc6 JGE 443060 |
(963) 0x442fcc MOV (%RSI,%RDI,8),%R9 |
(963) 0x442fd0 CMPQ $-0x3,(%R10,%R9,8) |
(963) 0x442fd5 JE 442fff |
(963) 0x442fd7 CMPQ $0x1,-0xa0(%RBP) |
(963) 0x442fdf JE 442ff6 |
(963) 0x442fe1 MOV -0x90(%RBP),%RSI |
(963) 0x442fe8 MOV (%RSI,%RAX,8),%RDX |
(963) 0x442fec CMP (%RSI,%R9,8),%RDX |
(963) 0x442ff0 MOV -0x78(%RBP),%RSI |
(963) 0x442ff4 JNE 442fff |
(963) 0x442ff6 MOV -0x70(%RBP),%RDX |
(963) 0x442ffa VADDSD (%RDX,%RDI,8),%XMM3,%XMM3 |
(963) 0x442fff CMP $-0x1,%R9 |
(963) 0x443003 JE 442fc0 |
(963) 0x443005 CMP %RAX,(%R13,%R9,8) |
(963) 0x44300a JNE 442fc0 |
(963) 0x44300c MOV -0x70(%RBP),%R8 |
(963) 0x443010 VMOVSD (%R8,%RDI,8),%XMM5 |
(963) 0x443016 MOV -0x88(%RBP),%RDX |
(963) 0x44301d MOV -0x30(%RBP),%R11 |
(963) 0x443021 VMOVSD %XMM5,(%RDX,%R11,8) |
(963) 0x443027 MOV -0x108(%RBP),%RDX |
(963) 0x44302e MOV (%RDX,%R9,8),%RDX |
(963) 0x443032 MOV %RDX,(%RCX,%R11,8) |
(963) 0x443036 INC %R11 |
(963) 0x443039 MOV %R11,-0x30(%RBP) |
(963) 0x44303d VADDSD (%R8,%RDI,8),%XMM4,%XMM4 |
(963) 0x443043 MOV -0x68(%RBP),%RDX |
(963) 0x443047 MOV 0x8(%RDX,%RAX,8),%R8 |
(963) 0x44304c JMP 442fc0 |
0x443060 MOV -0xe8(%RBP),%RDX |
0x443067 MOV (%RDX,%RAX,8),%R8 |
0x44306b MOV -0x60(%RBP),%RCX |
0x44306f MOV (%RCX,%RAX,8),%RSI |
0x443073 MOV 0x8(%RCX,%RAX,8),%RDI |
0x443078 LEA (%RDI,%R8,1),%RDX |
0x44307c SUB %RSI,%RDX |
0x44307f CMP %RDX,%R8 |
0x443082 JGE 443144 |
0x443088 MOV -0xc8(%RBP),%RDX |
0x44308f MOV 0x8(%RDX),%R9 |
0x443093 SUB %RSI,%RDI |
0x443096 CMP $0x8,%RDI |
0x44309a JB 443118 |
0x4430a0 MOV %RDI,%R10 |
0x4430a3 SHR $0x3,%R10 |
0x4430a7 LEA 0x38(%R9,%R8,8),%R11 |
0x4430ac NOPL (%RAX) |
(962) 0x4430b0 MOV -0x38(%R11),%RDX |
(962) 0x4430b4 MOV (%RBX,%RDX,8),%RDX |
(962) 0x4430b8 MOV %RAX,(%R14,%RDX,8) |
(962) 0x4430bc MOV -0x30(%R11),%RDX |
(962) 0x4430c0 MOV (%RBX,%RDX,8),%RDX |
(962) 0x4430c4 MOV %RAX,(%R14,%RDX,8) |
(962) 0x4430c8 MOV -0x28(%R11),%RDX |
(962) 0x4430cc MOV (%RBX,%RDX,8),%RDX |
(962) 0x4430d0 MOV %RAX,(%R14,%RDX,8) |
(962) 0x4430d4 MOV -0x20(%R11),%RDX |
(962) 0x4430d8 MOV (%RBX,%RDX,8),%RDX |
(962) 0x4430dc MOV %RAX,(%R14,%RDX,8) |
(962) 0x4430e0 MOV -0x18(%R11),%RDX |
(962) 0x4430e4 MOV (%RBX,%RDX,8),%RDX |
(962) 0x4430e8 MOV %RAX,(%R14,%RDX,8) |
(962) 0x4430ec MOV -0x10(%R11),%RDX |
(962) 0x4430f0 MOV (%RBX,%RDX,8),%RDX |
(962) 0x4430f4 MOV %RAX,(%R14,%RDX,8) |
(962) 0x4430f8 MOV -0x8(%R11),%RDX |
(962) 0x4430fc MOV (%RBX,%RDX,8),%RDX |
(962) 0x443100 MOV %RAX,(%R14,%RDX,8) |
(962) 0x443104 MOV (%R11),%RDX |
(962) 0x443107 MOV (%RBX,%RDX,8),%RDX |
(962) 0x44310b MOV %RAX,(%R14,%RDX,8) |
(962) 0x44310f ADD $0x40,%R11 |
(962) 0x443113 DEC %R10 |
(962) 0x443116 JNE 4430b0 |
0x443118 MOV %RDI,%R10 |
0x44311b AND $-0x8,%R10 |
0x44311f CMP %RDI,%R10 |
0x443122 JAE 443144 |
0x443124 LEA (%R9,%R8,8),%R8 |
0x443128 NOPL (%RAX,%RAX,1) |
(961) 0x443130 MOV (%R8,%R10,8),%RDX |
(961) 0x443134 MOV (%RBX,%RDX,8),%RDX |
(961) 0x443138 MOV %RAX,(%R14,%RDX,8) |
(961) 0x44313c INC %R10 |
(961) 0x44313f CMP %R10,%RDI |
(961) 0x443142 JNE 443130 |
0x443144 MOV -0x60(%RBP),%RCX |
0x443148 MOV (%RCX,%RAX,8),%RDI |
0x44314c MOV -0x80(%RBP),%RCX |
0x443150 MOV (%RCX,%RAX,8),%R8 |
0x443154 MOV 0x8(%RCX,%RAX,8),%R10 |
0x443159 CMP %R10,%R8 |
0x44315c JGE 443240 |
0x443162 MOV -0xb0(%RBP),%RCX |
0x443169 LEA (%RCX,%R8,8),%R9 |
0x44316d MOV -0xd0(%RBP),%RSI |
0x443174 JMP 443190 |
(960) 0x443180 INC %R8 |
(960) 0x443183 ADD $0x8,%R9 |
(960) 0x443187 CMP %R10,%R8 |
(960) 0x44318a JGE 443240 |
(960) 0x443190 MOV %R9,%RDX |
(960) 0x443193 TEST %RSI,%RSI |
(960) 0x443196 JE 4431a6 |
(960) 0x443198 MOV (%R9),%RDX |
(960) 0x44319b MOV -0x110(%RBP),%R11 |
(960) 0x4431a2 LEA (%R11,%RDX,8),%RDX |
(960) 0x4431a6 MOV (%RDX),%R11 |
(960) 0x4431a9 CMPQ $-0x3,(%R12,%R11,8) |
(960) 0x4431ae JE 4431e5 |
(960) 0x4431b0 CMPQ $0x1,-0xa0(%RBP) |
(960) 0x4431b8 JE 4431d8 |
(960) 0x4431ba MOV -0x90(%RBP),%RDX |
(960) 0x4431c1 MOV (%RDX,%RAX,8),%RDX |
(960) 0x4431c5 MOV %R12,%RCX |
(960) 0x4431c8 MOV -0xf0(%RBP),%R12 |
(960) 0x4431cf CMP (%R12,%R11,8),%RDX |
(960) 0x4431d3 MOV %RCX,%R12 |
(960) 0x4431d6 JNE 4431e5 |
(960) 0x4431d8 MOV -0x98(%RBP),%RCX |
(960) 0x4431df VADDSD (%RCX,%R8,8),%XMM3,%XMM3 |
(960) 0x4431e5 CMP $-0x1,%R11 |
(960) 0x4431e9 JE 443180 |
(960) 0x4431eb CMP %RAX,(%R14,%R11,8) |
(960) 0x4431ef JNE 443180 |
(960) 0x4431f1 MOV -0x98(%RBP),%R10 |
(960) 0x4431f8 VMOVSD (%R10,%R8,8),%XMM5 |
(960) 0x4431fe MOV -0x48(%RBP),%RCX |
(960) 0x443202 VMOVSD %XMM5,(%RCX,%RDI,8) |
(960) 0x443207 MOV -0x100(%RBP),%RDX |
(960) 0x44320e MOV (%RDX,%R11,8),%RDX |
(960) 0x443212 MOV -0xf8(%RBP),%RCX |
(960) 0x443219 MOV %RDX,(%RCX,%RDI,8) |
(960) 0x44321d INC %RDI |
(960) 0x443220 VADDSD (%R10,%R8,8),%XMM4,%XMM4 |
(960) 0x443226 MOV -0x80(%RBP),%RCX |
(960) 0x44322a MOV 0x8(%RCX,%RAX,8),%R10 |
(960) 0x44322f JMP 443180 |
0x443240 MOV -0x68(%RBP),%RCX |
0x443244 MOV (%RCX,%RAX,8),%RDX |
0x443248 MOV -0x70(%RBP),%RCX |
0x44324c VMULSD (%RCX,%RDX,8),%XMM4,%XMM4 |
0x443251 VUCOMISD %XMM0,%XMM4 |
0x443255 JE 44325f |
0x443257 VXORPD %XMM1,%XMM3,%XMM2 |
0x44325b VDIVSD %XMM4,%XMM2,%XMM2 |
0x44325f MOV -0x58(%RBP),%RCX |
0x443263 MOV (%RCX,%RAX,8),%R9 |
0x443267 MOV -0x30(%RBP),%RSI |
0x44326b MOV %RSI,%R10 |
0x44326e SUB %R9,%R10 |
0x443271 MOV -0x48(%RBP),%RDX |
0x443275 MOV -0x88(%RBP),%RCX |
0x44327c JLE 4432e4 |
0x44327e MOV %R10,%R8 |
0x443281 AND $-0x4,%R8 |
0x443285 JE 4432c2 |
0x443287 LEA -0x1(%R8),%R11 |
0x44328b VBROADCASTSD %XMM2,%YMM3 |
0x443290 LEA (%RCX,%R9,8),%RSI |
0x443294 XOR %EDX,%EDX |
0x443296 NOPW %CS:(%RAX,%RAX,1) |
(959) 0x4432a0 VMULPD (%RSI,%RDX,8),%YMM3,%YMM4 |
(959) 0x4432a5 VMOVUPD %YMM4,(%RSI,%RDX,8) |
(959) 0x4432aa ADD $0x4,%RDX |
(959) 0x4432ae CMP %R11,%RDX |
(959) 0x4432b1 JBE 4432a0 |
0x4432b3 CMP %R8,%R10 |
0x4432b6 MOV -0x48(%RBP),%RDX |
0x4432ba MOV -0x30(%RBP),%RSI |
0x4432be JNE 4432c5 |
0x4432c0 JMP 4432e4 |
0x4432c2 XOR %R8D,%R8D |
0x4432c5 ADD %R9,%R8 |
0x4432c8 NOPL (%RAX,%RAX,1) |
(958) 0x4432d0 VMULSD (%RCX,%R8,8),%XMM2,%XMM3 |
(958) 0x4432d6 VMOVSD %XMM3,(%RCX,%R8,8) |
(958) 0x4432dc INC %R8 |
(958) 0x4432df CMP %R8,%RSI |
(958) 0x4432e2 JNE 4432d0 |
0x4432e4 MOV -0x60(%RBP),%RCX |
0x4432e8 MOV (%RCX,%RAX,8),%RCX |
0x4432ec MOV %RDI,%R8 |
0x4432ef SUB %RCX,%R8 |
0x4432f2 MOV -0x40(%RBP),%R10 |
0x4432f6 JLE 442e70 |
0x4432fc MOV %R8,%RAX |
0x4432ff AND $-0x4,%RAX |
0x443303 JE 443342 |
0x443305 LEA -0x1(%RAX),%R9 |
0x443309 VBROADCASTSD %XMM2,%YMM3 |
0x44330e LEA (%RDX,%RCX,8),%RSI |
0x443312 XOR %EDX,%EDX |
0x443314 NOPW %CS:(%RAX,%RAX,1) |
(957) 0x443320 VMULPD (%RSI,%RDX,8),%YMM3,%YMM4 |
(957) 0x443325 VMOVUPD %YMM4,(%RSI,%RDX,8) |
(957) 0x44332a ADD $0x4,%RDX |
(957) 0x44332e CMP %R9,%RDX |
(957) 0x443331 JBE 443320 |
0x443333 CMP %RAX,%R8 |
0x443336 MOV -0x48(%RBP),%RDX |
0x44333a JE 442e70 |
0x443340 JMP 443344 |
0x443342 XOR %EAX,%EAX |
0x443344 ADD %RCX,%RAX |
0x443347 NOPW (%RAX,%RAX,1) |
(956) 0x443350 VMULSD (%RDX,%RAX,8),%XMM2,%XMM3 |
(956) 0x443355 VMOVSD %XMM3,(%RDX,%RAX,8) |
(956) 0x44335a INC %RAX |
(956) 0x44335d CMP %RAX,%RDI |
(956) 0x443360 JNE 443350 |
0x443362 JMP 442e70 |
/scratch_na/users/xoserete/qaas_runs/171-172-8218/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1585 - 1660 |
-------------------------------------------------------------------------------- |
1585: if (n_fine) |
[...] |
1598: thread_start = pass_pointer[1] + (pass_length/num_threads)*my_thread_num; |
[...] |
1605: for (i=thread_start; i < thread_stop; i++) |
1606: { |
1607: i1 = pass_array[i]; |
1608: sum_C = 0; |
1609: sum_N = 0; |
1610: j_start = P_diag_start[i1]; |
1611: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1612: for (j=j_start; j < j_end; j++) |
1613: { |
1614: k1 = P_diag_pass[1][j]; |
1615: tmp_marker[C_array[k1]] = i1; |
1616: } |
1617: cnt = P_diag_i[i1]; |
1618: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1619: { |
1620: j1 = A_diag_j[j]; |
1621: if (CF_marker[j1] != -3 && |
1622: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1623: sum_N += A_diag_data[j]; |
1624: if (j1 != -1 && tmp_marker[j1] == i1) |
1625: { |
1626: P_diag_data[cnt] = A_diag_data[j]; |
1627: P_diag_j[cnt++] = fine_to_coarse[j1]; |
1628: sum_C += A_diag_data[j]; |
1629: } |
1630: } |
1631: j_start = P_offd_start[i1]; |
1632: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1633: for (j=j_start; j < j_end; j++) |
1634: { |
1635: k1 = P_offd_pass[1][j]; |
1636: tmp_marker_offd[C_array_offd[k1]] = i1; |
1637: } |
1638: cnt_offd = P_offd_i[i1]; |
1639: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1640: { |
1641: if (col_offd_S_to_A) |
1642: j1 = map_A_to_S[A_offd_j[j]]; |
1643: else |
1644: j1 = A_offd_j[j]; |
1645: if (CF_marker_offd[j1] != -3 && |
1646: (num_functions == 1 || dof_func[i1] == dof_func_offd[j1])) |
1647: sum_N += A_offd_data[j]; |
1648: if (j1 != -1 && tmp_marker_offd[j1] == i1) |
1649: { |
1650: P_offd_data[cnt_offd] = A_offd_data[j]; |
1651: P_offd_j[cnt_offd++] = map_S_to_new[j1]; |
1652: sum_C += A_offd_data[j]; |
1653: } |
1654: } |
1655: diagonal = A_diag_data[A_diag_i[i1]]; |
1656: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1657: for (j=P_diag_i[i1]; j < cnt; j++) |
1658: P_diag_data[j] *= alfa; |
1659: for (j=P_offd_i[i1]; j < cnt_offd; j++) |
1660: P_offd_data[j] *= alfa; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.52 |
CQA speedup if FP arith vectorized | 3.23 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.42 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.27 |
Source | par_multi_interp.c:1585-1585,par_multi_interp.c:1598-1598,par_multi_interp.c:1605-1607,par_multi_interp.c:1610-1614,par_multi_interp.c:1617-1618,par_multi_interp.c:1631-1635,par_multi_interp.c:1638-1639,par_multi_interp.c:1655-1660 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 22.67 |
CQA cycles if no scalar integer | 9.00 |
CQA cycles if FP arith vectorized | 7.02 |
CQA cycles if fully vectorized | 2.83 |
Front-end cycles | 22.67 |
DIV/SQRT cycles | 10.00 |
P0 cycles | 9.80 |
P1 cycles | 16.00 |
P2 cycles | 16.00 |
P3 cycles | 1.00 |
P4 cycles | 9.60 |
P5 cycles | 10.00 |
P6 cycles | 1.00 |
P7 cycles | 1.00 |
P8 cycles | 1.00 |
P9 cycles | 9.60 |
P10 cycles | 16.00 |
P11 cycles | 4.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 23.04 - 23.06 |
Stall cycles (UFS) | 0.00 |
Nb insns | 135.00 |
Nb uops | 135.00 |
Nb loads | 48.00 |
Nb stores | 2.00 |
Nb stack references | 20.00 |
FLOP/cycle | 0.09 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 17.65 |
Bytes prefetched | 0.00 |
Bytes loaded | 384.00 |
Bytes stored | 16.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 8.33 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 13.64 |
Vector-efficiency ratio all | 13.54 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 14.20 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.52 |
CQA speedup if FP arith vectorized | 3.23 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.42 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.27 |
Source | par_multi_interp.c:1585-1585,par_multi_interp.c:1598-1598,par_multi_interp.c:1605-1607,par_multi_interp.c:1610-1614,par_multi_interp.c:1617-1618,par_multi_interp.c:1631-1635,par_multi_interp.c:1638-1639,par_multi_interp.c:1655-1660 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 22.67 |
CQA cycles if no scalar integer | 9.00 |
CQA cycles if FP arith vectorized | 7.02 |
CQA cycles if fully vectorized | 2.83 |
Front-end cycles | 22.67 |
DIV/SQRT cycles | 10.00 |
P0 cycles | 9.80 |
P1 cycles | 16.00 |
P2 cycles | 16.00 |
P3 cycles | 1.00 |
P4 cycles | 9.60 |
P5 cycles | 10.00 |
P6 cycles | 1.00 |
P7 cycles | 1.00 |
P8 cycles | 1.00 |
P9 cycles | 9.60 |
P10 cycles | 16.00 |
P11 cycles | 4.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 23.04 - 23.06 |
Stall cycles (UFS) | 0.00 |
Nb insns | 135.00 |
Nb uops | 135.00 |
Nb loads | 48.00 |
Nb stores | 2.00 |
Nb stack references | 20.00 |
FLOP/cycle | 0.09 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 17.65 |
Bytes prefetched | 0.00 |
Bytes loaded | 384.00 |
Bytes stored | 16.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 8.33 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 13.64 |
Vector-efficiency ratio all | 13.54 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 14.20 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.27 |
Source file and lines | par_multi_interp.c:1585-1660 |
Module | exec |
nb instructions | 135 |
nb uops | 135 |
loop length | 575 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 20 |
micro-operation queue | 22.67 cycles |
front end | 22.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.00 | 9.80 | 16.00 | 16.00 | 1.00 | 9.60 | 10.00 | 1.00 | 1.00 | 1.00 | 9.60 | 16.00 |
cycles | 10.00 | 9.80 | 16.00 | 16.00 | 1.00 | 9.60 | 10.00 | 1.00 | 1.00 | 1.00 | 9.60 | 16.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 23.04-23.06 |
Stall cycles | 0.00 |
Front-end | 22.67 |
Dispatch | 16.00 |
DIV/SQRT | 4.00 |
Overall L1 | 22.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 37% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 50% |
all | 8% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 13% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 17% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 18% |
all | 13% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x50(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0x38(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 443367 <hypre_BoomerAMGBuildMultipass.extracted.27+0x767> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xd8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%RAX,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RDI,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 442f85 <hypre_BoomerAMGBuildMultipass.extracted.27+0x385> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 442f50 <hypre_BoomerAMGBuildMultipass.extracted.27+0x350> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%R8,%RDI,8),%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RCX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 442f85 <hypre_BoomerAMGBuildMultipass.extracted.27+0x385> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R8,%RDI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%RAX,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 443060 <hypre_BoomerAMGBuildMultipass.extracted.27+0x460> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xb8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 442fcc <hypre_BoomerAMGBuildMultipass.extracted.27+0x3cc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xe8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX,%RAX,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%RAX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%R8,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RDX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443144 <hypre_BoomerAMGBuildMultipass.extracted.27+0x544> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x8,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443118 <hypre_BoomerAMGBuildMultipass.extracted.27+0x518> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%R9,%R8,8),%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RDI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 443144 <hypre_BoomerAMGBuildMultipass.extracted.27+0x544> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R9,%R8,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%RAX,8),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R10,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443240 <hypre_BoomerAMGBuildMultipass.extracted.27+0x640> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xb0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xd0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 443190 <hypre_BoomerAMGBuildMultipass.extracted.27+0x590> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x70(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RCX,%RDX,8),%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM0,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 44325f <hypre_BoomerAMGBuildMultipass.extracted.27+0x65f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM1,%XMM3,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VDIVSD %XMM4,%XMM2,%XMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4432e4 <hypre_BoomerAMGBuildMultipass.extracted.27+0x6e4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4432c2 <hypre_BoomerAMGBuildMultipass.extracted.27+0x6c2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM2,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RCX,%R9,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4432c5 <hypre_BoomerAMGBuildMultipass.extracted.27+0x6c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4432e4 <hypre_BoomerAMGBuildMultipass.extracted.27+0x6e4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R9,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RCX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 442e70 <hypre_BoomerAMGBuildMultipass.extracted.27+0x270> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 443342 <hypre_BoomerAMGBuildMultipass.extracted.27+0x742> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RAX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM2,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RCX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 442e70 <hypre_BoomerAMGBuildMultipass.extracted.27+0x270> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 443344 <hypre_BoomerAMGBuildMultipass.extracted.27+0x744> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 442e70 <hypre_BoomerAMGBuildMultipass.extracted.27+0x270> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.27 |
Source file and lines | par_multi_interp.c:1585-1660 |
Module | exec |
nb instructions | 135 |
nb uops | 135 |
loop length | 575 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 20 |
micro-operation queue | 22.67 cycles |
front end | 22.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.00 | 9.80 | 16.00 | 16.00 | 1.00 | 9.60 | 10.00 | 1.00 | 1.00 | 1.00 | 9.60 | 16.00 |
cycles | 10.00 | 9.80 | 16.00 | 16.00 | 1.00 | 9.60 | 10.00 | 1.00 | 1.00 | 1.00 | 9.60 | 16.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 23.04-23.06 |
Stall cycles | 0.00 |
Front-end | 22.67 |
Dispatch | 16.00 |
DIV/SQRT | 4.00 |
Overall L1 | 22.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 37% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 50% |
all | 8% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 13% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 17% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 18% |
all | 13% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x50(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0x38(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 443367 <hypre_BoomerAMGBuildMultipass.extracted.27+0x767> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xd8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%RAX,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RDI,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 442f85 <hypre_BoomerAMGBuildMultipass.extracted.27+0x385> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 442f50 <hypre_BoomerAMGBuildMultipass.extracted.27+0x350> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%R8,%RDI,8),%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RCX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JAE 442f85 <hypre_BoomerAMGBuildMultipass.extracted.27+0x385> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R8,%RDI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%RAX,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JGE 443060 <hypre_BoomerAMGBuildMultipass.extracted.27+0x460> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xb8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 442fcc <hypre_BoomerAMGBuildMultipass.extracted.27+0x3cc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xe8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX,%RAX,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%RAX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%R8,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RDX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443144 <hypre_BoomerAMGBuildMultipass.extracted.27+0x544> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x8,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443118 <hypre_BoomerAMGBuildMultipass.extracted.27+0x518> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%R9,%R8,8),%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RDI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 443144 <hypre_BoomerAMGBuildMultipass.extracted.27+0x544> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R9,%R8,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x80(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%RAX,8),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R10,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443240 <hypre_BoomerAMGBuildMultipass.extracted.27+0x640> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xb0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xd0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 443190 <hypre_BoomerAMGBuildMultipass.extracted.27+0x590> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x70(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RCX,%RDX,8),%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM0,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 44325f <hypre_BoomerAMGBuildMultipass.extracted.27+0x65f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD %XMM1,%XMM3,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VDIVSD %XMM4,%XMM2,%XMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4432e4 <hypre_BoomerAMGBuildMultipass.extracted.27+0x6e4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4432c2 <hypre_BoomerAMGBuildMultipass.extracted.27+0x6c2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM2,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RCX,%R9,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4432c5 <hypre_BoomerAMGBuildMultipass.extracted.27+0x6c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4432e4 <hypre_BoomerAMGBuildMultipass.extracted.27+0x6e4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R9,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RAX,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RCX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 442e70 <hypre_BoomerAMGBuildMultipass.extracted.27+0x270> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 443342 <hypre_BoomerAMGBuildMultipass.extracted.27+0x742> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RAX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM2,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RCX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 442e70 <hypre_BoomerAMGBuildMultipass.extracted.27+0x270> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 443344 <hypre_BoomerAMGBuildMultipass.extracted.27+0x744> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 442e70 <hypre_BoomerAMGBuildMultipass.extracted.27+0x270> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |