Function: .omp_outlined..2#0x29a750 | Module: exec | Source: par_strength.c:246-513 [...] | Coverage: 0.69% |
---|
Function: .omp_outlined..2#0x29a750 | Module: exec | Source: par_strength.c:246-513 [...] | Coverage: 0.69% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-859-5251/intel/AMG/build/AMG/AMG/parcsr_ls/par_strength.c: 246 - 513 |
-------------------------------------------------------------------------------- |
246: #pragma omp parallel private(i,diag,row_scale,row_sum,jA,jS) |
247: #endif |
248: { |
249: HYPRE_Int start, stop; |
250: hypre_GetSimpleThreadPartition(&start, &stop, num_variables); |
251: HYPRE_Int jS_diag = 0, jS_offd = 0; |
252: |
253: for (i = start; i < stop; i++) |
254: { |
255: S_diag_i[i] = jS_diag; |
256: if (num_cols_offd) |
257: { |
258: S_offd_i[i] = jS_offd; |
259: } |
260: |
261: diag = A_diag_data[A_diag_i[i]]; |
262: |
263: /* compute scaling factor and row sum */ |
264: row_scale = 0.0; |
265: row_sum = diag; |
266: if (num_functions > 1) |
267: { |
268: if (diag < 0) |
269: { |
270: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
271: { |
272: if (dof_func[i] == dof_func[A_diag_j[jA]]) |
273: { |
274: row_scale = hypre_max(row_scale, A_diag_data[jA]); |
275: row_sum += A_diag_data[jA]; |
276: } |
277: } |
278: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
279: { |
280: if (dof_func[i] == dof_func_offd[A_offd_j[jA]]) |
281: { |
282: row_scale = hypre_max(row_scale, A_offd_data[jA]); |
283: row_sum += A_offd_data[jA]; |
[...] |
289: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
290: { |
291: if (dof_func[i] == dof_func[A_diag_j[jA]]) |
292: { |
293: row_scale = hypre_min(row_scale, A_diag_data[jA]); |
294: row_sum += A_diag_data[jA]; |
295: } |
296: } |
297: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
298: { |
299: if (dof_func[i] == dof_func_offd[A_offd_j[jA]]) |
300: { |
301: row_scale = hypre_min(row_scale, A_offd_data[jA]); |
302: row_sum += A_offd_data[jA]; |
[...] |
309: if (diag < 0) |
310: { |
311: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
312: { |
313: row_scale = hypre_max(row_scale, A_diag_data[jA]); |
314: row_sum += A_diag_data[jA]; |
315: } |
316: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
317: { |
318: row_scale = hypre_max(row_scale, A_offd_data[jA]); |
319: row_sum += A_offd_data[jA]; |
320: } |
321: } |
322: else |
323: { |
324: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
325: { |
326: row_scale = hypre_min(row_scale, A_diag_data[jA]); |
327: row_sum += A_diag_data[jA]; |
328: } |
329: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
330: { |
331: row_scale = hypre_min(row_scale, A_offd_data[jA]); |
332: row_sum += A_offd_data[jA]; |
333: } |
334: } /* diag >= 0*/ |
335: } /* num_functions <= 1 */ |
336: |
337: jS_diag += A_diag_i[i + 1] - A_diag_i[i] - 1; |
338: jS_offd += A_offd_i[i + 1] - A_offd_i[i]; |
339: |
340: /* compute row entries of S */ |
341: S_temp_diag_j[A_diag_i[i]] = -1; |
342: if ((fabs(row_sum) > fabs(diag)*max_row_sum) && (max_row_sum < 1.0)) |
343: { |
344: /* make all dependencies weak */ |
345: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
346: { |
347: S_temp_diag_j[jA] = -1; |
348: } |
349: jS_diag -= A_diag_i[i + 1] - (A_diag_i[i] + 1); |
350: |
351: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
352: { |
353: S_temp_offd_j[jA] = -1; |
354: } |
355: jS_offd -= A_offd_i[i + 1] - A_offd_i[i]; |
356: } |
357: else |
358: { |
359: if (num_functions > 1) |
360: { |
361: if (diag < 0) |
362: { |
363: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
364: { |
365: if (A_diag_data[jA] <= strength_threshold * row_scale |
366: || dof_func[i] != dof_func[A_diag_j[jA]]) |
367: { |
368: S_temp_diag_j[jA] = -1; |
369: --jS_diag; |
370: } |
371: else |
372: { |
373: S_temp_diag_j[jA] = A_diag_j[jA]; |
374: } |
375: } |
376: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
377: { |
378: if (A_offd_data[jA] <= strength_threshold * row_scale |
379: || dof_func[i] != dof_func_offd[A_offd_j[jA]]) |
380: { |
381: S_temp_offd_j[jA] = -1; |
382: --jS_offd; |
383: } |
384: else |
385: { |
386: S_temp_offd_j[jA] = A_offd_j[jA]; |
[...] |
392: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
393: { |
394: if (A_diag_data[jA] >= strength_threshold * row_scale |
395: || dof_func[i] != dof_func[A_diag_j[jA]]) |
396: { |
397: S_temp_diag_j[jA] = -1; |
398: --jS_diag; |
399: } |
400: else |
401: { |
402: S_temp_diag_j[jA] = A_diag_j[jA]; |
403: } |
404: } |
405: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
406: { |
407: if (A_offd_data[jA] >= strength_threshold * row_scale |
408: || dof_func[i] != dof_func_offd[A_offd_j[jA]]) |
409: { |
410: S_temp_offd_j[jA] = -1; |
411: --jS_offd; |
412: } |
413: else |
414: { |
415: S_temp_offd_j[jA] = A_offd_j[jA]; |
[...] |
422: if (diag < 0) |
423: { |
424: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
425: { |
426: if (A_diag_data[jA] <= strength_threshold * row_scale) |
427: { |
428: S_temp_diag_j[jA] = -1; |
429: --jS_diag; |
430: } |
431: else |
432: { |
433: S_temp_diag_j[jA] = A_diag_j[jA]; |
434: } |
435: } |
436: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
437: { |
438: if (A_offd_data[jA] <= strength_threshold * row_scale) |
439: { |
440: S_temp_offd_j[jA] = -1; |
441: --jS_offd; |
442: } |
443: else |
444: { |
445: S_temp_offd_j[jA] = A_offd_j[jA]; |
[...] |
451: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
452: { |
453: if (A_diag_data[jA] >= strength_threshold * row_scale) |
454: { |
455: S_temp_diag_j[jA] = -1; |
456: --jS_diag; |
457: } |
458: else |
459: { |
460: S_temp_diag_j[jA] = A_diag_j[jA]; |
461: } |
462: } |
463: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
464: { |
465: if (A_offd_data[jA] >= strength_threshold * row_scale) |
466: { |
467: S_temp_offd_j[jA] = -1; |
468: --jS_offd; |
469: } |
470: else |
471: { |
472: S_temp_offd_j[jA] = A_offd_j[jA]; |
[...] |
480: hypre_prefix_sum_pair(&jS_diag, S_diag_i + num_variables, &jS_offd, S_offd_i + num_variables, prefix_sum_workspace); |
[...] |
492: for (i = start; i < stop; i++) |
493: { |
494: S_diag_i[i] += jS_diag; |
495: S_offd_i[i] += jS_offd; |
496: |
497: jS = S_diag_i[i]; |
498: for (jA = A_diag_i[i]; jA < A_diag_i[i+1]; jA++) |
499: { |
500: if (S_temp_diag_j[jA] > -1) |
501: { |
502: S_diag_j[jS] = S_temp_diag_j[jA]; |
503: jS++; |
504: } |
505: } |
506: |
507: jS = S_offd_i[i]; |
508: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
509: { |
510: if (S_temp_offd_j[jA] > -1) |
511: { |
512: S_offd_j[jS] = S_temp_offd_j[jA]; |
513: jS++; |
0x29a750 PUSH %RBP |
0x29a751 MOV %RSP,%RBP |
0x29a754 PUSH %R15 |
0x29a756 PUSH %R14 |
0x29a758 PUSH %R13 |
0x29a75a PUSH %R12 |
0x29a75c PUSH %RBX |
0x29a75d SUB $0x88,%RSP |
0x29a764 MOV %RDX,-0x90(%RBP) |
0x29a76b MOV (%RDX),%RDX |
0x29a76e LEA -0x80(%RBP),%RDI |
0x29a772 LEA -0x48(%RBP),%RSI |
0x29a776 MOV %R9,-0x58(%RBP) |
0x29a77a MOV %R8,-0xa8(%RBP) |
0x29a781 MOV %RCX,%RBX |
0x29a784 CALL 2d3d70 <hypre_GetSimpleThreadPartition> |
0x29a789 MOVQ $0,-0x30(%RBP) |
0x29a791 MOVQ $0,-0x38(%RBP) |
0x29a799 MOV -0x80(%RBP),%R10 |
0x29a79d MOV %RBX,-0x88(%RBP) |
0x29a7a4 MOV (%RBX),%RAX |
0x29a7a7 MOV %RAX,-0x60(%RBP) |
0x29a7ab CMP %R10,-0x48(%RBP) |
0x29a7af JLE 29aebe |
0x29a7b5 MOV -0x58(%RBP),%RSI |
0x29a7b9 MOV 0x60(%RBP),%RDX |
0x29a7bd MOV 0x28(%RBP),%RBX |
0x29a7c1 MOV 0x10(%RBP),%RCX |
0x29a7c5 MOV 0x30(%RBP),%R11 |
0x29a7c9 MOV 0x50(%RBP),%RDI |
0x29a7cd MOV 0x48(%RBP),%R8 |
0x29a7d1 MOV 0x40(%RBP),%R9 |
0x29a7d5 MOV 0x70(%RBP),%RAX |
0x29a7d9 VMOVSD -0x8b7e1(%RIP),%XMM3 |
0x29a7e1 VMOVDDUP -0x8b809(%RIP),%XMM4 |
0x29a7e9 VXORPD %XMM2,%XMM2,%XMM2 |
0x29a7ed MOV (%RSI),%RSI |
0x29a7f0 VMOVSD (%RDX),%XMM0 |
0x29a7f4 MOV (%RBX),%RDX |
0x29a7f7 VMOVSD (%RAX),%XMM1 |
0x29a7fb MOV %RSI,-0xa0(%RBP) |
0x29a802 MOV (%RCX),%RSI |
0x29a805 MOV 0x18(%RBP),%RCX |
0x29a809 MOV %RDX,-0x40(%RBP) |
0x29a80d MOV (%R11),%RDX |
0x29a810 MOV (%RCX),%R14 |
0x29a813 MOV 0x38(%RBP),%RCX |
0x29a817 MOV %RDX,-0x78(%RBP) |
0x29a81b MOV (%R8),%RDX |
0x29a81e MOV (%RCX),%R12 |
0x29a821 MOV (%RDI),%RCX |
0x29a824 MOV (%R9),%RDI |
0x29a827 MOV %RDX,-0x68(%RBP) |
0x29a82b MOV 0x68(%RBP),%RDX |
0x29a82f MOV %RDI,-0x70(%RBP) |
0x29a833 MOV 0x58(%RBP),%RDI |
0x29a837 MOV (%RDX),%RAX |
0x29a83a MOV (%RDI),%R13 |
0x29a83d MOV %RAX,-0x98(%RBP) |
0x29a844 JMP 29a861 |
(812) 0x29a850 SUB %RDX,%RAX |
(812) 0x29a853 ADD %RAX,-0x38(%RBP) |
(812) 0x29a857 CMP %R10,-0x48(%RBP) |
(812) 0x29a85b JLE 29aebe |
(812) 0x29a861 MOV -0x30(%RBP),%RAX |
(812) 0x29a865 MOV -0x60(%RBP),%RDX |
(812) 0x29a869 MOV %R10,%R15 |
(812) 0x29a86c MOV %RAX,(%RDX,%R10,8) |
(812) 0x29a870 MOV -0xa8(%RBP),%RDX |
(812) 0x29a877 CMPQ $0,(%RDX) |
(812) 0x29a87b JE 29a88c |
(812) 0x29a87d MOV -0x38(%RBP),%RAX |
(812) 0x29a881 MOV -0xa0(%RBP),%RDX |
(812) 0x29a888 MOV %RAX,(%RDX,%R15,8) |
(812) 0x29a88c MOV (%R14,%R15,8),%R9 |
(812) 0x29a890 MOV 0x20(%RBP),%RDX |
(812) 0x29a894 MOV 0x8(%R14,%R15,8),%RAX |
(812) 0x29a899 LEA 0x1(%R15),%R10 |
(812) 0x29a89d VMOVSD (%RSI,%R9,8),%XMM6 |
(812) 0x29a8a3 CMPQ $0x2,(%RDX) |
(812) 0x29a8a7 LEA 0x1(%R9),%R11 |
(812) 0x29a8ab JL 29a910 |
(812) 0x29a8ad VUCOMISD %XMM6,%XMM2 |
(812) 0x29a8b1 VXORPD %XMM5,%XMM5,%XMM5 |
(812) 0x29a8b5 VMOVAPD %XMM6,%XMM7 |
(812) 0x29a8b9 JBE 29a980 |
(812) 0x29a8bf CMP %RAX,%R11 |
(812) 0x29a8c2 JGE 29a9d0 |
(812) 0x29a8c8 MOV -0x40(%RBP),%RDX |
(812) 0x29a8cc VMOVAPD %XMM6,%XMM7 |
(812) 0x29a8d0 MOV (%RDX,%R15,8),%RDX |
(812) 0x29a8d4 JMP 29a8ec |
(830) 0x29a8e0 INC %R11 |
(830) 0x29a8e3 CMP %R11,%RAX |
(830) 0x29a8e6 JE 29a9d0 |
(830) 0x29a8ec MOV -0x78(%RBP),%RDI |
(830) 0x29a8f0 MOV -0x40(%RBP),%R8 |
(830) 0x29a8f4 MOV (%RDI,%R11,8),%RDI |
(830) 0x29a8f8 CMP %RDX,(%R8,%RDI,8) |
(830) 0x29a8fc JNE 29a8e0 |
(830) 0x29a8fe VMOVSD (%RSI,%R11,8),%XMM8 |
(830) 0x29a904 VADDSD %XMM7,%XMM8,%XMM7 |
(830) 0x29a908 VMAXSD %XMM5,%XMM8,%XMM5 |
(830) 0x29a90c JMP 29a8e0 |
(812) 0x29a910 VUCOMISD %XMM6,%XMM2 |
(812) 0x29a914 VXORPD %XMM5,%XMM5,%XMM5 |
(812) 0x29a918 VMOVAPD %XMM6,%XMM7 |
(812) 0x29a91c JBE 29aa40 |
(812) 0x29a922 CMP %RAX,%R11 |
(812) 0x29a925 JGE 29a946 |
(812) 0x29a927 VMOVAPD %XMM6,%XMM7 |
(812) 0x29a92b NOPL (%RAX,%RAX,1) |
(826) 0x29a930 VMOVSD (%RSI,%R11,8),%XMM8 |
(826) 0x29a936 INC %R11 |
(826) 0x29a939 VADDSD %XMM7,%XMM8,%XMM7 |
(826) 0x29a93d VMAXSD %XMM5,%XMM8,%XMM5 |
(826) 0x29a941 CMP %R11,%RAX |
(826) 0x29a944 JNE 29a930 |
(812) 0x29a946 MOV (%R12,%R15,8),%RDX |
(812) 0x29a94a MOV 0x8(%R12,%R15,8),%RDI |
(812) 0x29a94f CMP %RDI,%RDX |
(812) 0x29a952 JGE 29ab00 |
(812) 0x29a958 NOPL (%RAX,%RAX,1) |
(825) 0x29a960 VMOVSD (%RCX,%RDX,8),%XMM8 |
(825) 0x29a965 INC %RDX |
(825) 0x29a968 VADDSD %XMM7,%XMM8,%XMM7 |
(825) 0x29a96c VMAXSD %XMM5,%XMM8,%XMM5 |
(825) 0x29a970 CMP %RDX,%RDI |
(825) 0x29a973 JNE 29a960 |
(812) 0x29a975 JMP 29ab00 |
(812) 0x29a980 CMP %RAX,%R11 |
(812) 0x29a983 JGE 29aaa0 |
(812) 0x29a989 MOV -0x40(%RBP),%RDX |
(812) 0x29a98d VMOVAPD %XMM6,%XMM7 |
(812) 0x29a991 MOV (%RDX,%R15,8),%RDX |
(812) 0x29a995 JMP 29a9ac |
(828) 0x29a9a0 INC %R11 |
(828) 0x29a9a3 CMP %R11,%RAX |
(828) 0x29a9a6 JE 29aaa0 |
(828) 0x29a9ac MOV -0x78(%RBP),%RDI |
(828) 0x29a9b0 MOV -0x40(%RBP),%R8 |
(828) 0x29a9b4 MOV (%RDI,%R11,8),%RDI |
(828) 0x29a9b8 CMP %RDX,(%R8,%RDI,8) |
(828) 0x29a9bc JNE 29a9a0 |
(828) 0x29a9be VMOVSD (%RSI,%R11,8),%XMM8 |
(828) 0x29a9c4 VADDSD %XMM7,%XMM8,%XMM7 |
(828) 0x29a9c8 VMINSD %XMM8,%XMM5,%XMM5 |
(828) 0x29a9cd JMP 29a9a0 |
(812) 0x29a9d0 MOV (%R12,%R15,8),%R11 |
(812) 0x29a9d4 MOV 0x8(%R12,%R15,8),%RDX |
(812) 0x29a9d9 CMP %RDX,%R11 |
(812) 0x29a9dc JGE 29ab00 |
(812) 0x29a9e2 MOV -0x40(%RBP),%RDI |
(812) 0x29a9e6 MOV 0x50(%RBP),%R8 |
(812) 0x29a9ea MOV (%RDI,%R15,8),%RDI |
(812) 0x29a9ee MOV (%R8),%R8 |
(812) 0x29a9f1 MOV %R8,-0x50(%RBP) |
(812) 0x29a9f5 JMP 29aa0c |
(829) 0x29aa00 INC %R11 |
(829) 0x29aa03 CMP %R11,%RDX |
(829) 0x29aa06 JE 29ab00 |
(829) 0x29aa0c MOV -0x68(%RBP),%R8 |
(829) 0x29aa10 MOV -0x70(%RBP),%RBX |
(829) 0x29aa14 MOV (%R8,%R11,8),%R8 |
(829) 0x29aa18 CMP %RDI,(%RBX,%R8,8) |
(829) 0x29aa1c JNE 29aa00 |
(829) 0x29aa1e MOV -0x50(%RBP),%R8 |
(829) 0x29aa22 VMOVSD (%R8,%R11,8),%XMM8 |
(829) 0x29aa28 VADDSD %XMM7,%XMM8,%XMM7 |
(829) 0x29aa2c VMAXSD %XMM5,%XMM8,%XMM5 |
(829) 0x29aa30 JMP 29aa00 |
(812) 0x29aa40 CMP %RAX,%R11 |
(812) 0x29aa43 JGE 29aa67 |
(812) 0x29aa45 VMOVAPD %XMM6,%XMM7 |
(812) 0x29aa49 NOPL (%RAX) |
(824) 0x29aa50 VMOVSD (%RSI,%R11,8),%XMM8 |
(824) 0x29aa56 INC %R11 |
(824) 0x29aa59 VADDSD %XMM7,%XMM8,%XMM7 |
(824) 0x29aa5d VMINSD %XMM8,%XMM5,%XMM5 |
(824) 0x29aa62 CMP %R11,%RAX |
(824) 0x29aa65 JNE 29aa50 |
(812) 0x29aa67 MOV (%R12,%R15,8),%RDX |
(812) 0x29aa6b MOV 0x8(%R12,%R15,8),%RDI |
(812) 0x29aa70 CMP %RDI,%RDX |
(812) 0x29aa73 JGE 29ab00 |
(812) 0x29aa79 NOPL (%RAX) |
(823) 0x29aa80 VMOVSD (%RCX,%RDX,8),%XMM8 |
(823) 0x29aa85 INC %RDX |
(823) 0x29aa88 VADDSD %XMM7,%XMM8,%XMM7 |
(823) 0x29aa8c VMINSD %XMM8,%XMM5,%XMM5 |
(823) 0x29aa91 CMP %RDX,%RDI |
(823) 0x29aa94 JNE 29aa80 |
(812) 0x29aa96 JMP 29ab00 |
(812) 0x29aaa0 MOV (%R12,%R15,8),%R11 |
(812) 0x29aaa4 MOV 0x8(%R12,%R15,8),%RDX |
(812) 0x29aaa9 CMP %RDX,%R11 |
(812) 0x29aaac JGE 29ab00 |
(812) 0x29aaae MOV -0x40(%RBP),%RDI |
(812) 0x29aab2 MOV 0x50(%RBP),%R8 |
(812) 0x29aab6 MOV (%RDI,%R15,8),%RDI |
(812) 0x29aaba MOV (%R8),%R8 |
(812) 0x29aabd MOV %R8,-0x50(%RBP) |
(812) 0x29aac1 JMP 29aad8 |
(827) 0x29aad0 INC %R11 |
(827) 0x29aad3 CMP %R11,%RDX |
(827) 0x29aad6 JE 29ab00 |
(827) 0x29aad8 MOV -0x68(%RBP),%R8 |
(827) 0x29aadc MOV -0x70(%RBP),%RBX |
(827) 0x29aae0 MOV (%R8,%R11,8),%R8 |
(827) 0x29aae4 CMP %RDI,(%RBX,%R8,8) |
(827) 0x29aae8 JNE 29aad0 |
(827) 0x29aaea MOV -0x50(%RBP),%R8 |
(827) 0x29aaee VMOVSD (%R8,%R11,8),%XMM8 |
(827) 0x29aaf4 VADDSD %XMM7,%XMM8,%XMM7 |
(827) 0x29aaf8 VMINSD %XMM8,%XMM5,%XMM5 |
(827) 0x29aafd JMP 29aad0 |
(812) 0x29ab00 NOT %R9 |
(812) 0x29ab03 ADD %R9,%RAX |
(812) 0x29ab06 ADD %RAX,-0x30(%RBP) |
(812) 0x29ab0a MOV (%R12,%R10,8),%RAX |
(812) 0x29ab0e SUB (%R12,%R15,8),%RAX |
(812) 0x29ab12 ADD %RAX,-0x38(%RBP) |
(812) 0x29ab16 VUCOMISD %XMM0,%XMM3 |
(812) 0x29ab1a MOV (%R14,%R15,8),%RAX |
(812) 0x29ab1e MOVQ $-0x1,(%R13,%RAX,8) |
(812) 0x29ab27 JBE 29abc0 |
(812) 0x29ab2d VANDPD %XMM4,%XMM6,%XMM8 |
(812) 0x29ab31 VANDPD %XMM4,%XMM7,%XMM7 |
(812) 0x29ab35 VMULSD %XMM0,%XMM8,%XMM8 |
(812) 0x29ab39 VUCOMISD %XMM8,%XMM7 |
(812) 0x29ab3e JBE 29abc0 |
(812) 0x29ab44 MOV (%R14,%R15,8),%RDI |
(812) 0x29ab48 MOV (%R14,%R10,8),%RDX |
(812) 0x29ab4c MOV -0x98(%RBP),%R8 |
(812) 0x29ab53 LEA 0x1(%RDI),%RAX |
(812) 0x29ab57 CMP %RDX,%RAX |
(812) 0x29ab5a JGE 29ab79 |
(812) 0x29ab5c NOPL (%RAX) |
(822) 0x29ab60 MOVQ $-0x1,(%R13,%RAX,8) |
(822) 0x29ab69 INC %RAX |
(822) 0x29ab6c MOV (%R14,%R10,8),%RDX |
(822) 0x29ab70 CMP %RDX,%RAX |
(822) 0x29ab73 JL 29ab60 |
(812) 0x29ab75 MOV (%R14,%R15,8),%RDI |
(812) 0x29ab79 MOV -0x30(%RBP),%RAX |
(812) 0x29ab7d SUB %RDX,%RDI |
(812) 0x29ab80 LEA 0x1(%RDI,%RAX,1),%RAX |
(812) 0x29ab85 MOV %RAX,-0x30(%RBP) |
(812) 0x29ab89 MOV (%R12,%R15,8),%RAX |
(812) 0x29ab8d MOV (%R12,%R10,8),%RDX |
(812) 0x29ab91 CMP %RDX,%RAX |
(812) 0x29ab94 JGE 29a850 |
(812) 0x29ab9a NOPW (%RAX,%RAX,1) |
(821) 0x29aba0 MOVQ $-0x1,(%R8,%RAX,8) |
(821) 0x29aba8 INC %RAX |
(821) 0x29abab MOV (%R12,%R10,8),%RDX |
(821) 0x29abaf CMP %RDX,%RAX |
(821) 0x29abb2 JL 29aba0 |
(812) 0x29abb4 MOV (%R12,%R15,8),%RAX |
(812) 0x29abb8 JMP 29a850 |
(812) 0x29abc0 MOV (%R14,%R15,8),%RAX |
(812) 0x29abc4 MOV 0x20(%RBP),%RDX |
(812) 0x29abc8 INC %RAX |
(812) 0x29abcb CMPQ $0x2,(%RDX) |
(812) 0x29abcf MOV (%R14,%R10,8),%RDX |
(812) 0x29abd3 JL 29ac40 |
(812) 0x29abd5 VUCOMISD %XMM6,%XMM2 |
(812) 0x29abd9 JBE 29ac8c |
(812) 0x29abdf CMP %RDX,%RAX |
(812) 0x29abe2 JGE 29acea |
(812) 0x29abe8 MOV 0x28(%RBP),%RDX |
(812) 0x29abec MOV 0x30(%RBP),%RDI |
(812) 0x29abf0 VMULSD %XMM5,%XMM1,%XMM6 |
(812) 0x29abf4 MOV (%RDX),%RDX |
(812) 0x29abf7 MOV (%RDI),%RDI |
(812) 0x29abfa JMP 29ac1a |
(820) 0x29ac00 MOVQ $-0x1,(%R13,%RAX,8) |
(820) 0x29ac09 DECQ -0x30(%RBP) |
(820) 0x29ac0d INC %RAX |
(820) 0x29ac10 CMP %RAX,(%R14,%R10,8) |
(820) 0x29ac14 JLE 29acea |
(820) 0x29ac1a VUCOMISD (%RSI,%RAX,8),%XMM6 |
(820) 0x29ac1f JAE 29ac00 |
(820) 0x29ac21 MOV (%RDI,%RAX,8),%R8 |
(820) 0x29ac25 MOV (%RDX,%R15,8),%R9 |
(820) 0x29ac29 CMP %R9,(%RDX,%R8,8) |
(820) 0x29ac2d JNE 29ac00 |
(820) 0x29ac2f MOV %R8,(%R13,%RAX,8) |
(820) 0x29ac34 JMP 29ac0d |
(812) 0x29ac40 VUCOMISD %XMM6,%XMM2 |
(812) 0x29ac44 JBE 29ad54 |
(812) 0x29ac4a CMP %RDX,%RAX |
(812) 0x29ac4d JGE 29ada0 |
(812) 0x29ac53 MOV 0x30(%RBP),%RDX |
(812) 0x29ac57 VMULSD %XMM5,%XMM1,%XMM6 |
(812) 0x29ac5b MOV (%RDX),%RDX |
(812) 0x29ac5e JMP 29ac76 |
(816) 0x29ac60 MOV (%RDX,%RAX,8),%RDI |
(816) 0x29ac64 MOV %RDI,(%R13,%RAX,8) |
(816) 0x29ac69 INC %RAX |
(816) 0x29ac6c CMP %RAX,(%R14,%R10,8) |
(816) 0x29ac70 JLE 29ada0 |
(816) 0x29ac76 VUCOMISD (%RSI,%RAX,8),%XMM6 |
(816) 0x29ac7b JB 29ac60 |
(816) 0x29ac7d MOVQ $-0x1,(%R13,%RAX,8) |
(816) 0x29ac86 DECQ -0x30(%RBP) |
(816) 0x29ac8a JMP 29ac69 |
(812) 0x29ac8c CMP %RDX,%RAX |
(812) 0x29ac8f JGE 29adfa |
(812) 0x29ac95 MOV 0x28(%RBP),%RDX |
(812) 0x29ac99 MOV 0x30(%RBP),%RDI |
(812) 0x29ac9d VMULSD %XMM5,%XMM1,%XMM6 |
(812) 0x29aca1 MOV (%RDX),%RDX |
(812) 0x29aca4 MOV (%RDI),%RDI |
(812) 0x29aca7 JMP 29acca |
(818) 0x29acb0 MOVQ $-0x1,(%R13,%RAX,8) |
(818) 0x29acb9 DECQ -0x30(%RBP) |
(818) 0x29acbd INC %RAX |
(818) 0x29acc0 CMP %RAX,(%R14,%R10,8) |
(818) 0x29acc4 JLE 29adfa |
(818) 0x29acca VMOVSD (%RSI,%RAX,8),%XMM7 |
(818) 0x29accf VUCOMISD %XMM6,%XMM7 |
(818) 0x29acd3 JAE 29acb0 |
(818) 0x29acd5 MOV (%RDI,%RAX,8),%R8 |
(818) 0x29acd9 MOV (%RDX,%R15,8),%R9 |
(818) 0x29acdd CMP %R9,(%RDX,%R8,8) |
(818) 0x29ace1 JNE 29acb0 |
(818) 0x29ace3 MOV %R8,(%R13,%RAX,8) |
(818) 0x29ace8 JMP 29acbd |
(812) 0x29acea MOV (%R12,%R15,8),%RAX |
(812) 0x29acee CMP %RAX,(%R12,%R10,8) |
(812) 0x29acf2 JLE 29a857 |
(812) 0x29acf8 MOV 0x40(%RBP),%RDI |
(812) 0x29acfc MOV 0x48(%RBP),%R8 |
(812) 0x29ad00 MOV 0x68(%RBP),%R9 |
(812) 0x29ad04 MOV 0x28(%RBP),%RDX |
(812) 0x29ad08 VMULSD %XMM5,%XMM1,%XMM5 |
(812) 0x29ad0c MOV (%RDX),%RBX |
(812) 0x29ad0f MOV (%RDI),%RDI |
(812) 0x29ad12 MOV (%R8),%R8 |
(812) 0x29ad15 MOV (%R9),%R9 |
(812) 0x29ad18 JMP 29ad39 |
(819) 0x29ad20 MOVQ $-0x1,(%R9,%RAX,8) |
(819) 0x29ad28 DECQ -0x38(%RBP) |
(819) 0x29ad2c INC %RAX |
(819) 0x29ad2f CMP %RAX,(%R12,%R10,8) |
(819) 0x29ad33 JLE 29a857 |
(819) 0x29ad39 VUCOMISD (%RCX,%RAX,8),%XMM5 |
(819) 0x29ad3e JAE 29ad20 |
(819) 0x29ad40 MOV (%R8,%RAX,8),%R11 |
(819) 0x29ad44 MOV (%RBX,%R15,8),%RDX |
(819) 0x29ad48 CMP %RDX,(%RDI,%R11,8) |
(819) 0x29ad4c JNE 29ad20 |
(819) 0x29ad4e MOV %R11,(%R9,%RAX,8) |
(819) 0x29ad52 JMP 29ad2c |
(812) 0x29ad54 VMULSD %XMM5,%XMM1,%XMM5 |
(812) 0x29ad58 CMP %RDX,%RAX |
(812) 0x29ad5b JGE 29ae68 |
(812) 0x29ad61 MOV 0x30(%RBP),%RDX |
(812) 0x29ad65 MOV (%RDX),%RDX |
(812) 0x29ad68 JMP 29ad86 |
(814) 0x29ad70 MOV (%RDX,%RAX,8),%RDI |
(814) 0x29ad74 MOV %RDI,(%R13,%RAX,8) |
(814) 0x29ad79 INC %RAX |
(814) 0x29ad7c CMP %RAX,(%R14,%R10,8) |
(814) 0x29ad80 JLE 29ae68 |
(814) 0x29ad86 VMOVSD (%RSI,%RAX,8),%XMM6 |
(814) 0x29ad8b VUCOMISD %XMM5,%XMM6 |
(814) 0x29ad8f JB 29ad70 |
(814) 0x29ad91 MOVQ $-0x1,(%R13,%RAX,8) |
(814) 0x29ad9a DECQ -0x30(%RBP) |
(814) 0x29ad9e JMP 29ad79 |
(812) 0x29ada0 MOV (%R12,%R15,8),%RAX |
(812) 0x29ada4 CMP %RAX,(%R12,%R10,8) |
(812) 0x29ada8 JLE 29a857 |
(812) 0x29adae MOV 0x68(%RBP),%RDX |
(812) 0x29adb2 MOV 0x48(%RBP),%RDI |
(812) 0x29adb6 VMULSD %XMM5,%XMM1,%XMM5 |
(812) 0x29adba MOV (%RDX),%RDX |
(812) 0x29adbd MOV (%RDI),%RDI |
(812) 0x29adc0 JMP 29ade5 |
(815) 0x29add0 MOV (%RDI,%RAX,8),%R8 |
(815) 0x29add4 MOV %R8,(%RDX,%RAX,8) |
(815) 0x29add8 INC %RAX |
(815) 0x29addb CMP %RAX,(%R12,%R10,8) |
(815) 0x29addf JLE 29a857 |
(815) 0x29ade5 VUCOMISD (%RCX,%RAX,8),%XMM5 |
(815) 0x29adea JB 29add0 |
(815) 0x29adec MOVQ $-0x1,(%RDX,%RAX,8) |
(815) 0x29adf4 DECQ -0x38(%RBP) |
(815) 0x29adf8 JMP 29add8 |
(812) 0x29adfa MOV (%R12,%R15,8),%RAX |
(812) 0x29adfe CMP %RAX,(%R12,%R10,8) |
(812) 0x29ae02 JLE 29a857 |
(812) 0x29ae08 MOV 0x40(%RBP),%RDI |
(812) 0x29ae0c MOV 0x48(%RBP),%R8 |
(812) 0x29ae10 MOV 0x68(%RBP),%R9 |
(812) 0x29ae14 MOV 0x28(%RBP),%RDX |
(812) 0x29ae18 VMULSD %XMM5,%XMM1,%XMM5 |
(812) 0x29ae1c MOV (%RDX),%RBX |
(812) 0x29ae1f MOV (%RDI),%RDI |
(812) 0x29ae22 MOV (%R8),%R8 |
(812) 0x29ae25 MOV (%R9),%R9 |
(812) 0x29ae28 JMP 29ae49 |
(817) 0x29ae30 MOVQ $-0x1,(%R9,%RAX,8) |
(817) 0x29ae38 DECQ -0x38(%RBP) |
(817) 0x29ae3c INC %RAX |
(817) 0x29ae3f CMP %RAX,(%R12,%R10,8) |
(817) 0x29ae43 JLE 29a857 |
(817) 0x29ae49 VMOVSD (%RCX,%RAX,8),%XMM6 |
(817) 0x29ae4e VUCOMISD %XMM5,%XMM6 |
(817) 0x29ae52 JAE 29ae30 |
(817) 0x29ae54 MOV (%R8,%RAX,8),%R11 |
(817) 0x29ae58 MOV (%RBX,%R15,8),%RDX |
(817) 0x29ae5c CMP %RDX,(%RDI,%R11,8) |
(817) 0x29ae60 JNE 29ae30 |
(817) 0x29ae62 MOV %R11,(%R9,%RAX,8) |
(817) 0x29ae66 JMP 29ae3c |
(812) 0x29ae68 MOV (%R12,%R15,8),%RAX |
(812) 0x29ae6c CMP %RAX,(%R12,%R10,8) |
(812) 0x29ae70 JLE 29a857 |
(812) 0x29ae76 MOV 0x68(%RBP),%RDX |
(812) 0x29ae7a MOV 0x48(%RBP),%RDI |
(812) 0x29ae7e MOV (%RDX),%RDX |
(812) 0x29ae81 MOV (%RDI),%RDI |
(812) 0x29ae84 JMP 29aea5 |
(813) 0x29ae90 MOV (%RDI,%RAX,8),%R8 |
(813) 0x29ae94 MOV %R8,(%RDX,%RAX,8) |
(813) 0x29ae98 INC %RAX |
(813) 0x29ae9b CMP %RAX,(%R12,%R10,8) |
(813) 0x29ae9f JLE 29a857 |
(813) 0x29aea5 VMOVSD (%RCX,%RAX,8),%XMM6 |
(813) 0x29aeaa VUCOMISD %XMM5,%XMM6 |
(813) 0x29aeae JB 29ae90 |
(813) 0x29aeb0 MOVQ $-0x1,(%RDX,%RAX,8) |
(813) 0x29aeb8 DECQ -0x38(%RBP) |
(813) 0x29aebc JMP 29ae98 |
0x29aebe MOV -0x90(%RBP),%RAX |
0x29aec5 MOV 0x78(%RBP),%RDX |
0x29aec9 MOV -0x60(%RBP),%RSI |
0x29aecd MOV -0x58(%RBP),%RBX |
0x29aed1 LEA -0x30(%RBP),%RDI |
0x29aed5 MOV (%RAX),%RCX |
0x29aed8 MOV (%RDX),%R8 |
0x29aedb LEA -0x38(%RBP),%RDX |
0x29aedf LEA (%RSI,%RCX,8),%RSI |
0x29aee3 SAL $0x3,%RCX |
0x29aee7 ADD (%RBX),%RCX |
0x29aeea CALL 2d4090 <hypre_prefix_sum_pair> |
0x29aeef MOV -0x80(%RBP),%RSI |
0x29aef3 CMP %RSI,-0x48(%RBP) |
0x29aef7 JLE 29afd2 |
0x29aefd MOV 0x18(%RBP),%R8 |
0x29af01 MOV 0x58(%RBP),%R9 |
0x29af05 MOV 0x38(%RBP),%R10 |
0x29af09 MOV 0x68(%RBP),%R11 |
0x29af0d MOV -0x88(%RBP),%RAX |
0x29af14 MOV 0x80(%RBP),%RCX |
0x29af1b MOV (%RBX),%RDI |
0x29af1e MOV (%RAX),%RDX |
0x29af21 MOV (%R8),%R8 |
0x29af24 MOV (%R9),%R9 |
0x29af27 MOV (%R10),%R10 |
0x29af2a MOV (%R11),%R11 |
0x29af2d JMP 29af3a |
(809) 0x29af30 CMP %RSI,-0x48(%RBP) |
(809) 0x29af34 JLE 29afd2 |
(809) 0x29af3a MOV -0x30(%RBP),%RAX |
(809) 0x29af3e MOV %RSI,%RBX |
(809) 0x29af41 ADD %RAX,(%RDX,%RSI,8) |
(809) 0x29af45 MOV -0x38(%RBP),%RAX |
(809) 0x29af49 ADD %RAX,(%RDI,%RSI,8) |
(809) 0x29af4d MOV (%R8,%RSI,8),%R14 |
(809) 0x29af51 MOV 0x8(%R8,%RSI,8),%R15 |
(809) 0x29af56 INC %RSI |
(809) 0x29af59 CMP %R15,%R14 |
(809) 0x29af5c JGE 29af90 |
(809) 0x29af5e MOV (%RDX,%RBX,8),%R12 |
(809) 0x29af62 MOV (%RCX),%R13 |
(809) 0x29af65 JMP 29af78 |
(811) 0x29af70 INC %R14 |
(811) 0x29af73 CMP %R15,%R14 |
(811) 0x29af76 JGE 29af90 |
(811) 0x29af78 MOV (%R9,%R14,8),%RAX |
(811) 0x29af7c TEST %RAX,%RAX |
(811) 0x29af7f JS 29af70 |
(811) 0x29af81 MOV %RAX,(%R13,%R12,8) |
(811) 0x29af86 INC %R12 |
(811) 0x29af89 MOV (%R8,%RSI,8),%R15 |
(811) 0x29af8d JMP 29af70 |
(809) 0x29af90 MOV (%R10,%RBX,8),%R14 |
(809) 0x29af94 MOV (%R10,%RSI,8),%R15 |
(809) 0x29af98 CMP %R15,%R14 |
(809) 0x29af9b JGE 29af30 |
(809) 0x29af9d MOV 0x88(%RBP),%RAX |
(809) 0x29afa4 MOV (%RDI,%RBX,8),%RBX |
(809) 0x29afa8 MOV (%RAX),%R12 |
(809) 0x29afab JMP 29afbc |
(810) 0x29afb0 INC %R14 |
(810) 0x29afb3 CMP %R15,%R14 |
(810) 0x29afb6 JGE 29af30 |
(810) 0x29afbc MOV (%R11,%R14,8),%RAX |
(810) 0x29afc0 TEST %RAX,%RAX |
(810) 0x29afc3 JS 29afb0 |
(810) 0x29afc5 MOV %RAX,(%R12,%RBX,8) |
(810) 0x29afc9 INC %RBX |
(810) 0x29afcc MOV (%R10,%RSI,8),%R15 |
(810) 0x29afd0 JMP 29afb0 |
0x29afd2 ADD $0x88,%RSP |
0x29afd9 POP %RBX |
0x29afda POP %R12 |
0x29afdc POP %R13 |
0x29afde POP %R14 |
0x29afe0 POP %R15 |
0x29afe2 POP %RBP |
0x29afe3 RET |
Path / |
Source file and lines | par_strength.c:246-513 |
Module | exec |
nb instructions | 96 |
nb uops | 98 |
loop length | 377 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 29 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.75 | 3.75 | 3.50 | 3.50 | 3.50 | 22.67 | 22.67 | 22.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 3.75 | 3.75 | 3.50 | 3.50 | 3.50 | 22.67 | 22.67 | 22.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 16.33 |
Dispatch | 22.67 |
Overall L1 | 22.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 20% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 11% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 15% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x80(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2d3d70 <hypre_GetSimpleThreadPartition> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVQ $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x80(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R10,-0x48(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JLE 29aebe <.omp_outlined..2+0x76e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x58(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x60(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x48(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD -0x8b7e1(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x8b809(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD (%RDX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD (%RAX),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R11),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x68(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDI,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x58(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 29a861 <.omp_outlined..2+0x111> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x78(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x30(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x38(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RSI,%RCX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD (%RBX),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CALL 2d4090 <hypre_prefix_sum_pair> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x80(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RSI,-0x48(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JLE 29afd2 <.omp_outlined..2+0x882> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x68(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x80(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R10),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R11),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JMP 29af3a <.omp_outlined..2+0x7ea> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
ADD $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | par_strength.c:246-513 |
Module | exec |
nb instructions | 96 |
nb uops | 98 |
loop length | 377 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 29 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.75 | 3.75 | 3.50 | 3.50 | 3.50 | 22.67 | 22.67 | 22.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 3.75 | 3.75 | 3.50 | 3.50 | 3.50 | 22.67 | 22.67 | 22.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 16.33 |
Dispatch | 22.67 |
Overall L1 | 22.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 20% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 11% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 15% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x80(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2d3d70 <hypre_GetSimpleThreadPartition> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVQ $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x80(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R10,-0x48(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JLE 29aebe <.omp_outlined..2+0x76e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x58(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x60(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x48(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD -0x8b7e1(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x8b809(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD (%RDX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD (%RAX),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R11),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x68(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDI,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x58(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 29a861 <.omp_outlined..2+0x111> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x78(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x30(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x38(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RSI,%RCX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD (%RBX),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CALL 2d4090 <hypre_prefix_sum_pair> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x80(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RSI,-0x48(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JLE 29afd2 <.omp_outlined..2+0x882> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x68(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x80(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R10),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R11),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JMP 29af3a <.omp_outlined..2+0x7ea> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
ADD $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..2#0x29a750– | 0.69 | 0.13 |
▼Loop 812 - par_strength.c:253-472 - exec– | 0.06 | 0.01 |
○Loop 814 - par_strength.c:451-460 - exec | 0.25 | 0.04 |
○Loop 824 - par_strength.c:324-327 - exec | 0.13 | 0.02 |
○Loop 815 - par_strength.c:436-445 - exec | 0 | 0 |
○Loop 819 - par_strength.c:376-386 - exec | 0 | 0 |
○Loop 827 - par_strength.c:297-302 - exec | 0 | 0 |
○Loop 822 - par_strength.c:345-347 - exec | 0 | 0 |
○Loop 826 - par_strength.c:311-314 - exec | 0 | 0 |
○Loop 821 - par_strength.c:351-353 - exec | 0 | 0 |
○Loop 830 - par_strength.c:270-275 - exec | 0 | 0 |
○Loop 820 - par_strength.c:363-373 - exec | 0 | 0 |
○Loop 823 - par_strength.c:329-332 - exec | 0 | 0 |
○Loop 818 - par_strength.c:392-402 - exec | 0 | 0 |
○Loop 828 - par_strength.c:289-294 - exec | 0 | 0 |
○Loop 817 - par_strength.c:405-415 - exec | 0 | 0 |
○Loop 825 - par_strength.c:316-319 - exec | 0 | 0 |
○Loop 816 - par_strength.c:424-433 - exec | 0 | 0 |
○Loop 813 - par_strength.c:463-472 - exec | 0 | 0 |
○Loop 829 - par_strength.c:278-283 - exec | 0 | 0 |
▼Loop 809 - par_strength.c:492-513 - exec– | 0.04 | 0.01 |
○Loop 811 - par_strength.c:498-503 - exec | 0.21 | 0.03 |
○Loop 810 - par_strength.c:508-513 - exec | 0 | 0 |