Function: .omp_outlined..2#0x2a4000 | Module: exec | Source: par_strength.c:246-513 [...] | Coverage: 0.69% |
---|
Function: .omp_outlined..2#0x2a4000 | Module: exec | Source: par_strength.c:246-513 [...] | Coverage: 0.69% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-859-5251/intel/AMG/build/AMG/AMG/parcsr_ls/par_strength.c: 246 - 513 |
-------------------------------------------------------------------------------- |
246: #pragma omp parallel private(i,diag,row_scale,row_sum,jA,jS) |
247: #endif |
248: { |
249: HYPRE_Int start, stop; |
250: hypre_GetSimpleThreadPartition(&start, &stop, num_variables); |
251: HYPRE_Int jS_diag = 0, jS_offd = 0; |
252: |
253: for (i = start; i < stop; i++) |
254: { |
255: S_diag_i[i] = jS_diag; |
256: if (num_cols_offd) |
257: { |
258: S_offd_i[i] = jS_offd; |
259: } |
260: |
261: diag = A_diag_data[A_diag_i[i]]; |
262: |
263: /* compute scaling factor and row sum */ |
264: row_scale = 0.0; |
265: row_sum = diag; |
266: if (num_functions > 1) |
267: { |
268: if (diag < 0) |
269: { |
270: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
271: { |
272: if (dof_func[i] == dof_func[A_diag_j[jA]]) |
273: { |
274: row_scale = hypre_max(row_scale, A_diag_data[jA]); |
275: row_sum += A_diag_data[jA]; |
276: } |
277: } |
278: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
279: { |
280: if (dof_func[i] == dof_func_offd[A_offd_j[jA]]) |
281: { |
282: row_scale = hypre_max(row_scale, A_offd_data[jA]); |
283: row_sum += A_offd_data[jA]; |
[...] |
289: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
290: { |
291: if (dof_func[i] == dof_func[A_diag_j[jA]]) |
292: { |
293: row_scale = hypre_min(row_scale, A_diag_data[jA]); |
294: row_sum += A_diag_data[jA]; |
295: } |
296: } |
297: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
298: { |
299: if (dof_func[i] == dof_func_offd[A_offd_j[jA]]) |
300: { |
301: row_scale = hypre_min(row_scale, A_offd_data[jA]); |
302: row_sum += A_offd_data[jA]; |
[...] |
309: if (diag < 0) |
310: { |
311: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
312: { |
313: row_scale = hypre_max(row_scale, A_diag_data[jA]); |
314: row_sum += A_diag_data[jA]; |
315: } |
316: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
317: { |
318: row_scale = hypre_max(row_scale, A_offd_data[jA]); |
319: row_sum += A_offd_data[jA]; |
320: } |
321: } |
322: else |
323: { |
324: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
325: { |
326: row_scale = hypre_min(row_scale, A_diag_data[jA]); |
327: row_sum += A_diag_data[jA]; |
328: } |
329: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
330: { |
331: row_scale = hypre_min(row_scale, A_offd_data[jA]); |
332: row_sum += A_offd_data[jA]; |
333: } |
334: } /* diag >= 0*/ |
335: } /* num_functions <= 1 */ |
336: |
337: jS_diag += A_diag_i[i + 1] - A_diag_i[i] - 1; |
338: jS_offd += A_offd_i[i + 1] - A_offd_i[i]; |
339: |
340: /* compute row entries of S */ |
341: S_temp_diag_j[A_diag_i[i]] = -1; |
342: if ((fabs(row_sum) > fabs(diag)*max_row_sum) && (max_row_sum < 1.0)) |
343: { |
344: /* make all dependencies weak */ |
345: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
346: { |
347: S_temp_diag_j[jA] = -1; |
348: } |
349: jS_diag -= A_diag_i[i + 1] - (A_diag_i[i] + 1); |
350: |
351: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
352: { |
353: S_temp_offd_j[jA] = -1; |
354: } |
355: jS_offd -= A_offd_i[i + 1] - A_offd_i[i]; |
356: } |
357: else |
358: { |
359: if (num_functions > 1) |
360: { |
361: if (diag < 0) |
362: { |
363: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
364: { |
365: if (A_diag_data[jA] <= strength_threshold * row_scale |
366: || dof_func[i] != dof_func[A_diag_j[jA]]) |
367: { |
368: S_temp_diag_j[jA] = -1; |
369: --jS_diag; |
370: } |
371: else |
372: { |
373: S_temp_diag_j[jA] = A_diag_j[jA]; |
374: } |
375: } |
376: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
377: { |
378: if (A_offd_data[jA] <= strength_threshold * row_scale |
379: || dof_func[i] != dof_func_offd[A_offd_j[jA]]) |
380: { |
381: S_temp_offd_j[jA] = -1; |
382: --jS_offd; |
383: } |
384: else |
385: { |
386: S_temp_offd_j[jA] = A_offd_j[jA]; |
[...] |
392: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
393: { |
394: if (A_diag_data[jA] >= strength_threshold * row_scale |
395: || dof_func[i] != dof_func[A_diag_j[jA]]) |
396: { |
397: S_temp_diag_j[jA] = -1; |
398: --jS_diag; |
399: } |
400: else |
401: { |
402: S_temp_diag_j[jA] = A_diag_j[jA]; |
403: } |
404: } |
405: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
406: { |
407: if (A_offd_data[jA] >= strength_threshold * row_scale |
408: || dof_func[i] != dof_func_offd[A_offd_j[jA]]) |
409: { |
410: S_temp_offd_j[jA] = -1; |
411: --jS_offd; |
412: } |
413: else |
414: { |
415: S_temp_offd_j[jA] = A_offd_j[jA]; |
[...] |
422: if (diag < 0) |
423: { |
424: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
425: { |
426: if (A_diag_data[jA] <= strength_threshold * row_scale) |
427: { |
428: S_temp_diag_j[jA] = -1; |
429: --jS_diag; |
430: } |
431: else |
432: { |
433: S_temp_diag_j[jA] = A_diag_j[jA]; |
434: } |
435: } |
436: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
437: { |
438: if (A_offd_data[jA] <= strength_threshold * row_scale) |
439: { |
440: S_temp_offd_j[jA] = -1; |
441: --jS_offd; |
442: } |
443: else |
444: { |
445: S_temp_offd_j[jA] = A_offd_j[jA]; |
[...] |
451: for (jA = A_diag_i[i]+1; jA < A_diag_i[i+1]; jA++) |
452: { |
453: if (A_diag_data[jA] >= strength_threshold * row_scale) |
454: { |
455: S_temp_diag_j[jA] = -1; |
456: --jS_diag; |
457: } |
458: else |
459: { |
460: S_temp_diag_j[jA] = A_diag_j[jA]; |
461: } |
462: } |
463: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
464: { |
465: if (A_offd_data[jA] >= strength_threshold * row_scale) |
466: { |
467: S_temp_offd_j[jA] = -1; |
468: --jS_offd; |
469: } |
470: else |
471: { |
472: S_temp_offd_j[jA] = A_offd_j[jA]; |
[...] |
480: hypre_prefix_sum_pair(&jS_diag, S_diag_i + num_variables, &jS_offd, S_offd_i + num_variables, prefix_sum_workspace); |
[...] |
492: for (i = start; i < stop; i++) |
493: { |
494: S_diag_i[i] += jS_diag; |
495: S_offd_i[i] += jS_offd; |
496: |
497: jS = S_diag_i[i]; |
498: for (jA = A_diag_i[i]; jA < A_diag_i[i+1]; jA++) |
499: { |
500: if (S_temp_diag_j[jA] > -1) |
501: { |
502: S_diag_j[jS] = S_temp_diag_j[jA]; |
503: jS++; |
504: } |
505: } |
506: |
507: jS = S_offd_i[i]; |
508: for (jA = A_offd_i[i]; jA < A_offd_i[i+1]; jA++) |
509: { |
510: if (S_temp_offd_j[jA] > -1) |
511: { |
512: S_offd_j[jS] = S_temp_offd_j[jA]; |
513: jS++; |
0x2a4000 PUSH %RBP |
0x2a4001 MOV %RSP,%RBP |
0x2a4004 PUSH %R15 |
0x2a4006 PUSH %R14 |
0x2a4008 PUSH %R13 |
0x2a400a PUSH %R12 |
0x2a400c PUSH %RBX |
0x2a400d SUB $0x88,%RSP |
0x2a4014 MOV %RDX,-0x90(%RBP) |
0x2a401b MOV (%RDX),%RDX |
0x2a401e LEA -0x80(%RBP),%RDI |
0x2a4022 LEA -0x48(%RBP),%RSI |
0x2a4026 MOV %R9,-0x58(%RBP) |
0x2a402a MOV %R8,-0xa8(%RBP) |
0x2a4031 MOV %RCX,%RBX |
0x2a4034 CALL 2e0ed0 <hypre_GetSimpleThreadPartition> |
0x2a4039 MOVQ $0,-0x30(%RBP) |
0x2a4041 MOVQ $0,-0x38(%RBP) |
0x2a4049 MOV -0x80(%RBP),%R10 |
0x2a404d MOV %RBX,-0x88(%RBP) |
0x2a4054 MOV (%RBX),%RAX |
0x2a4057 MOV %RAX,-0x60(%RBP) |
0x2a405b CMP %R10,-0x48(%RBP) |
0x2a405f JLE 2a479a |
0x2a4065 MOV -0x58(%RBP),%RSI |
0x2a4069 MOV 0x60(%RBP),%RDX |
0x2a406d MOV 0x28(%RBP),%RBX |
0x2a4071 MOV 0x10(%RBP),%RCX |
0x2a4075 MOV 0x30(%RBP),%R11 |
0x2a4079 MOV 0x50(%RBP),%RDI |
0x2a407d MOV 0x48(%RBP),%R8 |
0x2a4081 MOV 0x40(%RBP),%R9 |
0x2a4085 MOV 0x70(%RBP),%RAX |
0x2a4089 VMOVSD -0x94f11(%RIP),%XMM3 |
0x2a4091 VMOVDDUP -0x94f39(%RIP),%XMM4 |
0x2a4099 VXORPD %XMM2,%XMM2,%XMM2 |
0x2a409d MOV (%RSI),%RSI |
0x2a40a0 VMOVSD (%RDX),%XMM0 |
0x2a40a4 MOV (%RBX),%RDX |
0x2a40a7 VMOVSD (%RAX),%XMM1 |
0x2a40ab MOV %RSI,-0xa0(%RBP) |
0x2a40b2 MOV (%RCX),%RSI |
0x2a40b5 MOV 0x18(%RBP),%RCX |
0x2a40b9 MOV %RDX,-0x40(%RBP) |
0x2a40bd MOV (%R11),%RDX |
0x2a40c0 MOV (%RCX),%R14 |
0x2a40c3 MOV 0x38(%RBP),%RCX |
0x2a40c7 MOV %RDX,-0x78(%RBP) |
0x2a40cb MOV (%R8),%RDX |
0x2a40ce MOV (%RCX),%R12 |
0x2a40d1 MOV (%RDI),%RCX |
0x2a40d4 MOV (%R9),%RDI |
0x2a40d7 MOV %RDX,-0x68(%RBP) |
0x2a40db MOV 0x68(%RBP),%RDX |
0x2a40df MOV %RDI,-0x70(%RBP) |
0x2a40e3 MOV 0x58(%RBP),%RDI |
0x2a40e7 MOV (%RDX),%RAX |
0x2a40ea MOV (%RDI),%R13 |
0x2a40ed MOV %RAX,-0x98(%RBP) |
0x2a40f4 JMP 2a4111 |
(921) 0x2a4100 SUB %RDX,%RAX |
(921) 0x2a4103 ADD %RAX,-0x38(%RBP) |
(921) 0x2a4107 CMP %R10,-0x48(%RBP) |
(921) 0x2a410b JLE 2a479a |
(921) 0x2a4111 MOV -0x30(%RBP),%RAX |
(921) 0x2a4115 MOV -0x60(%RBP),%RDX |
(921) 0x2a4119 MOV %R10,%R15 |
(921) 0x2a411c MOV %RAX,(%RDX,%R10,8) |
(921) 0x2a4120 MOV -0xa8(%RBP),%RDX |
(921) 0x2a4127 CMPQ $0,(%RDX) |
(921) 0x2a412b JE 2a413c |
(921) 0x2a412d MOV -0x38(%RBP),%RAX |
(921) 0x2a4131 MOV -0xa0(%RBP),%RDX |
(921) 0x2a4138 MOV %RAX,(%RDX,%R15,8) |
(921) 0x2a413c MOV (%R14,%R15,8),%R9 |
(921) 0x2a4140 MOV 0x20(%RBP),%RDX |
(921) 0x2a4144 MOV 0x8(%R14,%R15,8),%RAX |
(921) 0x2a4149 LEA 0x1(%R15),%R10 |
(921) 0x2a414d VMOVSD (%RSI,%R9,8),%XMM6 |
(921) 0x2a4153 CMPQ $0x2,(%RDX) |
(921) 0x2a4157 LEA 0x1(%R9),%R11 |
(921) 0x2a415b JL 2a41c0 |
(921) 0x2a415d VUCOMISD %XMM6,%XMM2 |
(921) 0x2a4161 VXORPD %XMM5,%XMM5,%XMM5 |
(921) 0x2a4165 VMOVAPD %XMM6,%XMM7 |
(921) 0x2a4169 JBE 2a4230 |
(921) 0x2a416f CMP %RAX,%R11 |
(921) 0x2a4172 JGE 2a4280 |
(921) 0x2a4178 MOV -0x40(%RBP),%RDX |
(921) 0x2a417c VMOVAPD %XMM6,%XMM7 |
(921) 0x2a4180 MOV (%RDX,%R15,8),%RDX |
(921) 0x2a4184 JMP 2a419c |
(939) 0x2a4190 INC %R11 |
(939) 0x2a4193 CMP %R11,%RAX |
(939) 0x2a4196 JE 2a4280 |
(939) 0x2a419c MOV -0x78(%RBP),%RDI |
(939) 0x2a41a0 MOV -0x40(%RBP),%R8 |
(939) 0x2a41a4 MOV (%RDI,%R11,8),%RDI |
(939) 0x2a41a8 CMP %RDX,(%R8,%RDI,8) |
(939) 0x2a41ac JNE 2a4190 |
(939) 0x2a41ae VMOVSD (%RSI,%R11,8),%XMM8 |
(939) 0x2a41b4 VADDSD %XMM7,%XMM8,%XMM7 |
(939) 0x2a41b8 VMAXSD %XMM5,%XMM8,%XMM5 |
(939) 0x2a41bc JMP 2a4190 |
(921) 0x2a41c0 VUCOMISD %XMM6,%XMM2 |
(921) 0x2a41c4 VXORPD %XMM5,%XMM5,%XMM5 |
(921) 0x2a41c8 VMOVAPD %XMM6,%XMM7 |
(921) 0x2a41cc JBE 2a42f0 |
(921) 0x2a41d2 CMP %RAX,%R11 |
(921) 0x2a41d5 JGE 2a41f6 |
(921) 0x2a41d7 VMOVAPD %XMM6,%XMM7 |
(921) 0x2a41db NOPL (%RAX,%RAX,1) |
(935) 0x2a41e0 VMOVSD (%RSI,%R11,8),%XMM8 |
(935) 0x2a41e6 INC %R11 |
(935) 0x2a41e9 VADDSD %XMM7,%XMM8,%XMM7 |
(935) 0x2a41ed VMAXSD %XMM5,%XMM8,%XMM5 |
(935) 0x2a41f1 CMP %R11,%RAX |
(935) 0x2a41f4 JNE 2a41e0 |
(921) 0x2a41f6 MOV (%R12,%R15,8),%RDX |
(921) 0x2a41fa MOV 0x8(%R12,%R15,8),%RDI |
(921) 0x2a41ff CMP %RDI,%RDX |
(921) 0x2a4202 JGE 2a43b0 |
(921) 0x2a4208 NOPL (%RAX,%RAX,1) |
(934) 0x2a4210 VMOVSD (%RCX,%RDX,8),%XMM8 |
(934) 0x2a4215 INC %RDX |
(934) 0x2a4218 VADDSD %XMM7,%XMM8,%XMM7 |
(934) 0x2a421c VMAXSD %XMM5,%XMM8,%XMM5 |
(934) 0x2a4220 CMP %RDX,%RDI |
(934) 0x2a4223 JNE 2a4210 |
(921) 0x2a4225 JMP 2a43b0 |
(921) 0x2a4230 CMP %RAX,%R11 |
(921) 0x2a4233 JGE 2a4350 |
(921) 0x2a4239 MOV -0x40(%RBP),%RDX |
(921) 0x2a423d VMOVAPD %XMM6,%XMM7 |
(921) 0x2a4241 MOV (%RDX,%R15,8),%RDX |
(921) 0x2a4245 JMP 2a425c |
(937) 0x2a4250 INC %R11 |
(937) 0x2a4253 CMP %R11,%RAX |
(937) 0x2a4256 JE 2a4350 |
(937) 0x2a425c MOV -0x78(%RBP),%RDI |
(937) 0x2a4260 MOV -0x40(%RBP),%R8 |
(937) 0x2a4264 MOV (%RDI,%R11,8),%RDI |
(937) 0x2a4268 CMP %RDX,(%R8,%RDI,8) |
(937) 0x2a426c JNE 2a4250 |
(937) 0x2a426e VMOVSD (%RSI,%R11,8),%XMM8 |
(937) 0x2a4274 VADDSD %XMM7,%XMM8,%XMM7 |
(937) 0x2a4278 VMINSD %XMM8,%XMM5,%XMM5 |
(937) 0x2a427d JMP 2a4250 |
(921) 0x2a4280 MOV (%R12,%R15,8),%R11 |
(921) 0x2a4284 MOV 0x8(%R12,%R15,8),%RDX |
(921) 0x2a4289 CMP %RDX,%R11 |
(921) 0x2a428c JGE 2a43b0 |
(921) 0x2a4292 MOV -0x40(%RBP),%RDI |
(921) 0x2a4296 MOV 0x50(%RBP),%R8 |
(921) 0x2a429a MOV (%RDI,%R15,8),%RDI |
(921) 0x2a429e MOV (%R8),%R8 |
(921) 0x2a42a1 MOV %R8,-0x50(%RBP) |
(921) 0x2a42a5 JMP 2a42bc |
(938) 0x2a42b0 INC %R11 |
(938) 0x2a42b3 CMP %R11,%RDX |
(938) 0x2a42b6 JE 2a43b0 |
(938) 0x2a42bc MOV -0x68(%RBP),%R8 |
(938) 0x2a42c0 MOV -0x70(%RBP),%RBX |
(938) 0x2a42c4 MOV (%R8,%R11,8),%R8 |
(938) 0x2a42c8 CMP %RDI,(%RBX,%R8,8) |
(938) 0x2a42cc JNE 2a42b0 |
(938) 0x2a42ce MOV -0x50(%RBP),%R8 |
(938) 0x2a42d2 VMOVSD (%R8,%R11,8),%XMM8 |
(938) 0x2a42d8 VADDSD %XMM7,%XMM8,%XMM7 |
(938) 0x2a42dc VMAXSD %XMM5,%XMM8,%XMM5 |
(938) 0x2a42e0 JMP 2a42b0 |
(921) 0x2a42f0 CMP %RAX,%R11 |
(921) 0x2a42f3 JGE 2a4317 |
(921) 0x2a42f5 VMOVAPD %XMM6,%XMM7 |
(921) 0x2a42f9 NOPL (%RAX) |
(933) 0x2a4300 VMOVSD (%RSI,%R11,8),%XMM8 |
(933) 0x2a4306 INC %R11 |
(933) 0x2a4309 VADDSD %XMM7,%XMM8,%XMM7 |
(933) 0x2a430d VMINSD %XMM8,%XMM5,%XMM5 |
(933) 0x2a4312 CMP %R11,%RAX |
(933) 0x2a4315 JNE 2a4300 |
(921) 0x2a4317 MOV (%R12,%R15,8),%RDX |
(921) 0x2a431b MOV 0x8(%R12,%R15,8),%RDI |
(921) 0x2a4320 CMP %RDI,%RDX |
(921) 0x2a4323 JGE 2a43b0 |
(921) 0x2a4329 NOPL (%RAX) |
(932) 0x2a4330 VMOVSD (%RCX,%RDX,8),%XMM8 |
(932) 0x2a4335 INC %RDX |
(932) 0x2a4338 VADDSD %XMM7,%XMM8,%XMM7 |
(932) 0x2a433c VMINSD %XMM8,%XMM5,%XMM5 |
(932) 0x2a4341 CMP %RDX,%RDI |
(932) 0x2a4344 JNE 2a4330 |
(921) 0x2a4346 JMP 2a43b0 |
(921) 0x2a4350 MOV (%R12,%R15,8),%R11 |
(921) 0x2a4354 MOV 0x8(%R12,%R15,8),%RDX |
(921) 0x2a4359 CMP %RDX,%R11 |
(921) 0x2a435c JGE 2a43b0 |
(921) 0x2a435e MOV -0x40(%RBP),%RDI |
(921) 0x2a4362 MOV 0x50(%RBP),%R8 |
(921) 0x2a4366 MOV (%RDI,%R15,8),%RDI |
(921) 0x2a436a MOV (%R8),%R8 |
(921) 0x2a436d MOV %R8,-0x50(%RBP) |
(921) 0x2a4371 JMP 2a4388 |
(936) 0x2a4380 INC %R11 |
(936) 0x2a4383 CMP %R11,%RDX |
(936) 0x2a4386 JE 2a43b0 |
(936) 0x2a4388 MOV -0x68(%RBP),%R8 |
(936) 0x2a438c MOV -0x70(%RBP),%RBX |
(936) 0x2a4390 MOV (%R8,%R11,8),%R8 |
(936) 0x2a4394 CMP %RDI,(%RBX,%R8,8) |
(936) 0x2a4398 JNE 2a4380 |
(936) 0x2a439a MOV -0x50(%RBP),%R8 |
(936) 0x2a439e VMOVSD (%R8,%R11,8),%XMM8 |
(936) 0x2a43a4 VADDSD %XMM7,%XMM8,%XMM7 |
(936) 0x2a43a8 VMINSD %XMM8,%XMM5,%XMM5 |
(936) 0x2a43ad JMP 2a4380 |
(921) 0x2a43b0 NOT %R9 |
(921) 0x2a43b3 ADD %R9,%RAX |
(921) 0x2a43b6 ADD %RAX,-0x30(%RBP) |
(921) 0x2a43ba MOV (%R12,%R10,8),%RAX |
(921) 0x2a43be SUB (%R12,%R15,8),%RAX |
(921) 0x2a43c2 ADD %RAX,-0x38(%RBP) |
(921) 0x2a43c6 VUCOMISD %XMM0,%XMM3 |
(921) 0x2a43ca MOV (%R14,%R15,8),%RAX |
(921) 0x2a43ce MOVQ $-0x1,(%R13,%RAX,8) |
(921) 0x2a43d7 JBE 2a4470 |
(921) 0x2a43dd VANDPD %XMM4,%XMM6,%XMM8 |
(921) 0x2a43e1 VANDPD %XMM4,%XMM7,%XMM7 |
(921) 0x2a43e5 VMULSD %XMM0,%XMM8,%XMM8 |
(921) 0x2a43e9 VUCOMISD %XMM8,%XMM7 |
(921) 0x2a43ee JBE 2a4470 |
(921) 0x2a43f4 MOV (%R14,%R15,8),%RDI |
(921) 0x2a43f8 MOV (%R14,%R10,8),%RDX |
(921) 0x2a43fc MOV -0x98(%RBP),%R8 |
(921) 0x2a4403 LEA 0x1(%RDI),%RAX |
(921) 0x2a4407 CMP %RDX,%RAX |
(921) 0x2a440a JGE 2a4429 |
(921) 0x2a440c NOPL (%RAX) |
(931) 0x2a4410 MOVQ $-0x1,(%R13,%RAX,8) |
(931) 0x2a4419 INC %RAX |
(931) 0x2a441c MOV (%R14,%R10,8),%RDX |
(931) 0x2a4420 CMP %RDX,%RAX |
(931) 0x2a4423 JL 2a4410 |
(921) 0x2a4425 MOV (%R14,%R15,8),%RDI |
(921) 0x2a4429 MOV -0x30(%RBP),%RAX |
(921) 0x2a442d SUB %RDX,%RDI |
(921) 0x2a4430 LEA 0x1(%RDI,%RAX,1),%RAX |
(921) 0x2a4435 MOV %RAX,-0x30(%RBP) |
(921) 0x2a4439 MOV (%R12,%R15,8),%RAX |
(921) 0x2a443d MOV (%R12,%R10,8),%RDX |
(921) 0x2a4441 CMP %RDX,%RAX |
(921) 0x2a4444 JGE 2a4100 |
(921) 0x2a444a NOPW (%RAX,%RAX,1) |
(930) 0x2a4450 MOVQ $-0x1,(%R8,%RAX,8) |
(930) 0x2a4458 INC %RAX |
(930) 0x2a445b MOV (%R12,%R10,8),%RDX |
(930) 0x2a445f CMP %RDX,%RAX |
(930) 0x2a4462 JL 2a4450 |
(921) 0x2a4464 MOV (%R12,%R15,8),%RAX |
(921) 0x2a4468 JMP 2a4100 |
(921) 0x2a4470 MOV (%R14,%R15,8),%RAX |
(921) 0x2a4474 MOV 0x20(%RBP),%RDX |
(921) 0x2a4478 INC %RAX |
(921) 0x2a447b CMPQ $0x2,(%RDX) |
(921) 0x2a447f MOV (%R14,%R10,8),%RDX |
(921) 0x2a4483 JL 2a4560 |
(921) 0x2a4489 VUCOMISD %XMM6,%XMM2 |
(921) 0x2a448d JBE 2a4616 |
(921) 0x2a4493 CMP %RDX,%RAX |
(921) 0x2a4496 JGE 2a44e9 |
(921) 0x2a4498 MOV 0x28(%RBP),%RDX |
(921) 0x2a449c MOV 0x30(%RBP),%RDI |
(921) 0x2a44a0 VMULSD %XMM5,%XMM1,%XMM6 |
(921) 0x2a44a4 MOV (%RDX),%RDX |
(921) 0x2a44a7 MOV (%RDI),%RDI |
(921) 0x2a44aa JMP 2a44c6 |
(929) 0x2a44b0 MOVQ $-0x1,(%R13,%RAX,8) |
(929) 0x2a44b9 DECQ -0x30(%RBP) |
(929) 0x2a44bd INC %RAX |
(929) 0x2a44c0 CMP %RAX,(%R14,%R10,8) |
(929) 0x2a44c4 JLE 2a44e9 |
(929) 0x2a44c6 VUCOMISD (%RSI,%RAX,8),%XMM6 |
(929) 0x2a44cb JAE 2a44b0 |
(929) 0x2a44cd MOV (%RDI,%RAX,8),%R8 |
(929) 0x2a44d1 MOV (%RDX,%R15,8),%R9 |
(929) 0x2a44d5 CMP %R9,(%RDX,%R8,8) |
(929) 0x2a44d9 JNE 2a44b0 |
(929) 0x2a44db MOV %R8,(%R13,%RAX,8) |
(929) 0x2a44e0 INC %RAX |
(929) 0x2a44e3 CMP %RAX,(%R14,%R10,8) |
(929) 0x2a44e7 JG 2a44c6 |
(921) 0x2a44e9 MOV (%R12,%R15,8),%RAX |
(921) 0x2a44ed CMP %RAX,(%R12,%R10,8) |
(921) 0x2a44f1 JLE 2a4107 |
(921) 0x2a44f7 MOV 0x40(%RBP),%RDI |
(921) 0x2a44fb MOV 0x48(%RBP),%R8 |
(921) 0x2a44ff MOV 0x68(%RBP),%R9 |
(921) 0x2a4503 MOV 0x28(%RBP),%RDX |
(921) 0x2a4507 VMULSD %XMM5,%XMM1,%XMM5 |
(921) 0x2a450b MOV (%RDX),%RBX |
(921) 0x2a450e MOV (%RDI),%RDI |
(921) 0x2a4511 MOV (%R8),%R8 |
(921) 0x2a4514 MOV (%R9),%R9 |
(921) 0x2a4517 JMP 2a4539 |
(928) 0x2a4520 MOVQ $-0x1,(%R9,%RAX,8) |
(928) 0x2a4528 DECQ -0x38(%RBP) |
(928) 0x2a452c INC %RAX |
(928) 0x2a452f CMP %RAX,(%R12,%R10,8) |
(928) 0x2a4533 JLE 2a4107 |
(928) 0x2a4539 VUCOMISD (%RCX,%RAX,8),%XMM5 |
(928) 0x2a453e JAE 2a4520 |
(928) 0x2a4540 MOV (%R8,%RAX,8),%R11 |
(928) 0x2a4544 MOV (%RBX,%R15,8),%RDX |
(928) 0x2a4548 CMP %RDX,(%RDI,%R11,8) |
(928) 0x2a454c JNE 2a4520 |
(928) 0x2a454e MOV %R11,(%R9,%RAX,8) |
(928) 0x2a4552 INC %RAX |
(928) 0x2a4555 CMP (%R12,%R10,8),%RAX |
(928) 0x2a4559 JL 2a4539 |
(921) 0x2a455b JMP 2a4107 |
(921) 0x2a4560 VUCOMISD %XMM6,%XMM2 |
(921) 0x2a4564 JBE 2a46e4 |
(921) 0x2a456a CMP %RDX,%RAX |
(921) 0x2a456d JGE 2a45af |
(921) 0x2a456f MOV 0x30(%RBP),%RDX |
(921) 0x2a4573 VMULSD %XMM5,%XMM1,%XMM6 |
(921) 0x2a4577 MOV (%RDX),%RDX |
(921) 0x2a457a JMP 2a4592 |
(925) 0x2a4580 MOV (%RDX,%RAX,8),%RDI |
(925) 0x2a4584 MOV %RDI,(%R13,%RAX,8) |
(925) 0x2a4589 INC %RAX |
(925) 0x2a458c CMP %RAX,(%R14,%R10,8) |
(925) 0x2a4590 JLE 2a45af |
(925) 0x2a4592 VUCOMISD (%RSI,%RAX,8),%XMM6 |
(925) 0x2a4597 JB 2a4580 |
(925) 0x2a4599 MOVQ $-0x1,(%R13,%RAX,8) |
(925) 0x2a45a2 DECQ -0x30(%RBP) |
(925) 0x2a45a6 INC %RAX |
(925) 0x2a45a9 CMP %RAX,(%R14,%R10,8) |
(925) 0x2a45ad JG 2a4592 |
(921) 0x2a45af MOV (%R12,%R15,8),%RAX |
(921) 0x2a45b3 CMP %RAX,(%R12,%R10,8) |
(921) 0x2a45b7 JLE 2a4107 |
(921) 0x2a45bd MOV 0x68(%RBP),%RDX |
(921) 0x2a45c1 MOV 0x48(%RBP),%RDI |
(921) 0x2a45c5 VMULSD %XMM5,%XMM1,%XMM5 |
(921) 0x2a45c9 MOV (%RDX),%RDX |
(921) 0x2a45cc MOV (%RDI),%RDI |
(921) 0x2a45cf JMP 2a45f5 |
(924) 0x2a45e0 MOV (%RDI,%RAX,8),%R8 |
(924) 0x2a45e4 MOV %R8,(%RDX,%RAX,8) |
(924) 0x2a45e8 INC %RAX |
(924) 0x2a45eb CMP %RAX,(%R12,%R10,8) |
(924) 0x2a45ef JLE 2a4107 |
(924) 0x2a45f5 VUCOMISD (%RCX,%RAX,8),%XMM5 |
(924) 0x2a45fa JB 2a45e0 |
(924) 0x2a45fc MOVQ $-0x1,(%RDX,%RAX,8) |
(924) 0x2a4604 DECQ -0x38(%RBP) |
(924) 0x2a4608 INC %RAX |
(924) 0x2a460b CMP (%R12,%R10,8),%RAX |
(924) 0x2a460f JL 2a45f5 |
(921) 0x2a4611 JMP 2a4107 |
(921) 0x2a4616 CMP %RDX,%RAX |
(921) 0x2a4619 JGE 2a466d |
(921) 0x2a461b MOV 0x28(%RBP),%RDX |
(921) 0x2a461f MOV 0x30(%RBP),%RDI |
(921) 0x2a4623 VMULSD %XMM5,%XMM1,%XMM6 |
(921) 0x2a4627 MOV (%RDX),%RDX |
(921) 0x2a462a MOV (%RDI),%RDI |
(921) 0x2a462d JMP 2a4646 |
(927) 0x2a4630 MOVQ $-0x1,(%R13,%RAX,8) |
(927) 0x2a4639 DECQ -0x30(%RBP) |
(927) 0x2a463d INC %RAX |
(927) 0x2a4640 CMP %RAX,(%R14,%R10,8) |
(927) 0x2a4644 JLE 2a466d |
(927) 0x2a4646 VMOVSD (%RSI,%RAX,8),%XMM7 |
(927) 0x2a464b VUCOMISD %XMM6,%XMM7 |
(927) 0x2a464f JAE 2a4630 |
(927) 0x2a4651 MOV (%RDI,%RAX,8),%R8 |
(927) 0x2a4655 MOV (%RDX,%R15,8),%R9 |
(927) 0x2a4659 CMP %R9,(%RDX,%R8,8) |
(927) 0x2a465d JNE 2a4630 |
(927) 0x2a465f MOV %R8,(%R13,%RAX,8) |
(927) 0x2a4664 INC %RAX |
(927) 0x2a4667 CMP %RAX,(%R14,%R10,8) |
(927) 0x2a466b JG 2a4646 |
(921) 0x2a466d MOV (%R12,%R15,8),%RAX |
(921) 0x2a4671 CMP %RAX,(%R12,%R10,8) |
(921) 0x2a4675 JLE 2a4107 |
(921) 0x2a467b MOV 0x40(%RBP),%RDI |
(921) 0x2a467f MOV 0x48(%RBP),%R8 |
(921) 0x2a4683 MOV 0x68(%RBP),%R9 |
(921) 0x2a4687 MOV 0x28(%RBP),%RDX |
(921) 0x2a468b VMULSD %XMM5,%XMM1,%XMM5 |
(921) 0x2a468f MOV (%RDX),%RBX |
(921) 0x2a4692 MOV (%RDI),%RDI |
(921) 0x2a4695 MOV (%R8),%R8 |
(921) 0x2a4698 MOV (%R9),%R9 |
(921) 0x2a469b JMP 2a46b9 |
(926) 0x2a46a0 MOVQ $-0x1,(%R9,%RAX,8) |
(926) 0x2a46a8 DECQ -0x38(%RBP) |
(926) 0x2a46ac INC %RAX |
(926) 0x2a46af CMP %RAX,(%R12,%R10,8) |
(926) 0x2a46b3 JLE 2a4107 |
(926) 0x2a46b9 VMOVSD (%RCX,%RAX,8),%XMM6 |
(926) 0x2a46be VUCOMISD %XMM5,%XMM6 |
(926) 0x2a46c2 JAE 2a46a0 |
(926) 0x2a46c4 MOV (%R8,%RAX,8),%R11 |
(926) 0x2a46c8 MOV (%RBX,%R15,8),%RDX |
(926) 0x2a46cc CMP %RDX,(%RDI,%R11,8) |
(926) 0x2a46d0 JNE 2a46a0 |
(926) 0x2a46d2 MOV %R11,(%R9,%RAX,8) |
(926) 0x2a46d6 INC %RAX |
(926) 0x2a46d9 CMP (%R12,%R10,8),%RAX |
(926) 0x2a46dd JL 2a46b9 |
(921) 0x2a46df JMP 2a4107 |
(921) 0x2a46e4 VMULSD %XMM5,%XMM1,%XMM5 |
(921) 0x2a46e8 CMP %RDX,%RAX |
(921) 0x2a46eb JGE 2a4733 |
(921) 0x2a46ed MOV 0x30(%RBP),%RDX |
(921) 0x2a46f1 MOV (%RDX),%RDX |
(921) 0x2a46f4 JMP 2a4712 |
(923) 0x2a4700 MOV (%RDX,%RAX,8),%RDI |
(923) 0x2a4704 MOV %RDI,(%R13,%RAX,8) |
(923) 0x2a4709 INC %RAX |
(923) 0x2a470c CMP %RAX,(%R14,%R10,8) |
(923) 0x2a4710 JLE 2a4733 |
(923) 0x2a4712 VMOVSD (%RSI,%RAX,8),%XMM6 |
(923) 0x2a4717 VUCOMISD %XMM5,%XMM6 |
(923) 0x2a471b JB 2a4700 |
(923) 0x2a471d MOVQ $-0x1,(%R13,%RAX,8) |
(923) 0x2a4726 DECQ -0x30(%RBP) |
(923) 0x2a472a INC %RAX |
(923) 0x2a472d CMP %RAX,(%R14,%R10,8) |
(923) 0x2a4731 JG 2a4712 |
(921) 0x2a4733 MOV (%R12,%R15,8),%RAX |
(921) 0x2a4737 CMP %RAX,(%R12,%R10,8) |
(921) 0x2a473b JLE 2a4107 |
(921) 0x2a4741 MOV 0x68(%RBP),%RDX |
(921) 0x2a4745 MOV 0x48(%RBP),%RDI |
(921) 0x2a4749 MOV (%RDX),%RDX |
(921) 0x2a474c MOV (%RDI),%RDI |
(921) 0x2a474f JMP 2a4775 |
(922) 0x2a4760 MOV (%RDI,%RAX,8),%R8 |
(922) 0x2a4764 MOV %R8,(%RDX,%RAX,8) |
(922) 0x2a4768 INC %RAX |
(922) 0x2a476b CMP %RAX,(%R12,%R10,8) |
(922) 0x2a476f JLE 2a4107 |
(922) 0x2a4775 VMOVSD (%RCX,%RAX,8),%XMM6 |
(922) 0x2a477a VUCOMISD %XMM5,%XMM6 |
(922) 0x2a477e JB 2a4760 |
(922) 0x2a4780 MOVQ $-0x1,(%RDX,%RAX,8) |
(922) 0x2a4788 DECQ -0x38(%RBP) |
(922) 0x2a478c INC %RAX |
(922) 0x2a478f CMP (%R12,%R10,8),%RAX |
(922) 0x2a4793 JL 2a4775 |
(921) 0x2a4795 JMP 2a4107 |
0x2a479a MOV -0x90(%RBP),%RAX |
0x2a47a1 MOV 0x78(%RBP),%RDX |
0x2a47a5 MOV -0x60(%RBP),%RSI |
0x2a47a9 MOV -0x58(%RBP),%RBX |
0x2a47ad LEA -0x30(%RBP),%RDI |
0x2a47b1 MOV (%RAX),%RCX |
0x2a47b4 MOV (%RDX),%R8 |
0x2a47b7 LEA -0x38(%RBP),%RDX |
0x2a47bb LEA (%RSI,%RCX,8),%RSI |
0x2a47bf SAL $0x3,%RCX |
0x2a47c3 ADD (%RBX),%RCX |
0x2a47c6 CALL 2e11f0 <hypre_prefix_sum_pair> |
0x2a47cb MOV -0x80(%RBP),%RSI |
0x2a47cf CMP %RSI,-0x48(%RBP) |
0x2a47d3 JLE 2a48b2 |
0x2a47d9 MOV 0x18(%RBP),%R8 |
0x2a47dd MOV 0x58(%RBP),%R9 |
0x2a47e1 MOV 0x38(%RBP),%R10 |
0x2a47e5 MOV 0x68(%RBP),%R11 |
0x2a47e9 MOV -0x88(%RBP),%RAX |
0x2a47f0 MOV 0x80(%RBP),%RCX |
0x2a47f7 MOV (%RBX),%RDI |
0x2a47fa MOV (%RAX),%RDX |
0x2a47fd MOV (%R8),%R8 |
0x2a4800 MOV (%R9),%R9 |
0x2a4803 MOV (%R10),%R10 |
0x2a4806 MOV (%R11),%R11 |
0x2a4809 JMP 2a481a |
(918) 0x2a4810 CMP %RSI,-0x48(%RBP) |
(918) 0x2a4814 JLE 2a48b2 |
(918) 0x2a481a MOV -0x30(%RBP),%RAX |
(918) 0x2a481e MOV %RSI,%RBX |
(918) 0x2a4821 ADD %RAX,(%RDX,%RSI,8) |
(918) 0x2a4825 MOV -0x38(%RBP),%RAX |
(918) 0x2a4829 ADD %RAX,(%RDI,%RSI,8) |
(918) 0x2a482d MOV (%R8,%RSI,8),%R14 |
(918) 0x2a4831 MOV 0x8(%R8,%RSI,8),%R15 |
(918) 0x2a4836 INC %RSI |
(918) 0x2a4839 CMP %R15,%R14 |
(918) 0x2a483c JGE 2a4870 |
(918) 0x2a483e MOV (%RDX,%RBX,8),%R12 |
(918) 0x2a4842 MOV (%RCX),%R13 |
(918) 0x2a4845 JMP 2a4858 |
(920) 0x2a4850 INC %R14 |
(920) 0x2a4853 CMP %R15,%R14 |
(920) 0x2a4856 JGE 2a4870 |
(920) 0x2a4858 MOV (%R9,%R14,8),%RAX |
(920) 0x2a485c TEST %RAX,%RAX |
(920) 0x2a485f JS 2a4850 |
(920) 0x2a4861 MOV %RAX,(%R13,%R12,8) |
(920) 0x2a4866 INC %R12 |
(920) 0x2a4869 MOV (%R8,%RSI,8),%R15 |
(920) 0x2a486d JMP 2a4850 |
(918) 0x2a4870 MOV (%R10,%RBX,8),%R14 |
(918) 0x2a4874 MOV (%R10,%RSI,8),%R15 |
(918) 0x2a4878 CMP %R15,%R14 |
(918) 0x2a487b JGE 2a4810 |
(918) 0x2a487d MOV 0x88(%RBP),%RAX |
(918) 0x2a4884 MOV (%RDI,%RBX,8),%RBX |
(918) 0x2a4888 MOV (%RAX),%R12 |
(918) 0x2a488b JMP 2a489c |
(919) 0x2a4890 INC %R14 |
(919) 0x2a4893 CMP %R15,%R14 |
(919) 0x2a4896 JGE 2a4810 |
(919) 0x2a489c MOV (%R11,%R14,8),%RAX |
(919) 0x2a48a0 TEST %RAX,%RAX |
(919) 0x2a48a3 JS 2a4890 |
(919) 0x2a48a5 MOV %RAX,(%R12,%RBX,8) |
(919) 0x2a48a9 INC %RBX |
(919) 0x2a48ac MOV (%R10,%RSI,8),%R15 |
(919) 0x2a48b0 JMP 2a4890 |
0x2a48b2 ADD $0x88,%RSP |
0x2a48b9 POP %RBX |
0x2a48ba POP %R12 |
0x2a48bc POP %R13 |
0x2a48be POP %R14 |
0x2a48c0 POP %R15 |
0x2a48c2 POP %RBP |
0x2a48c3 RET |
Path / |
Source file and lines | par_strength.c:246-513 |
Module | exec |
nb instructions | 96 |
nb uops | 98 |
loop length | 377 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 29 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.75 | 3.75 | 3.50 | 3.50 | 3.50 | 22.67 | 22.67 | 22.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 3.75 | 3.75 | 3.50 | 3.50 | 3.50 | 22.67 | 22.67 | 22.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 16.33 |
Dispatch | 22.67 |
Overall L1 | 22.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 20% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 11% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 15% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x80(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2e0ed0 <hypre_GetSimpleThreadPartition> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVQ $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x80(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R10,-0x48(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JLE 2a479a <.omp_outlined..2+0x79a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x58(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x60(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x48(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD -0x94f11(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x94f39(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD (%RDX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD (%RAX),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R11),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x68(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDI,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x58(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 2a4111 <.omp_outlined..2+0x111> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x78(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x30(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x38(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RSI,%RCX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD (%RBX),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CALL 2e11f0 <hypre_prefix_sum_pair> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x80(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RSI,-0x48(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JLE 2a48b2 <.omp_outlined..2+0x8b2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x68(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x80(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R10),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R11),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JMP 2a481a <.omp_outlined..2+0x81a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
ADD $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | par_strength.c:246-513 |
Module | exec |
nb instructions | 96 |
nb uops | 98 |
loop length | 377 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 29 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.75 | 3.75 | 3.50 | 3.50 | 3.50 | 22.67 | 22.67 | 22.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 3.75 | 3.75 | 3.50 | 3.50 | 3.50 | 22.67 | 22.67 | 22.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 16.33 |
Dispatch | 22.67 |
Overall L1 | 22.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 20% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 11% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 15% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x80(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2e0ed0 <hypre_GetSimpleThreadPartition> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVQ $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x80(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R10,-0x48(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JLE 2a479a <.omp_outlined..2+0x79a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x58(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x60(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x48(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD -0x94f11(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x94f39(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD (%RDX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD (%RAX),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R11),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x68(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDI,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x58(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 2a4111 <.omp_outlined..2+0x111> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x78(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x30(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA -0x38(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RSI,%RCX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD (%RBX),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CALL 2e11f0 <hypre_prefix_sum_pair> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x80(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RSI,-0x48(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JLE 2a48b2 <.omp_outlined..2+0x8b2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x68(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x80(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R10),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R11),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JMP 2a481a <.omp_outlined..2+0x81a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
ADD $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..2#0x2a4000– | 0.69 | 0.13 |
▼Loop 921 - par_strength.c:253-472 - exec– | 0.06 | 0.01 |
○Loop 923 - par_strength.c:451-460 - exec | 0.26 | 0.04 |
○Loop 933 - par_strength.c:324-327 - exec | 0.13 | 0.02 |
○Loop 922 - par_strength.c:463-472 - exec | 0 | 0 |
○Loop 928 - par_strength.c:376-386 - exec | 0 | 0 |
○Loop 926 - par_strength.c:405-415 - exec | 0 | 0 |
○Loop 934 - par_strength.c:316-319 - exec | 0 | 0 |
○Loop 930 - par_strength.c:351-353 - exec | 0 | 0 |
○Loop 937 - par_strength.c:289-294 - exec | 0 | 0 |
○Loop 932 - par_strength.c:329-332 - exec | 0 | 0 |
○Loop 936 - par_strength.c:297-302 - exec | 0 | 0 |
○Loop 925 - par_strength.c:424-433 - exec | 0 | 0 |
○Loop 927 - par_strength.c:392-402 - exec | 0 | 0 |
○Loop 929 - par_strength.c:363-373 - exec | 0 | 0 |
○Loop 935 - par_strength.c:311-314 - exec | 0 | 0 |
○Loop 931 - par_strength.c:345-347 - exec | 0 | 0 |
○Loop 938 - par_strength.c:278-283 - exec | 0 | 0 |
○Loop 924 - par_strength.c:436-445 - exec | 0 | 0 |
○Loop 939 - par_strength.c:270-275 - exec | 0 | 0 |
▼Loop 918 - par_strength.c:492-513 - exec– | 0.04 | 0.01 |
○Loop 920 - par_strength.c:498-503 - exec | 0.21 | 0.03 |
○Loop 919 - par_strength.c:508-513 - exec | 0 | 0 |