Function: hypre_ParCSRRelaxThreads._omp_fn.1 | Module: libparcsr_ls.so | Source: ams.c:3662-3682 [...] | Coverage: 34.95% |
---|
Function: hypre_ParCSRRelaxThreads._omp_fn.1 | Module: libparcsr_ls.so | Source: ams.c:3662-3682 [...] | Coverage: 34.95% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-716-5699/intel/AMG/build/AMG/AMG/parcsr_ls/ams.c: 3662 - 3682 |
-------------------------------------------------------------------------------- |
3662: #pragma omp parallel for private(i,ii,jj,res) HYPRE_SMP_SCHEDULE |
[...] |
3669: if (A_diag_data[A_diag_i[i]] != zero) |
3670: { |
3671: res = f_data[i]; |
3672: for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) |
3673: { |
3674: ii = A_diag_j[jj]; |
3675: res -= A_diag_data[jj] * Vtemp_data[ii]; |
3676: } |
3677: for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) |
3678: { |
3679: ii = A_offd_j[jj]; |
3680: res -= A_offd_data[jj] * Vext_data[ii]; |
3681: } |
3682: u_data[i] += (relax_weight*res)/l1_norms[i]; |
0x12690 PUSH %RBP |
0x12691 MOV %RSP,%RBP |
0x12694 PUSH %R15 |
0x12696 PUSH %R14 |
0x12698 PUSH %R13 |
0x1269a MOV %RDI,%R13 |
0x1269d PUSH %R12 |
0x1269f PUSH %RBX |
0x126a0 AND $-0x20,%RSP |
0x126a4 SUB $0x40,%RSP |
0x126a8 CALL c110 <omp_get_num_threads@plt> |
0x126ad MOV %EAX,%EBX |
0x126af CALL c950 <omp_get_thread_num@plt> |
0x126b4 MOVSXD %EBX,%RSI |
0x126b7 MOVSXD %EAX,%RCX |
0x126ba MOV 0x40(%R13),%RAX |
0x126be CQTO |
0x126c0 IDIV %RSI |
0x126c3 CMP %RDX,%RCX |
0x126c6 JL 12cf8 |
0x126cc IMUL %RAX,%RCX |
0x126d0 ADD %RCX,%RDX |
0x126d3 ADD %RDX,%RAX |
0x126d6 MOV %RAX,0x28(%RSP) |
0x126db CMP %RAX,%RDX |
0x126de JGE 12ce6 |
0x126e4 MOV 0x30(%R13),%R11 |
0x126e8 MOV 0x50(%R13),%RDI |
0x126ec KXNORB %K1,%K1,%K1 |
0x126f0 MOV 0x48(%R13),%R10 |
0x126f4 MOV 0x28(%R13),%R14 |
0x126f8 MOV 0x18(%R13),%R15 |
0x126fc VMOVSD 0x68(%R13),%XMM3 |
0x12702 MOV %R11,0x38(%RSP) |
0x12707 MOV 0x60(%R13),%R9 |
0x1270b MOV 0x58(%R13),%R8 |
0x1270f MOV %RDI,0x20(%RSP) |
0x12714 MOV 0x38(%R13),%R12 |
0x12718 MOV 0x20(%R13),%RBX |
0x1271c MOV %R10,0x18(%RSP) |
0x12721 MOV 0x10(%R13),%R11 |
0x12725 VMOVSD 0x8(%R13),%XMM4 |
0x1272b MOV %R14,0x10(%RSP) |
0x12730 MOV (%R13),%R13 |
0x12734 MOV %R15,0x30(%RSP) |
0x12739 MOV %R13,0x8(%RSP) |
0x1273e JMP 1274e |
(44) 0x12740 INC %RDX |
(44) 0x12743 CMP %RDX,0x28(%RSP) |
(44) 0x12748 JE 12ce3 |
(44) 0x1274e MOV 0x30(%RSP),%RCX |
(44) 0x12753 MOV (%RCX,%RDX,8),%RAX |
(44) 0x12757 LEA (,%RAX,8),%RSI |
(44) 0x1275f MOV %RAX,%R14 |
(44) 0x12762 LEA (%R11,%RSI,1),%RDI |
(44) 0x12766 VCOMISD (%RDI),%XMM3 |
(44) 0x1276a JE 12740 |
(44) 0x1276c MOV 0x20(%RSP),%R10 |
(44) 0x12771 MOV 0x30(%RSP),%R15 |
(44) 0x12776 VMOVSD (%R10,%RDX,8),%XMM2 |
(44) 0x1277c MOV 0x8(%R15,%RDX,8),%R10 |
(44) 0x12781 CMP %R10,%RAX |
(44) 0x12784 JGE 12d60 |
(44) 0x1278a SUB %RAX,%R10 |
(44) 0x1278d LEA -0x1(%R10),%R13 |
(44) 0x12791 CMP $0x2,%R13 |
(44) 0x12795 JBE 12d79 |
(44) 0x1279b MOV %R10,%R15 |
(44) 0x1279e ADD %RBX,%RSI |
(44) 0x127a1 VXORPD %XMM0,%XMM0,%XMM0 |
(44) 0x127a5 XOR %ECX,%ECX |
(44) 0x127a7 SHR $0x2,%R15 |
(44) 0x127ab SAL $0x5,%R15 |
(44) 0x127af LEA -0x20(%R15),%R13 |
(44) 0x127b3 SHR $0x5,%R13 |
(44) 0x127b7 INC %R13 |
(44) 0x127ba AND $0x7,%R13D |
(44) 0x127be JE 12899 |
(44) 0x127c4 CMP $0x1,%R13 |
(44) 0x127c8 JE 12876 |
(44) 0x127ce CMP $0x2,%R13 |
(44) 0x127d2 JE 1285c |
(44) 0x127d8 CMP $0x3,%R13 |
(44) 0x127dc JE 12842 |
(44) 0x127de CMP $0x4,%R13 |
(44) 0x127e2 JE 12828 |
(44) 0x127e4 CMP $0x5,%R13 |
(44) 0x127e8 JE 1280e |
(44) 0x127ea CMP $0x6,%R13 |
(44) 0x127ee JNE 12d08 |
(44) 0x127f4 VMOVDQU (%RSI,%RCX,1),%YMM7 |
(44) 0x127f9 KMOVB %K1,%K3 |
(44) 0x127fd VGATHERQPD (%R8,%YMM7,8),%YMM5{%K3} |
(44) 0x12804 VFNMADD231PD (%RDI,%RCX,1),%YMM5,%YMM0 |
(44) 0x1280a ADD $0x20,%RCX |
(44) 0x1280e VMOVDQU (%RSI,%RCX,1),%YMM8 |
(44) 0x12813 KMOVB %K1,%K2 |
(44) 0x12817 VGATHERQPD (%R8,%YMM8,8),%YMM9{%K2} |
(44) 0x1281e VFNMADD231PD (%RDI,%RCX,1),%YMM9,%YMM0 |
(44) 0x12824 ADD $0x20,%RCX |
(44) 0x12828 VMOVDQU (%RSI,%RCX,1),%YMM10 |
(44) 0x1282d KMOVB %K1,%K4 |
(44) 0x12831 VGATHERQPD (%R8,%YMM10,8),%YMM11{%K4} |
(44) 0x12838 VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 |
(44) 0x1283e ADD $0x20,%RCX |
(44) 0x12842 VMOVDQU (%RSI,%RCX,1),%YMM12 |
(44) 0x12847 KMOVB %K1,%K7 |
(44) 0x1284b VGATHERQPD (%R8,%YMM12,8),%YMM13{%K7} |
(44) 0x12852 VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 |
(44) 0x12858 ADD $0x20,%RCX |
(44) 0x1285c VMOVDQU (%RSI,%RCX,1),%YMM14 |
(44) 0x12861 KMOVB %K1,%K6 |
(44) 0x12865 VGATHERQPD (%R8,%YMM14,8),%YMM15{%K6} |
(44) 0x1286c VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 |
(44) 0x12872 ADD $0x20,%RCX |
(44) 0x12876 VMOVDQU (%RSI,%RCX,1),%YMM6 |
(44) 0x1287b KMOVB %K1,%K5 |
(44) 0x1287f VGATHERQPD (%R8,%YMM6,8),%YMM1{%K5} |
(44) 0x12886 VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 |
(44) 0x1288c ADD $0x20,%RCX |
(44) 0x12890 CMP %RCX,%R15 |
(44) 0x12893 JE 1297f |
(46) 0x12899 VMOVDQU (%RSI,%RCX,1),%YMM7 |
(46) 0x1289e KMOVB %K1,%K3 |
(46) 0x128a2 VMOVDQU 0x20(%RSI,%RCX,1),%YMM8 |
(46) 0x128a8 KMOVB %K1,%K2 |
(46) 0x128ac KMOVB %K1,%K4 |
(46) 0x128b0 KMOVB %K1,%K7 |
(46) 0x128b4 VMOVDQU 0x40(%RSI,%RCX,1),%YMM10 |
(46) 0x128ba VMOVDQU 0x60(%RSI,%RCX,1),%YMM12 |
(46) 0x128c0 KMOVB %K1,%K6 |
(46) 0x128c4 KMOVB %K1,%K5 |
(46) 0x128c8 VGATHERQPD (%R8,%YMM7,8),%YMM5{%K3} |
(46) 0x128cf VGATHERQPD (%R8,%YMM8,8),%YMM9{%K2} |
(46) 0x128d6 VMOVDQU 0x80(%RSI,%RCX,1),%YMM13 |
(46) 0x128df VMOVDQU 0xa0(%RSI,%RCX,1),%YMM15 |
(46) 0x128e8 KMOVB %K1,%K3 |
(46) 0x128ec KMOVB %K1,%K2 |
(46) 0x128f0 VFNMADD231PD (%RDI,%RCX,1),%YMM5,%YMM0 |
(46) 0x128f6 VGATHERQPD (%R8,%YMM10,8),%YMM11{%K4} |
(46) 0x128fd VMOVDQU 0xc0(%RSI,%RCX,1),%YMM6 |
(46) 0x12906 VGATHERQPD (%R8,%YMM13,8),%YMM14{%K6} |
(46) 0x1290d VGATHERQPD (%R8,%YMM15,8),%YMM7{%K5} |
(46) 0x12914 VMOVDQU 0xe0(%RSI,%RCX,1),%YMM5 |
(46) 0x1291d VGATHERQPD (%R8,%YMM6,8),%YMM1{%K3} |
(46) 0x12924 VFNMADD231PD 0x20(%RDI,%RCX,1),%YMM9,%YMM0 |
(46) 0x1292b VFNMADD132PD 0x40(%RDI,%RCX,1),%YMM0,%YMM11 |
(46) 0x12932 VGATHERQPD (%R8,%YMM12,8),%YMM0{%K7} |
(46) 0x12939 VFNMADD132PD 0x60(%RDI,%RCX,1),%YMM11,%YMM0 |
(46) 0x12940 VFNMADD132PD 0x80(%RDI,%RCX,1),%YMM0,%YMM14 |
(46) 0x1294a VGATHERQPD (%R8,%YMM5,8),%YMM0{%K2} |
(46) 0x12951 VFNMADD132PD 0xa0(%RDI,%RCX,1),%YMM14,%YMM7 |
(46) 0x1295b VFNMADD132PD 0xc0(%RDI,%RCX,1),%YMM7,%YMM1 |
(46) 0x12965 VFNMADD132PD 0xe0(%RDI,%RCX,1),%YMM1,%YMM0 |
(46) 0x1296f ADD $0x100,%RCX |
(46) 0x12976 CMP %RCX,%R15 |
(46) 0x12979 JNE 12899 |
(44) 0x1297f VEXTRACTF64X2 $0x1,%YMM0,%XMM8 |
(44) 0x12986 VADDPD %XMM0,%XMM8,%XMM9 |
(44) 0x1298a VUNPCKHPD %XMM9,%XMM9,%XMM10 |
(44) 0x1298f VADDPD %XMM9,%XMM10,%XMM11 |
(44) 0x12994 VADDSD %XMM11,%XMM2,%XMM1 |
(44) 0x12999 TEST $0x3,%R10B |
(44) 0x1299d JE 129fa |
(44) 0x1299f MOV %R10,%RCX |
(44) 0x129a2 VADDPD %XMM8,%XMM0,%XMM0 |
(44) 0x129a7 AND $-0x4,%RCX |
(44) 0x129ab ADD %RCX,%RAX |
(44) 0x129ae SUB %RCX,%R10 |
(44) 0x129b1 CMP $0x1,%R10 |
(44) 0x129b5 JE 129ea |
(44) 0x129b7 ADD %R14,%RCX |
(44) 0x129ba KMOVB %K1,%K4 |
(44) 0x129be VMOVDQU (%RBX,%RCX,8),%XMM12 |
(44) 0x129c3 VGATHERQPD (%R8,%XMM12,8),%XMM13{%K4} |
(44) 0x129ca VFNMADD231PD (%R11,%RCX,8),%XMM13,%XMM0 |
(44) 0x129d0 VUNPCKHPD %XMM0,%XMM0,%XMM14 |
(44) 0x129d4 VADDPD %XMM0,%XMM14,%XMM15 |
(44) 0x129d8 VADDSD %XMM15,%XMM2,%XMM1 |
(44) 0x129dd TEST $0x1,%R10B |
(44) 0x129e1 JE 129fa |
(44) 0x129e3 AND $-0x2,%R10 |
(44) 0x129e7 ADD %R10,%RAX |
(44) 0x129ea MOV (%RBX,%RAX,8),%R14 |
(44) 0x129ee VMOVSD (%R11,%RAX,8),%XMM2 |
(44) 0x129f4 VFNMADD231SD (%R8,%R14,8),%XMM2,%XMM1 |
(44) 0x129fa MOV 0x10(%RSP),%RAX |
(44) 0x129ff MOV (%RAX,%RDX,8),%RCX |
(44) 0x12a03 MOV 0x8(%RAX,%RDX,8),%RSI |
(44) 0x12a08 CMP %RSI,%RCX |
(44) 0x12a0b JGE 12d50 |
(44) 0x12a11 SUB %RCX,%RSI |
(44) 0x12a14 MOV %RCX,%R15 |
(44) 0x12a17 LEA -0x1(%RSI),%RDI |
(44) 0x12a1b CMP $0x2,%RDI |
(44) 0x12a1f JBE 12d69 |
(44) 0x12a25 MOV 0x38(%RSP),%R14 |
(44) 0x12a2a LEA (,%RCX,8),%RDI |
(44) 0x12a32 XOR %EAX,%EAX |
(44) 0x12a34 VXORPD %XMM2,%XMM2,%XMM2 |
(44) 0x12a38 LEA (%R12,%RDI,1),%R13 |
(44) 0x12a3c ADD %R14,%RDI |
(44) 0x12a3f MOV %RSI,%R14 |
(44) 0x12a42 SHR $0x2,%R14 |
(44) 0x12a46 SAL $0x5,%R14 |
(44) 0x12a4a LEA -0x20(%R14),%R10 |
(44) 0x12a4e SHR $0x5,%R10 |
(44) 0x12a52 INC %R10 |
(44) 0x12a55 AND $0x7,%R10D |
(44) 0x12a59 JE 12b40 |
(44) 0x12a5f CMP $0x1,%R10 |
(44) 0x12a63 JE 12b1b |
(44) 0x12a69 CMP $0x2,%R10 |
(44) 0x12a6d JE 12aff |
(44) 0x12a73 CMP $0x3,%R10 |
(44) 0x12a77 JE 12ae3 |
(44) 0x12a79 CMP $0x4,%R10 |
(44) 0x12a7d JE 12ac7 |
(44) 0x12a7f CMP $0x5,%R10 |
(44) 0x12a83 JE 12aab |
(44) 0x12a85 CMP $0x6,%R10 |
(44) 0x12a89 JNE 12d30 |
(44) 0x12a8f VMOVDQU (%R13,%RAX,1),%YMM8 |
(44) 0x12a96 KMOVB %K1,%K6 |
(44) 0x12a9a VGATHERQPD (%R9,%YMM8,8),%YMM0{%K6} |
(44) 0x12aa1 VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM2 |
(44) 0x12aa7 ADD $0x20,%RAX |
(44) 0x12aab VMOVDQU (%R13,%RAX,1),%YMM9 |
(44) 0x12ab2 KMOVB %K1,%K5 |
(44) 0x12ab6 VGATHERQPD (%R9,%YMM9,8),%YMM10{%K5} |
(44) 0x12abd VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM2 |
(44) 0x12ac3 ADD $0x20,%RAX |
(44) 0x12ac7 VMOVDQU (%R13,%RAX,1),%YMM11 |
(44) 0x12ace KMOVB %K1,%K3 |
(44) 0x12ad2 VGATHERQPD (%R9,%YMM11,8),%YMM12{%K3} |
(44) 0x12ad9 VFNMADD231PD (%RDI,%RAX,1),%YMM12,%YMM2 |
(44) 0x12adf ADD $0x20,%RAX |
(44) 0x12ae3 VMOVDQU (%R13,%RAX,1),%YMM13 |
(44) 0x12aea KMOVB %K1,%K2 |
(44) 0x12aee VGATHERQPD (%R9,%YMM13,8),%YMM14{%K2} |
(44) 0x12af5 VFNMADD231PD (%RDI,%RAX,1),%YMM14,%YMM2 |
(44) 0x12afb ADD $0x20,%RAX |
(44) 0x12aff VMOVDQU (%R13,%RAX,1),%YMM15 |
(44) 0x12b06 KMOVB %K1,%K4 |
(44) 0x12b0a VGATHERQPD (%R9,%YMM15,8),%YMM6{%K4} |
(44) 0x12b11 VFNMADD231PD (%RDI,%RAX,1),%YMM6,%YMM2 |
(44) 0x12b17 ADD $0x20,%RAX |
(44) 0x12b1b VMOVDQU (%R13,%RAX,1),%YMM7 |
(44) 0x12b22 KMOVB %K1,%K7 |
(44) 0x12b26 VGATHERQPD (%R9,%YMM7,8),%YMM5{%K7} |
(44) 0x12b2d VFNMADD231PD (%RDI,%RAX,1),%YMM5,%YMM2 |
(44) 0x12b33 ADD $0x20,%RAX |
(44) 0x12b37 CMP %RAX,%R14 |
(44) 0x12b3a JE 12c2e |
(45) 0x12b40 VMOVDQU (%R13,%RAX,1),%YMM8 |
(45) 0x12b47 KMOVB %K1,%K6 |
(45) 0x12b4b VMOVDQU 0x20(%R13,%RAX,1),%YMM9 |
(45) 0x12b52 KMOVB %K1,%K5 |
(45) 0x12b56 KMOVB %K1,%K3 |
(45) 0x12b5a KMOVB %K1,%K2 |
(45) 0x12b5e VMOVDQU 0x40(%R13,%RAX,1),%YMM11 |
(45) 0x12b65 VMOVDQU 0x60(%R13,%RAX,1),%YMM13 |
(45) 0x12b6c KMOVB %K1,%K4 |
(45) 0x12b70 KMOVB %K1,%K7 |
(45) 0x12b74 VGATHERQPD (%R9,%YMM8,8),%YMM0{%K6} |
(45) 0x12b7b VGATHERQPD (%R9,%YMM9,8),%YMM10{%K5} |
(45) 0x12b82 VMOVDQU 0x80(%R13,%RAX,1),%YMM14 |
(45) 0x12b8c VMOVDQU 0xa0(%R13,%RAX,1),%YMM6 |
(45) 0x12b96 KMOVB %K1,%K6 |
(45) 0x12b9a KMOVB %K1,%K5 |
(45) 0x12b9e VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM2 |
(45) 0x12ba4 VGATHERQPD (%R9,%YMM11,8),%YMM12{%K3} |
(45) 0x12bab VMOVDQU 0xc0(%R13,%RAX,1),%YMM7 |
(45) 0x12bb5 VGATHERQPD (%R9,%YMM14,8),%YMM15{%K4} |
(45) 0x12bbc VGATHERQPD (%R9,%YMM6,8),%YMM5{%K7} |
(45) 0x12bc3 VMOVDQU 0xe0(%R13,%RAX,1),%YMM0 |
(45) 0x12bcd VGATHERQPD (%R9,%YMM7,8),%YMM8{%K6} |
(45) 0x12bd4 VFNMADD231PD 0x20(%RDI,%RAX,1),%YMM10,%YMM2 |
(45) 0x12bdb VFNMADD132PD 0x40(%RDI,%RAX,1),%YMM2,%YMM12 |
(45) 0x12be2 VGATHERQPD (%R9,%YMM13,8),%YMM2{%K2} |
(45) 0x12be9 VFNMADD132PD 0x60(%RDI,%RAX,1),%YMM12,%YMM2 |
(45) 0x12bf0 VFNMADD132PD 0x80(%RDI,%RAX,1),%YMM2,%YMM15 |
(45) 0x12bfa VGATHERQPD (%R9,%YMM0,8),%YMM2{%K5} |
(45) 0x12c01 VFNMADD132PD 0xa0(%RDI,%RAX,1),%YMM15,%YMM5 |
(45) 0x12c0b VFNMADD132PD 0xc0(%RDI,%RAX,1),%YMM5,%YMM8 |
(45) 0x12c15 VFNMADD132PD 0xe0(%RDI,%RAX,1),%YMM8,%YMM2 |
(45) 0x12c1f ADD $0x100,%RAX |
(45) 0x12c25 CMP %RAX,%R14 |
(45) 0x12c28 JNE 12b40 |
(44) 0x12c2e VEXTRACTF64X2 $0x1,%YMM2,%XMM9 |
(44) 0x12c35 VADDPD %XMM2,%XMM9,%XMM10 |
(44) 0x12c39 VUNPCKHPD %XMM10,%XMM10,%XMM11 |
(44) 0x12c3e VADDPD %XMM10,%XMM11,%XMM12 |
(44) 0x12c43 VADDSD %XMM12,%XMM1,%XMM5 |
(44) 0x12c48 TEST $0x3,%SIL |
(44) 0x12c4c JE 12cb4 |
(44) 0x12c4e MOV %RSI,%R10 |
(44) 0x12c51 VADDPD %XMM2,%XMM9,%XMM6 |
(44) 0x12c55 AND $-0x4,%R10 |
(44) 0x12c59 ADD %R10,%RCX |
(44) 0x12c5c SUB %R10,%RSI |
(44) 0x12c5f CMP $0x1,%RSI |
(44) 0x12c63 JE 12c9f |
(44) 0x12c65 ADD %R15,%R10 |
(44) 0x12c68 MOV 0x38(%RSP),%R15 |
(44) 0x12c6d KMOVB %K1,%K3 |
(44) 0x12c71 VMOVDQU (%R12,%R10,8),%XMM13 |
(44) 0x12c77 VGATHERQPD (%R9,%XMM13,8),%XMM14{%K3} |
(44) 0x12c7e VFNMADD132PD (%R15,%R10,8),%XMM6,%XMM14 |
(44) 0x12c84 VUNPCKHPD %XMM14,%XMM14,%XMM15 |
(44) 0x12c89 VADDPD %XMM14,%XMM15,%XMM6 |
(44) 0x12c8e VADDSD %XMM1,%XMM6,%XMM5 |
(44) 0x12c92 TEST $0x1,%SIL |
(44) 0x12c96 JE 12cb4 |
(44) 0x12c98 AND $-0x2,%RSI |
(44) 0x12c9c ADD %RSI,%RCX |
(44) 0x12c9f MOV (%R12,%RCX,8),%RSI |
(44) 0x12ca3 MOV 0x38(%RSP),%RDI |
(44) 0x12ca8 VMOVSD (%R9,%RSI,8),%XMM1 |
(44) 0x12cae VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM5 |
(44) 0x12cb4 VMULSD %XMM5,%XMM4,%XMM7 |
(44) 0x12cb8 MOV 0x18(%RSP),%R13 |
(44) 0x12cbd MOV 0x8(%RSP),%RCX |
(44) 0x12cc2 VDIVSD (%RCX,%RDX,8),%XMM7,%XMM8 |
(44) 0x12cc7 VADDSD (%R13,%RDX,8),%XMM8,%XMM0 |
(44) 0x12cce VMOVSD %XMM0,(%R13,%RDX,8) |
(44) 0x12cd5 INC %RDX |
(44) 0x12cd8 CMP %RDX,0x28(%RSP) |
(44) 0x12cdd JNE 1274e |
0x12ce3 VZEROUPPER |
0x12ce6 LEA -0x28(%RBP),%RSP |
0x12cea POP %RBX |
0x12ceb POP %R12 |
0x12ced POP %R13 |
0x12cef POP %R14 |
0x12cf1 POP %R15 |
0x12cf3 POP %RBP |
0x12cf4 RET |
0x12cf5 NOPL (%RAX) |
0x12cf8 INC %RAX |
0x12cfb XOR %EDX,%EDX |
0x12cfd JMP 126cc |
0x12d02 NOPW (%RAX,%RAX,1) |
(44) 0x12d08 VMOVDQU (%RSI),%YMM6 |
(44) 0x12d0c KMOVB %K1,%K5 |
(44) 0x12d10 MOV $0x20,%ECX |
(44) 0x12d15 VGATHERQPD (%R8,%YMM6,8),%YMM1{%K5} |
(44) 0x12d1c VFNMADD231PD (%RDI),%YMM1,%YMM0 |
(44) 0x12d21 JMP 127f4 |
0x12d26 NOPW %CS:(%RAX,%RAX,1) |
(44) 0x12d30 VMOVDQU (%R13),%YMM7 |
(44) 0x12d36 KMOVB %K1,%K7 |
(44) 0x12d3a MOV $0x20,%EAX |
(44) 0x12d3f VGATHERQPD (%R9,%YMM7,8),%YMM5{%K7} |
(44) 0x12d46 VFNMADD231PD (%RDI),%YMM5,%YMM2 |
(44) 0x12d4b JMP 12a8f |
(44) 0x12d50 VMOVSD %XMM1,%XMM1,%XMM5 |
(44) 0x12d54 JMP 12cb4 |
0x12d59 NOPL (%RAX) |
(44) 0x12d60 VMOVSD %XMM2,%XMM2,%XMM1 |
(44) 0x12d64 JMP 129fa |
(44) 0x12d69 VMOVSD %XMM1,%XMM1,%XMM5 |
(44) 0x12d6d VXORPD %XMM6,%XMM6,%XMM6 |
(44) 0x12d71 XOR %R10D,%R10D |
(44) 0x12d74 JMP 12c5c |
(44) 0x12d79 VMOVSD %XMM2,%XMM2,%XMM1 |
(44) 0x12d7d VXORPD %XMM0,%XMM0,%XMM0 |
(44) 0x12d81 XOR %ECX,%ECX |
(44) 0x12d83 JMP 129ae |
0x12d88 NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | ams.c:3662-3682 |
Module | libparcsr_ls.so |
nb instructions | 64 |
nb uops | 71 |
loop length | 238 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 8 |
micro-operation queue | 11.83 cycles |
front end | 11.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 4.00 | 7.00 | 7.00 | 7.50 | 3.00 | 2.90 | 7.50 | 7.50 | 7.50 | 3.00 | 7.00 |
cycles | 3.10 | 5.67 | 7.00 | 7.00 | 7.50 | 3.00 | 2.90 | 7.50 | 7.50 | 7.50 | 3.00 | 7.00 |
Cycles executing div or sqrt instructions | 10.00 |
FE+BE cycles | 11.13-11.17 |
Stall cycles | 0.00 |
Front-end | 11.83 |
Dispatch | 7.50 |
DIV/SQRT | 10.00 |
Overall L1 | 11.83 |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x40,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL c110 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL c950 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %EBX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD %EAX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x40(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %RSI | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 12cf8 <hypre_ParCSRRelaxThreads._omp_fn.1+0x668> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 12ce6 <hypre_ParCSRRelaxThreads._omp_fn.1+0x656> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%R13),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
KXNORB %K1,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x48(%R13),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%R13),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%R13),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%R13),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%R13),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 1274e <hypre_ParCSRRelaxThreads._omp_fn.1+0xbe> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 126cc <hypre_ParCSRRelaxThreads._omp_fn.1+0x3c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ams.c:3662-3682 |
Module | libparcsr_ls.so |
nb instructions | 64 |
nb uops | 71 |
loop length | 238 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 8 |
micro-operation queue | 11.83 cycles |
front end | 11.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 4.00 | 7.00 | 7.00 | 7.50 | 3.00 | 2.90 | 7.50 | 7.50 | 7.50 | 3.00 | 7.00 |
cycles | 3.10 | 5.67 | 7.00 | 7.00 | 7.50 | 3.00 | 2.90 | 7.50 | 7.50 | 7.50 | 3.00 | 7.00 |
Cycles executing div or sqrt instructions | 10.00 |
FE+BE cycles | 11.13-11.17 |
Stall cycles | 0.00 |
Front-end | 11.83 |
Dispatch | 7.50 |
DIV/SQRT | 10.00 |
Overall L1 | 11.83 |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x40,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL c110 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL c950 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %EBX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD %EAX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x40(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %RSI | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 12cf8 <hypre_ParCSRRelaxThreads._omp_fn.1+0x668> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 12ce6 <hypre_ParCSRRelaxThreads._omp_fn.1+0x656> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%R13),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
KXNORB %K1,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x48(%R13),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%R13),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%R13),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%R13),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%R13),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 1274e <hypre_ParCSRRelaxThreads._omp_fn.1+0xbe> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 126cc <hypre_ParCSRRelaxThreads._omp_fn.1+0x3c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_ParCSRRelaxThreads._omp_fn.1– | 34.95 | 8.19 |
▼Loop 44 - ams.c:3662-3682 - libparcsr_ls.so– | 0.59 | 0.1 |
○Loop 45 - ams.c:3677-3680 - libparcsr_ls.so | 0 | 0 |
○Loop 46 - ams.c:3672-3675 - libparcsr_ls.so | 0 | 0 |