Loop Id: 44 | Module: libparcsr_ls.so | Source: ams.c:3662-3682 [...] | Coverage: 34.66% |
---|
Loop Id: 44 | Module: libparcsr_ls.so | Source: ams.c:3662-3682 [...] | Coverage: 34.66% |
---|
0x12740 INC %RDX |
0x12743 CMP %RDX,0x28(%RSP) |
0x12748 JE 12ce3 |
0x1274e MOV 0x30(%RSP),%RCX |
0x12753 MOV (%RCX,%RDX,8),%RAX |
0x12757 LEA (,%RAX,8),%RSI |
0x1275f MOV %RAX,%R14 |
0x12762 LEA (%R11,%RSI,1),%RDI |
0x12766 VCOMISD (%RDI),%XMM3 |
0x1276a JE 12740 |
0x1276c MOV 0x20(%RSP),%R10 |
0x12771 MOV 0x30(%RSP),%R15 |
0x12776 VMOVSD (%R10,%RDX,8),%XMM2 |
0x1277c MOV 0x8(%R15,%RDX,8),%R10 |
0x12781 CMP %R10,%RAX |
0x12784 JGE 12d60 |
0x1278a SUB %RAX,%R10 |
0x1278d LEA -0x1(%R10),%R13 |
0x12791 CMP $0x2,%R13 |
0x12795 JBE 12d79 |
0x1279b MOV %R10,%R15 |
0x1279e ADD %RBX,%RSI |
0x127a1 VXORPD %XMM0,%XMM0,%XMM0 |
0x127a5 XOR %ECX,%ECX |
0x127a7 SHR $0x2,%R15 |
0x127ab SAL $0x5,%R15 |
0x127af LEA -0x20(%R15),%R13 |
0x127b3 SHR $0x5,%R13 |
0x127b7 INC %R13 |
0x127ba AND $0x7,%R13D |
0x127be JE 12899 |
0x127c4 CMP $0x1,%R13 |
0x127c8 JE 12876 |
0x127ce CMP $0x2,%R13 |
0x127d2 JE 1285c |
0x127d8 CMP $0x3,%R13 |
0x127dc JE 12842 |
0x127de CMP $0x4,%R13 |
0x127e2 JE 12828 |
0x127e4 CMP $0x5,%R13 |
0x127e8 JE 1280e |
0x127ea CMP $0x6,%R13 |
0x127ee JNE 12d08 |
0x127f4 VMOVDQU (%RSI,%RCX,1),%YMM7 |
0x127f9 KMOVB %K1,%K3 |
0x127fd VGATHERQPD (%R8,%YMM7,8),%YMM5{%K3} |
0x12804 VFNMADD231PD (%RDI,%RCX,1),%YMM5,%YMM0 |
0x1280a ADD $0x20,%RCX |
0x1280e VMOVDQU (%RSI,%RCX,1),%YMM8 |
0x12813 KMOVB %K1,%K2 |
0x12817 VGATHERQPD (%R8,%YMM8,8),%YMM9{%K2} |
0x1281e VFNMADD231PD (%RDI,%RCX,1),%YMM9,%YMM0 |
0x12824 ADD $0x20,%RCX |
0x12828 VMOVDQU (%RSI,%RCX,1),%YMM10 |
0x1282d KMOVB %K1,%K4 |
0x12831 VGATHERQPD (%R8,%YMM10,8),%YMM11{%K4} |
0x12838 VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 |
0x1283e ADD $0x20,%RCX |
0x12842 VMOVDQU (%RSI,%RCX,1),%YMM12 |
0x12847 KMOVB %K1,%K7 |
0x1284b VGATHERQPD (%R8,%YMM12,8),%YMM13{%K7} |
0x12852 VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 |
0x12858 ADD $0x20,%RCX |
0x1285c VMOVDQU (%RSI,%RCX,1),%YMM14 |
0x12861 KMOVB %K1,%K6 |
0x12865 VGATHERQPD (%R8,%YMM14,8),%YMM15{%K6} |
0x1286c VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 |
0x12872 ADD $0x20,%RCX |
0x12876 VMOVDQU (%RSI,%RCX,1),%YMM6 |
0x1287b KMOVB %K1,%K5 |
0x1287f VGATHERQPD (%R8,%YMM6,8),%YMM1{%K5} |
0x12886 VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 |
0x1288c ADD $0x20,%RCX |
0x12890 CMP %RCX,%R15 |
0x12893 JE 1297f |
(46) 0x12899 VMOVDQU (%RSI,%RCX,1),%YMM7 |
(46) 0x1289e KMOVB %K1,%K3 |
(46) 0x128a2 VMOVDQU 0x20(%RSI,%RCX,1),%YMM8 |
(46) 0x128a8 KMOVB %K1,%K2 |
(46) 0x128ac KMOVB %K1,%K4 |
(46) 0x128b0 KMOVB %K1,%K7 |
(46) 0x128b4 VMOVDQU 0x40(%RSI,%RCX,1),%YMM10 |
(46) 0x128ba VMOVDQU 0x60(%RSI,%RCX,1),%YMM12 |
(46) 0x128c0 KMOVB %K1,%K6 |
(46) 0x128c4 KMOVB %K1,%K5 |
(46) 0x128c8 VGATHERQPD (%R8,%YMM7,8),%YMM5{%K3} |
(46) 0x128cf VGATHERQPD (%R8,%YMM8,8),%YMM9{%K2} |
(46) 0x128d6 VMOVDQU 0x80(%RSI,%RCX,1),%YMM13 |
(46) 0x128df VMOVDQU 0xa0(%RSI,%RCX,1),%YMM15 |
(46) 0x128e8 KMOVB %K1,%K3 |
(46) 0x128ec KMOVB %K1,%K2 |
(46) 0x128f0 VFNMADD231PD (%RDI,%RCX,1),%YMM5,%YMM0 |
(46) 0x128f6 VGATHERQPD (%R8,%YMM10,8),%YMM11{%K4} |
(46) 0x128fd VMOVDQU 0xc0(%RSI,%RCX,1),%YMM6 |
(46) 0x12906 VGATHERQPD (%R8,%YMM13,8),%YMM14{%K6} |
(46) 0x1290d VGATHERQPD (%R8,%YMM15,8),%YMM7{%K5} |
(46) 0x12914 VMOVDQU 0xe0(%RSI,%RCX,1),%YMM5 |
(46) 0x1291d VGATHERQPD (%R8,%YMM6,8),%YMM1{%K3} |
(46) 0x12924 VFNMADD231PD 0x20(%RDI,%RCX,1),%YMM9,%YMM0 |
(46) 0x1292b VFNMADD132PD 0x40(%RDI,%RCX,1),%YMM0,%YMM11 |
(46) 0x12932 VGATHERQPD (%R8,%YMM12,8),%YMM0{%K7} |
(46) 0x12939 VFNMADD132PD 0x60(%RDI,%RCX,1),%YMM11,%YMM0 |
(46) 0x12940 VFNMADD132PD 0x80(%RDI,%RCX,1),%YMM0,%YMM14 |
(46) 0x1294a VGATHERQPD (%R8,%YMM5,8),%YMM0{%K2} |
(46) 0x12951 VFNMADD132PD 0xa0(%RDI,%RCX,1),%YMM14,%YMM7 |
(46) 0x1295b VFNMADD132PD 0xc0(%RDI,%RCX,1),%YMM7,%YMM1 |
(46) 0x12965 VFNMADD132PD 0xe0(%RDI,%RCX,1),%YMM1,%YMM0 |
(46) 0x1296f ADD $0x100,%RCX |
(46) 0x12976 CMP %RCX,%R15 |
(46) 0x12979 JNE 12899 |
0x1297f VEXTRACTF64X2 $0x1,%YMM0,%XMM8 |
0x12986 VADDPD %XMM0,%XMM8,%XMM9 |
0x1298a VUNPCKHPD %XMM9,%XMM9,%XMM10 |
0x1298f VADDPD %XMM9,%XMM10,%XMM11 |
0x12994 VADDSD %XMM11,%XMM2,%XMM1 |
0x12999 TEST $0x3,%R10B |
0x1299d JE 129fa |
0x1299f MOV %R10,%RCX |
0x129a2 VADDPD %XMM8,%XMM0,%XMM0 |
0x129a7 AND $-0x4,%RCX |
0x129ab ADD %RCX,%RAX |
0x129ae SUB %RCX,%R10 |
0x129b1 CMP $0x1,%R10 |
0x129b5 JE 129ea |
0x129b7 ADD %R14,%RCX |
0x129ba KMOVB %K1,%K4 |
0x129be VMOVDQU (%RBX,%RCX,8),%XMM12 |
0x129c3 VGATHERQPD (%R8,%XMM12,8),%XMM13{%K4} |
0x129ca VFNMADD231PD (%R11,%RCX,8),%XMM13,%XMM0 |
0x129d0 VUNPCKHPD %XMM0,%XMM0,%XMM14 |
0x129d4 VADDPD %XMM0,%XMM14,%XMM15 |
0x129d8 VADDSD %XMM15,%XMM2,%XMM1 |
0x129dd TEST $0x1,%R10B |
0x129e1 JE 129fa |
0x129e3 AND $-0x2,%R10 |
0x129e7 ADD %R10,%RAX |
0x129ea MOV (%RBX,%RAX,8),%R14 |
0x129ee VMOVSD (%R11,%RAX,8),%XMM2 |
0x129f4 VFNMADD231SD (%R8,%R14,8),%XMM2,%XMM1 |
0x129fa MOV 0x10(%RSP),%RAX |
0x129ff MOV (%RAX,%RDX,8),%RCX |
0x12a03 MOV 0x8(%RAX,%RDX,8),%RSI |
0x12a08 CMP %RSI,%RCX |
0x12a0b JGE 12d50 |
0x12a11 SUB %RCX,%RSI |
0x12a14 MOV %RCX,%R15 |
0x12a17 LEA -0x1(%RSI),%RDI |
0x12a1b CMP $0x2,%RDI |
0x12a1f JBE 12d69 |
0x12a25 MOV 0x38(%RSP),%R14 |
0x12a2a LEA (,%RCX,8),%RDI |
0x12a32 XOR %EAX,%EAX |
0x12a34 VXORPD %XMM2,%XMM2,%XMM2 |
0x12a38 LEA (%R12,%RDI,1),%R13 |
0x12a3c ADD %R14,%RDI |
0x12a3f MOV %RSI,%R14 |
0x12a42 SHR $0x2,%R14 |
0x12a46 SAL $0x5,%R14 |
0x12a4a LEA -0x20(%R14),%R10 |
0x12a4e SHR $0x5,%R10 |
0x12a52 INC %R10 |
0x12a55 AND $0x7,%R10D |
0x12a59 JE 12b40 |
0x12a5f CMP $0x1,%R10 |
0x12a63 JE 12b1b |
0x12a69 CMP $0x2,%R10 |
0x12a6d JE 12aff |
0x12a73 CMP $0x3,%R10 |
0x12a77 JE 12ae3 |
0x12a79 CMP $0x4,%R10 |
0x12a7d JE 12ac7 |
0x12a7f CMP $0x5,%R10 |
0x12a83 JE 12aab |
0x12a85 CMP $0x6,%R10 |
0x12a89 JNE 12d30 |
0x12a8f VMOVDQU (%R13,%RAX,1),%YMM8 |
0x12a96 KMOVB %K1,%K6 |
0x12a9a VGATHERQPD (%R9,%YMM8,8),%YMM0{%K6} |
0x12aa1 VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM2 |
0x12aa7 ADD $0x20,%RAX |
0x12aab VMOVDQU (%R13,%RAX,1),%YMM9 |
0x12ab2 KMOVB %K1,%K5 |
0x12ab6 VGATHERQPD (%R9,%YMM9,8),%YMM10{%K5} |
0x12abd VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM2 |
0x12ac3 ADD $0x20,%RAX |
0x12ac7 VMOVDQU (%R13,%RAX,1),%YMM11 |
0x12ace KMOVB %K1,%K3 |
0x12ad2 VGATHERQPD (%R9,%YMM11,8),%YMM12{%K3} |
0x12ad9 VFNMADD231PD (%RDI,%RAX,1),%YMM12,%YMM2 |
0x12adf ADD $0x20,%RAX |
0x12ae3 VMOVDQU (%R13,%RAX,1),%YMM13 |
0x12aea KMOVB %K1,%K2 |
0x12aee VGATHERQPD (%R9,%YMM13,8),%YMM14{%K2} |
0x12af5 VFNMADD231PD (%RDI,%RAX,1),%YMM14,%YMM2 |
0x12afb ADD $0x20,%RAX |
0x12aff VMOVDQU (%R13,%RAX,1),%YMM15 |
0x12b06 KMOVB %K1,%K4 |
0x12b0a VGATHERQPD (%R9,%YMM15,8),%YMM6{%K4} |
0x12b11 VFNMADD231PD (%RDI,%RAX,1),%YMM6,%YMM2 |
0x12b17 ADD $0x20,%RAX |
0x12b1b VMOVDQU (%R13,%RAX,1),%YMM7 |
0x12b22 KMOVB %K1,%K7 |
0x12b26 VGATHERQPD (%R9,%YMM7,8),%YMM5{%K7} |
0x12b2d VFNMADD231PD (%RDI,%RAX,1),%YMM5,%YMM2 |
0x12b33 ADD $0x20,%RAX |
0x12b37 CMP %RAX,%R14 |
0x12b3a JE 12c2e |
(45) 0x12b40 VMOVDQU (%R13,%RAX,1),%YMM8 |
(45) 0x12b47 KMOVB %K1,%K6 |
(45) 0x12b4b VMOVDQU 0x20(%R13,%RAX,1),%YMM9 |
(45) 0x12b52 KMOVB %K1,%K5 |
(45) 0x12b56 KMOVB %K1,%K3 |
(45) 0x12b5a KMOVB %K1,%K2 |
(45) 0x12b5e VMOVDQU 0x40(%R13,%RAX,1),%YMM11 |
(45) 0x12b65 VMOVDQU 0x60(%R13,%RAX,1),%YMM13 |
(45) 0x12b6c KMOVB %K1,%K4 |
(45) 0x12b70 KMOVB %K1,%K7 |
(45) 0x12b74 VGATHERQPD (%R9,%YMM8,8),%YMM0{%K6} |
(45) 0x12b7b VGATHERQPD (%R9,%YMM9,8),%YMM10{%K5} |
(45) 0x12b82 VMOVDQU 0x80(%R13,%RAX,1),%YMM14 |
(45) 0x12b8c VMOVDQU 0xa0(%R13,%RAX,1),%YMM6 |
(45) 0x12b96 KMOVB %K1,%K6 |
(45) 0x12b9a KMOVB %K1,%K5 |
(45) 0x12b9e VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM2 |
(45) 0x12ba4 VGATHERQPD (%R9,%YMM11,8),%YMM12{%K3} |
(45) 0x12bab VMOVDQU 0xc0(%R13,%RAX,1),%YMM7 |
(45) 0x12bb5 VGATHERQPD (%R9,%YMM14,8),%YMM15{%K4} |
(45) 0x12bbc VGATHERQPD (%R9,%YMM6,8),%YMM5{%K7} |
(45) 0x12bc3 VMOVDQU 0xe0(%R13,%RAX,1),%YMM0 |
(45) 0x12bcd VGATHERQPD (%R9,%YMM7,8),%YMM8{%K6} |
(45) 0x12bd4 VFNMADD231PD 0x20(%RDI,%RAX,1),%YMM10,%YMM2 |
(45) 0x12bdb VFNMADD132PD 0x40(%RDI,%RAX,1),%YMM2,%YMM12 |
(45) 0x12be2 VGATHERQPD (%R9,%YMM13,8),%YMM2{%K2} |
(45) 0x12be9 VFNMADD132PD 0x60(%RDI,%RAX,1),%YMM12,%YMM2 |
(45) 0x12bf0 VFNMADD132PD 0x80(%RDI,%RAX,1),%YMM2,%YMM15 |
(45) 0x12bfa VGATHERQPD (%R9,%YMM0,8),%YMM2{%K5} |
(45) 0x12c01 VFNMADD132PD 0xa0(%RDI,%RAX,1),%YMM15,%YMM5 |
(45) 0x12c0b VFNMADD132PD 0xc0(%RDI,%RAX,1),%YMM5,%YMM8 |
(45) 0x12c15 VFNMADD132PD 0xe0(%RDI,%RAX,1),%YMM8,%YMM2 |
(45) 0x12c1f ADD $0x100,%RAX |
(45) 0x12c25 CMP %RAX,%R14 |
(45) 0x12c28 JNE 12b40 |
0x12c2e VEXTRACTF64X2 $0x1,%YMM2,%XMM9 |
0x12c35 VADDPD %XMM2,%XMM9,%XMM10 |
0x12c39 VUNPCKHPD %XMM10,%XMM10,%XMM11 |
0x12c3e VADDPD %XMM10,%XMM11,%XMM12 |
0x12c43 VADDSD %XMM12,%XMM1,%XMM5 |
0x12c48 TEST $0x3,%SIL |
0x12c4c JE 12cb4 |
0x12c4e MOV %RSI,%R10 |
0x12c51 VADDPD %XMM2,%XMM9,%XMM6 |
0x12c55 AND $-0x4,%R10 |
0x12c59 ADD %R10,%RCX |
0x12c5c SUB %R10,%RSI |
0x12c5f CMP $0x1,%RSI |
0x12c63 JE 12c9f |
0x12c65 ADD %R15,%R10 |
0x12c68 MOV 0x38(%RSP),%R15 |
0x12c6d KMOVB %K1,%K3 |
0x12c71 VMOVDQU (%R12,%R10,8),%XMM13 |
0x12c77 VGATHERQPD (%R9,%XMM13,8),%XMM14{%K3} |
0x12c7e VFNMADD132PD (%R15,%R10,8),%XMM6,%XMM14 |
0x12c84 VUNPCKHPD %XMM14,%XMM14,%XMM15 |
0x12c89 VADDPD %XMM14,%XMM15,%XMM6 |
0x12c8e VADDSD %XMM1,%XMM6,%XMM5 |
0x12c92 TEST $0x1,%SIL |
0x12c96 JE 12cb4 |
0x12c98 AND $-0x2,%RSI |
0x12c9c ADD %RSI,%RCX |
0x12c9f MOV (%R12,%RCX,8),%RSI |
0x12ca3 MOV 0x38(%RSP),%RDI |
0x12ca8 VMOVSD (%R9,%RSI,8),%XMM1 |
0x12cae VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM5 |
0x12cb4 VMULSD %XMM5,%XMM4,%XMM7 |
0x12cb8 MOV 0x18(%RSP),%R13 |
0x12cbd MOV 0x8(%RSP),%RCX |
0x12cc2 VDIVSD (%RCX,%RDX,8),%XMM7,%XMM8 |
0x12cc7 VADDSD (%R13,%RDX,8),%XMM8,%XMM0 |
0x12cce VMOVSD %XMM0,(%R13,%RDX,8) |
0x12cd5 INC %RDX |
0x12cd8 CMP %RDX,0x28(%RSP) |
0x12cdd JNE 1274e |
0x12d08 VMOVDQU (%RSI),%YMM6 |
0x12d0c KMOVB %K1,%K5 |
0x12d10 MOV $0x20,%ECX |
0x12d15 VGATHERQPD (%R8,%YMM6,8),%YMM1{%K5} |
0x12d1c VFNMADD231PD (%RDI),%YMM1,%YMM0 |
0x12d21 JMP 127f4 |
0x12d30 VMOVDQU (%R13),%YMM7 |
0x12d36 KMOVB %K1,%K7 |
0x12d3a MOV $0x20,%EAX |
0x12d3f VGATHERQPD (%R9,%YMM7,8),%YMM5{%K7} |
0x12d46 VFNMADD231PD (%RDI),%YMM5,%YMM2 |
0x12d4b JMP 12a8f |
0x12d50 VMOVSD %XMM1,%XMM1,%XMM5 |
0x12d54 JMP 12cb4 |
0x12d60 VMOVSD %XMM2,%XMM2,%XMM1 |
0x12d64 JMP 129fa |
0x12d69 VMOVSD %XMM1,%XMM1,%XMM5 |
0x12d6d VXORPD %XMM6,%XMM6,%XMM6 |
0x12d71 XOR %R10D,%R10D |
0x12d74 JMP 12c5c |
0x12d79 VMOVSD %XMM2,%XMM2,%XMM1 |
0x12d7d VXORPD %XMM0,%XMM0,%XMM0 |
0x12d81 XOR %ECX,%ECX |
0x12d83 JMP 129ae |
/home/eoseret/qaas_runs_CPU_9468/171-716-5699/intel/AMG/build/AMG/AMG/parcsr_ls/ams.c: 3662 - 3682 |
-------------------------------------------------------------------------------- |
3662: #pragma omp parallel for private(i,ii,jj,res) HYPRE_SMP_SCHEDULE |
[...] |
3669: if (A_diag_data[A_diag_i[i]] != zero) |
3670: { |
3671: res = f_data[i]; |
3672: for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) |
3673: { |
3674: ii = A_diag_j[jj]; |
3675: res -= A_diag_data[jj] * Vtemp_data[ii]; |
3676: } |
3677: for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) |
3678: { |
3679: ii = A_offd_j[jj]; |
3680: res -= A_offd_data[jj] * Vext_data[ii]; |
3681: } |
3682: u_data[i] += (relax_weight*res)/l1_norms[i]; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.58 |
CQA speedup if FP arith vectorized | 1.55 |
CQA speedup if fully vectorized | 3.38 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | micro-operation queue, |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source | ams.c:3662-3662,ams.c:3669-3672,ams.c:3675-3677,ams.c:3680-3682 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 53.17 |
CQA cycles if no scalar integer | 33.67 |
CQA cycles if FP arith vectorized | 34.33 |
CQA cycles if fully vectorized | 15.71 |
Front-end cycles | 53.17 |
DIV/SQRT cycles | 37.50 |
P0 cycles | 37.50 |
P1 cycles | 39.00 |
P2 cycles | 39.00 |
P3 cycles | 0.50 |
P4 cycles | 37.30 |
P5 cycles | 37.30 |
P6 cycles | 0.50 |
P7 cycles | 0.50 |
P8 cycles | 0.50 |
P9 cycles | 37.40 |
P10 cycles | 39.00 |
P11 cycles | 4.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 109.19 - 378.49 |
Stall cycles (UFS) | 60.86 - 330.14 |
Nb insns | 236.00 |
Nb uops | 301.00 |
Nb loads | 73.00 |
Nb stores | 1.00 |
Nb stack references | 7.00 |
FLOP/cycle | 2.76 |
Nb FLOP add-sub | 21.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 62.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 31.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 1640.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 59.05 |
Vectorization ratio load | 84.21 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 61.54 |
Vectorization ratio fma | 88.89 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 42.31 |
Vector-efficiency ratio all | 29.88 |
Vector-efficiency ratio load | 41.45 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 20.19 |
Vector-efficiency ratio fma | 43.06 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 24.52 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.58 |
CQA speedup if FP arith vectorized | 1.55 |
CQA speedup if fully vectorized | 3.38 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | micro-operation queue, |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source | ams.c:3662-3662,ams.c:3669-3672,ams.c:3675-3677,ams.c:3680-3682 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 53.17 |
CQA cycles if no scalar integer | 33.67 |
CQA cycles if FP arith vectorized | 34.33 |
CQA cycles if fully vectorized | 15.71 |
Front-end cycles | 53.17 |
DIV/SQRT cycles | 37.50 |
P0 cycles | 37.50 |
P1 cycles | 39.00 |
P2 cycles | 39.00 |
P3 cycles | 0.50 |
P4 cycles | 37.30 |
P5 cycles | 37.30 |
P6 cycles | 0.50 |
P7 cycles | 0.50 |
P8 cycles | 0.50 |
P9 cycles | 37.40 |
P10 cycles | 39.00 |
P11 cycles | 4.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 109.19 - 378.49 |
Stall cycles (UFS) | 60.86 - 330.14 |
Nb insns | 236.00 |
Nb uops | 301.00 |
Nb loads | 73.00 |
Nb stores | 1.00 |
Nb stack references | 7.00 |
FLOP/cycle | 2.76 |
Nb FLOP add-sub | 21.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 62.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 31.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 1640.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 59.05 |
Vectorization ratio load | 84.21 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 61.54 |
Vectorization ratio fma | 88.89 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 42.31 |
Vector-efficiency ratio all | 29.88 |
Vector-efficiency ratio load | 41.45 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 20.19 |
Vector-efficiency ratio fma | 43.06 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 24.52 |
Path / |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source file and lines | ams.c:3662-3682 |
Module | libparcsr_ls.so |
nb instructions | 236 |
nb uops | 301 |
loop length | 1086 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 7 |
ADD-SUB / MUL ratio | 13.00 |
micro-operation queue | 53.17 cycles |
front end | 53.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 37.50 | 37.50 | 39.00 | 39.00 | 0.50 | 37.30 | 37.30 | 0.50 | 0.50 | 0.50 | 37.40 | 39.00 |
cycles | 37.50 | 37.50 | 39.00 | 39.00 | 0.50 | 37.30 | 37.30 | 0.50 | 0.50 | 0.50 | 37.40 | 39.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 109.19-378.49 |
Stall cycles | 60.86-330.14 |
ROB full (events) | 67.30-344.15 |
Front-end | 53.17 |
Dispatch | 39.00 |
DIV/SQRT | 4.00 |
Overall L1 | 53.17 |
all | 43% |
load | 94% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 67% |
load | 80% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 70% |
all | 59% |
load | 84% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 42% |
all | 27% |
load | 44% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 31% |
load | 40% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 32% |
all | 29% |
load | 41% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 24% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,0x28(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 12ce3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x653> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RAX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R11,%RSI,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCOMISD (%RDI),%XMM3 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JE 12740 <hypre_ParCSRRelaxThreads._omp_fn.1+0xb0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R10,%RDX,8),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R15,%RDX,8),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 12d60 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6d0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%R10),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x2,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 12d79 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6e9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SAL $0x5,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA -0x20(%R15),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 12899 <hypre_ParCSRRelaxThreads._omp_fn.1+0x209> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12876 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 1285c <hypre_ParCSRRelaxThreads._omp_fn.1+0x1cc> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12842 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1b2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12828 <hypre_ParCSRRelaxThreads._omp_fn.1+0x198> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 1280e <hypre_ParCSRRelaxThreads._omp_fn.1+0x17e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 12d08 <hypre_ParCSRRelaxThreads._omp_fn.1+0x678> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%RSI,%RCX,1),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM7,8),%YMM5{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM5,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM8,8),%YMM9{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM9,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM10,8),%YMM11{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM12,8),%YMM13{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM14,8),%YMM15{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM6,8),%YMM1{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 1297f <hypre_ParCSRRelaxThreads._omp_fn.1+0x2ef> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF64X2 $0x1,%YMM0,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM0,%XMM8,%XMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM9,%XMM9,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM9,%XMM10,%XMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM11,%XMM2,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x3,%R10B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 129fa <hypre_ParCSRRelaxThreads._omp_fn.1+0x36a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM8,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 129ea <hypre_ParCSRRelaxThreads._omp_fn.1+0x35a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU (%RBX,%RCX,8),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERQPD (%R8,%XMM12,8),%XMM13{%K4} | 5 | 1 | 0.50 | 0.67 | 0.67 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0-29 | 1 |
VFNMADD231PD (%R11,%RCX,8),%XMM13,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM0,%XMM0,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM0,%XMM14,%XMM15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM15,%XMM2,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x1,%R10B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 129fa <hypre_ParCSRRelaxThreads._omp_fn.1+0x36a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%RBX,%RAX,8),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11,%RAX,8),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFNMADD231SD (%R8,%R14,8),%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 12d50 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x2,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 12d69 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6d9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R12,%RDI,1),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R14,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SAL $0x5,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA -0x20(%R14),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 12b40 <hypre_ParCSRRelaxThreads._omp_fn.1+0x4b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12b1b <hypre_ParCSRRelaxThreads._omp_fn.1+0x48b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12aff <hypre_ParCSRRelaxThreads._omp_fn.1+0x46f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12ae3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x453> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12ac7 <hypre_ParCSRRelaxThreads._omp_fn.1+0x437> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12aab <hypre_ParCSRRelaxThreads._omp_fn.1+0x41b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 12d30 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%R13,%RAX,1),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM8,8),%YMM0{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM9,8),%YMM10{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM11,8),%YMM12{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM12,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM13,8),%YMM14{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM14,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM15,8),%YMM6{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM6,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM7,8),%YMM5{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM5,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12c2e <hypre_ParCSRRelaxThreads._omp_fn.1+0x59e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF64X2 $0x1,%YMM2,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM2,%XMM9,%XMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM10,%XMM10,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM10,%XMM11,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM12,%XMM1,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x3,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 12cb4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x624> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM2,%XMM9,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R10,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12c9f <hypre_ParCSRRelaxThreads._omp_fn.1+0x60f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R15,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU (%R12,%R10,8),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERQPD (%R9,%XMM13,8),%XMM14{%K3} | 5 | 1 | 0.50 | 0.67 | 0.67 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0-29 | 1 |
VFNMADD132PD (%R15,%R10,8),%XMM6,%XMM14 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM14,%XMM14,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM14,%XMM15,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM1,%XMM6,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x1,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 12cb4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x624> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R12,%RCX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R9,%RSI,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMULSD %XMM5,%XMM4,%XMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VDIVSD (%RCX,%RDX,8),%XMM7,%XMM8 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VADDSD (%R13,%RDX,8),%XMM8,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM0,(%R13,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,0x28(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 1274e <hypre_ParCSRRelaxThreads._omp_fn.1+0xbe> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%RSI),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV $0x20,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VGATHERQPD (%R8,%YMM6,8),%YMM1{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI),%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
JMP 127f4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x164> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVDQU (%R13),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV $0x20,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VGATHERQPD (%R9,%YMM7,8),%YMM5{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI),%YMM5,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
JMP 12a8f <hypre_ParCSRRelaxThreads._omp_fn.1+0x3ff> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM1,%XMM1,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 12cb4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x624> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM2,%XMM2,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 129fa <hypre_ParCSRRelaxThreads._omp_fn.1+0x36a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM1,%XMM1,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 12c5c <hypre_ParCSRRelaxThreads._omp_fn.1+0x5cc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM2,%XMM2,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 129ae <hypre_ParCSRRelaxThreads._omp_fn.1+0x31e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source file and lines | ams.c:3662-3682 |
Module | libparcsr_ls.so |
nb instructions | 236 |
nb uops | 301 |
loop length | 1086 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 7 |
ADD-SUB / MUL ratio | 13.00 |
micro-operation queue | 53.17 cycles |
front end | 53.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 37.50 | 37.50 | 39.00 | 39.00 | 0.50 | 37.30 | 37.30 | 0.50 | 0.50 | 0.50 | 37.40 | 39.00 |
cycles | 37.50 | 37.50 | 39.00 | 39.00 | 0.50 | 37.30 | 37.30 | 0.50 | 0.50 | 0.50 | 37.40 | 39.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 109.19-378.49 |
Stall cycles | 60.86-330.14 |
ROB full (events) | 67.30-344.15 |
Front-end | 53.17 |
Dispatch | 39.00 |
DIV/SQRT | 4.00 |
Overall L1 | 53.17 |
all | 43% |
load | 94% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 67% |
load | 80% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 70% |
all | 59% |
load | 84% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 42% |
all | 27% |
load | 44% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 31% |
load | 40% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 32% |
all | 29% |
load | 41% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 24% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,0x28(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 12ce3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x653> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%RDX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RAX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R11,%RSI,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCOMISD (%RDI),%XMM3 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JE 12740 <hypre_ParCSRRelaxThreads._omp_fn.1+0xb0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R10,%RDX,8),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R15,%RDX,8),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 12d60 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6d0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%R10),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x2,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 12d79 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6e9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x2,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SAL $0x5,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA -0x20(%R15),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 12899 <hypre_ParCSRRelaxThreads._omp_fn.1+0x209> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12876 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 1285c <hypre_ParCSRRelaxThreads._omp_fn.1+0x1cc> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12842 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1b2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12828 <hypre_ParCSRRelaxThreads._omp_fn.1+0x198> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 1280e <hypre_ParCSRRelaxThreads._omp_fn.1+0x17e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 12d08 <hypre_ParCSRRelaxThreads._omp_fn.1+0x678> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%RSI,%RCX,1),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM7,8),%YMM5{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM5,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM8,8),%YMM9{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM9,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM10,8),%YMM11{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM12,8),%YMM13{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM14,8),%YMM15{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%RSI,%RCX,1),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R8,%YMM6,8),%YMM1{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 1297f <hypre_ParCSRRelaxThreads._omp_fn.1+0x2ef> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF64X2 $0x1,%YMM0,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM0,%XMM8,%XMM9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM9,%XMM9,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM9,%XMM10,%XMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM11,%XMM2,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x3,%R10B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 129fa <hypre_ParCSRRelaxThreads._omp_fn.1+0x36a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM8,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 129ea <hypre_ParCSRRelaxThreads._omp_fn.1+0x35a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU (%RBX,%RCX,8),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERQPD (%R8,%XMM12,8),%XMM13{%K4} | 5 | 1 | 0.50 | 0.67 | 0.67 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0-29 | 1 |
VFNMADD231PD (%R11,%RCX,8),%XMM13,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM0,%XMM0,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM0,%XMM14,%XMM15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM15,%XMM2,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x1,%R10B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 129fa <hypre_ParCSRRelaxThreads._omp_fn.1+0x36a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%RBX,%RAX,8),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11,%RAX,8),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFNMADD231SD (%R8,%R14,8),%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 12d50 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x2,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 12d69 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6d9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R12,%RDI,1),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R14,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SAL $0x5,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA -0x20(%R14),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x5,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
INC %R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 12b40 <hypre_ParCSRRelaxThreads._omp_fn.1+0x4b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12b1b <hypre_ParCSRRelaxThreads._omp_fn.1+0x48b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12aff <hypre_ParCSRRelaxThreads._omp_fn.1+0x46f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12ae3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x453> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12ac7 <hypre_ParCSRRelaxThreads._omp_fn.1+0x437> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12aab <hypre_ParCSRRelaxThreads._omp_fn.1+0x41b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 12d30 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%R13,%RAX,1),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM8,8),%YMM0{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM9,8),%YMM10{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM11,8),%YMM12{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM12,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM13,8),%YMM14{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM14,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM15,8),%YMM6{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM6,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVDQU (%R13,%RAX,1),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%R9,%YMM7,8),%YMM5{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI,%RAX,1),%YMM5,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x20,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12c2e <hypre_ParCSRRelaxThreads._omp_fn.1+0x59e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VEXTRACTF64X2 $0x1,%YMM2,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM2,%XMM9,%XMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VUNPCKHPD %XMM10,%XMM10,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM10,%XMM11,%XMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM12,%XMM1,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x3,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 12cb4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x624> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDPD %XMM2,%XMM9,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R10,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 12c9f <hypre_ParCSRRelaxThreads._omp_fn.1+0x60f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R15,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU (%R12,%R10,8),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VGATHERQPD (%R9,%XMM13,8),%XMM14{%K3} | 5 | 1 | 0.50 | 0.67 | 0.67 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0-29 | 1 |
VFNMADD132PD (%R15,%R10,8),%XMM6,%XMM14 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM14,%XMM14,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VADDPD %XMM14,%XMM15,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM1,%XMM6,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x1,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 12cb4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x624> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
AND $-0x2,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R12,%RCX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R9,%RSI,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMULSD %XMM5,%XMM4,%XMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VDIVSD (%RCX,%RDX,8),%XMM7,%XMM8 | 1 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 13-15 | 4 |
VADDSD (%R13,%RDX,8),%XMM8,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM0,(%R13,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,0x28(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 1274e <hypre_ParCSRRelaxThreads._omp_fn.1+0xbe> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVDQU (%RSI),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV $0x20,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VGATHERQPD (%R8,%YMM6,8),%YMM1{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI),%YMM1,%YMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
JMP 127f4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x164> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVDQU (%R13),%YMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV $0x20,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VGATHERQPD (%R9,%YMM7,8),%YMM5{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFNMADD231PD (%RDI),%YMM5,%YMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
JMP 12a8f <hypre_ParCSRRelaxThreads._omp_fn.1+0x3ff> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM1,%XMM1,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 12cb4 <hypre_ParCSRRelaxThreads._omp_fn.1+0x624> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM2,%XMM2,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 129fa <hypre_ParCSRRelaxThreads._omp_fn.1+0x36a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM1,%XMM1,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 12c5c <hypre_ParCSRRelaxThreads._omp_fn.1+0x5cc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
VMOVSD %XMM2,%XMM2,%XMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 129ae <hypre_ParCSRRelaxThreads._omp_fn.1+0x31e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |