Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: exec | Source: csr_matvec.c:554-579 [...] | Coverage: 2.2% |
---|
Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: exec | Source: csr_matvec.c:554-579 [...] | Coverage: 2.2% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-8218/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 554 - 579 |
-------------------------------------------------------------------------------- |
554: #pragma omp parallel private(i,jj,j,my_thread_num,offset) |
555: #endif |
556: { |
557: my_thread_num = hypre_GetThreadNum(); |
558: offset = y_size*my_thread_num; |
[...] |
564: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
565: { |
566: j = A_j[jj]; |
567: y_data_expand[offset + j] += A_data[jj] * x_data[i]; |
[...] |
577: for (j = 0; j < num_threads; j++) |
578: { |
579: y_data[i] += y_data_expand[j*y_size + i]; |
0x587870 PUSH %RBP |
0x587871 MOV %RSP,%RBP |
0x587874 PUSH %R15 |
0x587876 PUSH %R14 |
0x587878 PUSH %R13 |
0x58787a PUSH %R12 |
0x58787c PUSH %RBX |
0x58787d SUB $0x38,%RSP |
0x587881 MOV 0x28(%RDI),%RDX |
0x587885 MOV 0x10(%RDI),%RSI |
0x587889 MOV 0x20(%RDI),%R10 |
0x58788d MOV 0x30(%RDI),%RAX |
0x587891 MOV 0x18(%RDI),%R15 |
0x587895 MOV 0x40(%RDI),%R13 |
0x587899 MOV %RDX,-0x60(%RBP) |
0x58789d MOV 0x38(%RDI),%RBX |
0x5878a1 MOV 0x8(%RDI),%R14 |
0x5878a5 MOV %RSI,-0x50(%RBP) |
0x5878a9 MOV (%RDI),%RDI |
0x5878ac MOV %R10,-0x58(%RBP) |
0x5878b0 MOV %RAX,-0x40(%RBP) |
0x5878b4 MOV %RDI,-0x48(%RBP) |
0x5878b8 CALL 598830 <hypre_GetThreadNum> |
0x5878bd MOV %RAX,%R12 |
0x5878c0 CALL 4110b0 <omp_get_num_threads@plt> |
0x5878c5 CLTQ |
0x5878c7 MOV %RAX,-0x38(%RBP) |
0x5878cb CALL 4111f0 <omp_get_thread_num@plt> |
0x5878d0 MOV -0x48(%RBP),%R11 |
0x5878d4 MOV -0x50(%RBP),%RSI |
0x5878d8 MOVSXD %EAX,%R9 |
0x5878db MOV %R15,%RAX |
0x5878de MOV -0x58(%RBP),%R8 |
0x5878e2 CQTO |
0x5878e4 IDIVQ -0x38(%RBP) |
0x5878e8 CMP %RDX,%R9 |
0x5878eb MOV %RAX,%RCX |
0x5878ee JL 587d30 |
0x5878f4 MOV %RCX,%R10 |
0x5878f7 IMUL %R9,%R10 |
0x5878fb ADD %R10,%RDX |
0x5878fe ADD %RDX,%RCX |
0x587901 CMP %RCX,%RDX |
0x587904 JGE 587b8f |
0x58790a MOV -0x40(%RBP),%R15 |
0x58790e SAL $0x3,%RDX |
0x587912 LEA (%R8,%RCX,8),%RDI |
0x587916 ADD %RDX,%R14 |
0x587919 ADD %R8,%RDX |
0x58791c IMUL %R15,%R12 |
(3102) 0x587920 MOV (%R14),%RAX |
(3102) 0x587923 MOV 0x8(%R14),%R8 |
(3102) 0x587927 CMP %R8,%RAX |
(3102) 0x58792a JGE 587b7e |
(3102) 0x587930 MOV %R8,%RCX |
(3102) 0x587933 SUB %RAX,%RCX |
(3102) 0x587936 AND $0x7,%ECX |
(3102) 0x587939 JE 587a69 |
(3102) 0x58793f CMP $0x1,%RCX |
(3102) 0x587943 JE 587a3d |
(3102) 0x587949 CMP $0x2,%RCX |
(3102) 0x58794d JE 587a1c |
(3102) 0x587953 CMP $0x3,%RCX |
(3102) 0x587957 JE 5879f9 |
(3102) 0x58795d CMP $0x4,%RCX |
(3102) 0x587961 JE 5879d6 |
(3102) 0x587963 CMP $0x5,%RCX |
(3102) 0x587967 JE 5879b5 |
(3102) 0x587969 CMP $0x6,%RCX |
(3102) 0x58796d JE 587992 |
(3102) 0x58796f MOV (%RSI,%RAX,8),%R10 |
(3102) 0x587973 VMOVSD (%R11,%RAX,8),%XMM0 |
(3102) 0x587979 INC %RAX |
(3102) 0x58797c ADD %R12,%R10 |
(3102) 0x58797f LEA (%RBX,%R10,8),%R15 |
(3102) 0x587983 VMOVSD (%R15),%XMM7 |
(3102) 0x587988 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(3102) 0x58798d VMOVSD %XMM0,(%R15) |
(3102) 0x587992 MOV (%RSI,%RAX,8),%RCX |
(3102) 0x587996 VMOVSD (%R11,%RAX,8),%XMM1 |
(3102) 0x58799c INC %RAX |
(3102) 0x58799f ADD %R12,%RCX |
(3102) 0x5879a2 LEA (%RBX,%RCX,8),%R10 |
(3102) 0x5879a6 VMOVSD (%R10),%XMM2 |
(3102) 0x5879ab VFMADD132SD (%RDX),%XMM2,%XMM1 |
(3102) 0x5879b0 VMOVSD %XMM1,(%R10) |
(3102) 0x5879b5 MOV (%RSI,%RAX,8),%R15 |
(3102) 0x5879b9 VMOVSD (%R11,%RAX,8),%XMM3 |
(3102) 0x5879bf INC %RAX |
(3102) 0x5879c2 ADD %R12,%R15 |
(3102) 0x5879c5 LEA (%RBX,%R15,8),%RCX |
(3102) 0x5879c9 VMOVSD (%RCX),%XMM6 |
(3102) 0x5879cd VFMADD132SD (%RDX),%XMM6,%XMM3 |
(3102) 0x5879d2 VMOVSD %XMM3,(%RCX) |
(3102) 0x5879d6 MOV (%RSI,%RAX,8),%R10 |
(3102) 0x5879da VMOVSD (%R11,%RAX,8),%XMM4 |
(3102) 0x5879e0 INC %RAX |
(3102) 0x5879e3 ADD %R12,%R10 |
(3102) 0x5879e6 LEA (%RBX,%R10,8),%R15 |
(3102) 0x5879ea VMOVSD (%R15),%XMM5 |
(3102) 0x5879ef VFMADD132SD (%RDX),%XMM5,%XMM4 |
(3102) 0x5879f4 VMOVSD %XMM4,(%R15) |
(3102) 0x5879f9 MOV (%RSI,%RAX,8),%RCX |
(3102) 0x5879fd VMOVSD (%R11,%RAX,8),%XMM8 |
(3102) 0x587a03 INC %RAX |
(3102) 0x587a06 ADD %R12,%RCX |
(3102) 0x587a09 LEA (%RBX,%RCX,8),%R10 |
(3102) 0x587a0d VMOVSD (%R10),%XMM9 |
(3102) 0x587a12 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(3102) 0x587a17 VMOVSD %XMM8,(%R10) |
(3102) 0x587a1c MOV (%RSI,%RAX,8),%R15 |
(3102) 0x587a20 VMOVSD (%R11,%RAX,8),%XMM10 |
(3102) 0x587a26 INC %RAX |
(3102) 0x587a29 ADD %R12,%R15 |
(3102) 0x587a2c LEA (%RBX,%R15,8),%RCX |
(3102) 0x587a30 VMOVSD (%RCX),%XMM11 |
(3102) 0x587a34 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(3102) 0x587a39 VMOVSD %XMM10,(%RCX) |
(3102) 0x587a3d MOV (%RSI,%RAX,8),%R10 |
(3102) 0x587a41 VMOVSD (%R11,%RAX,8),%XMM12 |
(3102) 0x587a47 INC %RAX |
(3102) 0x587a4a ADD %R12,%R10 |
(3102) 0x587a4d LEA (%RBX,%R10,8),%R15 |
(3102) 0x587a51 VMOVSD (%R15),%XMM13 |
(3102) 0x587a56 VFMADD132SD (%RDX),%XMM13,%XMM12 |
(3102) 0x587a5b VMOVSD %XMM12,(%R15) |
(3102) 0x587a60 CMP %R8,%RAX |
(3102) 0x587a63 JE 587b7e |
(3103) 0x587a69 MOV (%RSI,%RAX,8),%RCX |
(3103) 0x587a6d VMOVSD (%R11,%RAX,8),%XMM14 |
(3103) 0x587a73 MOV 0x8(%RSI,%RAX,8),%R15 |
(3103) 0x587a78 ADD %R12,%RCX |
(3103) 0x587a7b LEA (%RBX,%RCX,8),%R10 |
(3103) 0x587a7f ADD %R12,%R15 |
(3103) 0x587a82 VMOVSD (%R10),%XMM15 |
(3103) 0x587a87 LEA (%RBX,%R15,8),%RCX |
(3103) 0x587a8b VFMADD132SD (%RDX),%XMM15,%XMM14 |
(3103) 0x587a90 VMOVSD %XMM14,(%R10) |
(3103) 0x587a95 MOV 0x10(%RSI,%RAX,8),%R10 |
(3103) 0x587a9a VMOVSD 0x8(%R11,%RAX,8),%XMM0 |
(3103) 0x587aa1 VMOVSD (%RCX),%XMM7 |
(3103) 0x587aa5 ADD %R12,%R10 |
(3103) 0x587aa8 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(3103) 0x587aad LEA (%RBX,%R10,8),%R15 |
(3103) 0x587ab1 VMOVSD %XMM0,(%RCX) |
(3103) 0x587ab5 MOV 0x18(%RSI,%RAX,8),%RCX |
(3103) 0x587aba VMOVSD 0x10(%R11,%RAX,8),%XMM1 |
(3103) 0x587ac1 VMOVSD (%R15),%XMM2 |
(3103) 0x587ac6 ADD %R12,%RCX |
(3103) 0x587ac9 VFMADD132SD (%RDX),%XMM2,%XMM1 |
(3103) 0x587ace LEA (%RBX,%RCX,8),%R10 |
(3103) 0x587ad2 VMOVSD %XMM1,(%R15) |
(3103) 0x587ad7 MOV 0x20(%RSI,%RAX,8),%R15 |
(3103) 0x587adc VMOVSD 0x18(%R11,%RAX,8),%XMM3 |
(3103) 0x587ae3 VMOVSD (%R10),%XMM6 |
(3103) 0x587ae8 ADD %R12,%R15 |
(3103) 0x587aeb VFMADD132SD (%RDX),%XMM6,%XMM3 |
(3103) 0x587af0 LEA (%RBX,%R15,8),%RCX |
(3103) 0x587af4 VMOVSD %XMM3,(%R10) |
(3103) 0x587af9 MOV 0x28(%RSI,%RAX,8),%R10 |
(3103) 0x587afe VMOVSD 0x20(%R11,%RAX,8),%XMM4 |
(3103) 0x587b05 VMOVSD (%RCX),%XMM5 |
(3103) 0x587b09 ADD %R12,%R10 |
(3103) 0x587b0c VFMADD132SD (%RDX),%XMM5,%XMM4 |
(3103) 0x587b11 LEA (%RBX,%R10,8),%R15 |
(3103) 0x587b15 VMOVSD %XMM4,(%RCX) |
(3103) 0x587b19 MOV 0x30(%RSI,%RAX,8),%RCX |
(3103) 0x587b1e VMOVSD 0x28(%R11,%RAX,8),%XMM8 |
(3103) 0x587b25 VMOVSD (%R15),%XMM9 |
(3103) 0x587b2a ADD %R12,%RCX |
(3103) 0x587b2d VFMADD132SD (%RDX),%XMM9,%XMM8 |
(3103) 0x587b32 LEA (%RBX,%RCX,8),%R10 |
(3103) 0x587b36 VMOVSD %XMM8,(%R15) |
(3103) 0x587b3b VMOVSD 0x30(%R11,%RAX,8),%XMM10 |
(3103) 0x587b42 VMOVSD (%R10),%XMM11 |
(3103) 0x587b47 MOV 0x38(%RSI,%RAX,8),%R15 |
(3103) 0x587b4c VFMADD132SD (%RDX),%XMM11,%XMM10 |
(3103) 0x587b51 ADD %R12,%R15 |
(3103) 0x587b54 LEA (%RBX,%R15,8),%RCX |
(3103) 0x587b58 VMOVSD %XMM10,(%R10) |
(3103) 0x587b5d VMOVSD 0x38(%R11,%RAX,8),%XMM12 |
(3103) 0x587b64 VMOVSD (%RCX),%XMM13 |
(3103) 0x587b68 ADD $0x8,%RAX |
(3103) 0x587b6c VFMADD132SD (%RDX),%XMM13,%XMM12 |
(3103) 0x587b71 VMOVSD %XMM12,(%RCX) |
(3103) 0x587b75 CMP %R8,%RAX |
(3103) 0x587b78 JNE 587a69 |
(3102) 0x587b7e ADD $0x8,%RDX |
(3102) 0x587b82 ADD $0x8,%R14 |
(3102) 0x587b86 CMP %RDX,%RDI |
(3102) 0x587b89 JNE 587920 |
0x587b8f MOV %R9,-0x48(%RBP) |
0x587b93 CALL 411290 <GOMP_barrier@plt> |
0x587b98 MOV -0x40(%RBP),%RAX |
0x587b9c MOV -0x48(%RBP),%R14 |
0x587ba0 CQTO |
0x587ba2 IDIVQ -0x38(%RBP) |
0x587ba6 CMP %RDX,%R14 |
0x587ba9 JL 587d26 |
0x587baf IMUL %RAX,%R14 |
0x587bb3 ADD %R14,%RDX |
0x587bb6 ADD %RDX,%RAX |
0x587bb9 CMP %RAX,%RDX |
0x587bbc JGE 587d13 |
0x587bc2 TEST %R13,%R13 |
0x587bc5 JLE 587d13 |
0x587bcb MOV -0x60(%RBP),%R11 |
0x587bcf LEA (,%RDX,8),%R12 |
0x587bd7 ADD %R12,%R11 |
0x587bda ADD %RBX,%R12 |
0x587bdd MOV -0x40(%RBP),%RBX |
0x587be1 SAL $0x3,%RBX |
0x587be5 NOPL (%RAX) |
(3101) 0x587be8 MOV %R13,%R9 |
(3101) 0x587beb VMOVSD (%R11),%XMM14 |
(3101) 0x587bf0 MOV %R12,%RDI |
(3101) 0x587bf3 XOR %ESI,%ESI |
(3101) 0x587bf5 AND $0x7,%R9D |
(3101) 0x587bf9 JE 587c96 |
(3101) 0x587bff CMP $0x1,%R9 |
(3101) 0x587c03 JE 587c82 |
(3101) 0x587c05 CMP $0x2,%R9 |
(3101) 0x587c09 JE 587c73 |
(3101) 0x587c0b CMP $0x3,%R9 |
(3101) 0x587c0f JE 587c64 |
(3101) 0x587c11 CMP $0x4,%R9 |
(3101) 0x587c15 JE 587c55 |
(3101) 0x587c17 CMP $0x5,%R9 |
(3101) 0x587c1b JE 587c46 |
(3101) 0x587c1d CMP $0x6,%R9 |
(3101) 0x587c21 JE 587c37 |
(3101) 0x587c23 VADDSD (%R12),%XMM14,%XMM14 |
(3101) 0x587c29 MOV $0x1,%ESI |
(3101) 0x587c2e LEA (%R12,%RBX,1),%RDI |
(3101) 0x587c32 VMOVSD %XMM14,(%R11) |
(3101) 0x587c37 VADDSD (%RDI),%XMM14,%XMM14 |
(3101) 0x587c3b INC %RSI |
(3101) 0x587c3e ADD %RBX,%RDI |
(3101) 0x587c41 VMOVSD %XMM14,(%R11) |
(3101) 0x587c46 VADDSD (%RDI),%XMM14,%XMM14 |
(3101) 0x587c4a INC %RSI |
(3101) 0x587c4d ADD %RBX,%RDI |
(3101) 0x587c50 VMOVSD %XMM14,(%R11) |
(3101) 0x587c55 VADDSD (%RDI),%XMM14,%XMM14 |
(3101) 0x587c59 INC %RSI |
(3101) 0x587c5c ADD %RBX,%RDI |
(3101) 0x587c5f VMOVSD %XMM14,(%R11) |
(3101) 0x587c64 VADDSD (%RDI),%XMM14,%XMM14 |
(3101) 0x587c68 INC %RSI |
(3101) 0x587c6b ADD %RBX,%RDI |
(3101) 0x587c6e VMOVSD %XMM14,(%R11) |
(3101) 0x587c73 VADDSD (%RDI),%XMM14,%XMM14 |
(3101) 0x587c77 INC %RSI |
(3101) 0x587c7a ADD %RBX,%RDI |
(3101) 0x587c7d VMOVSD %XMM14,(%R11) |
(3101) 0x587c82 VADDSD (%RDI),%XMM14,%XMM14 |
(3101) 0x587c86 INC %RSI |
(3101) 0x587c89 ADD %RBX,%RDI |
(3101) 0x587c8c VMOVSD %XMM14,(%R11) |
(3101) 0x587c91 CMP %RSI,%R13 |
(3101) 0x587c94 JE 587cff |
(3100) 0x587c96 VADDSD (%RDI),%XMM14,%XMM15 |
(3100) 0x587c9a ADD %RBX,%RDI |
(3100) 0x587c9d ADD $0x8,%RSI |
(3100) 0x587ca1 VMOVSD %XMM15,(%R11) |
(3100) 0x587ca6 VADDSD (%RDI),%XMM15,%XMM0 |
(3100) 0x587caa ADD %RBX,%RDI |
(3100) 0x587cad VMOVSD %XMM0,(%R11) |
(3100) 0x587cb2 VADDSD (%RDI),%XMM0,%XMM7 |
(3100) 0x587cb6 ADD %RBX,%RDI |
(3100) 0x587cb9 VMOVSD %XMM7,(%R11) |
(3100) 0x587cbe VADDSD (%RDI),%XMM7,%XMM1 |
(3100) 0x587cc2 ADD %RBX,%RDI |
(3100) 0x587cc5 VMOVSD %XMM1,(%R11) |
(3100) 0x587cca VADDSD (%RDI),%XMM1,%XMM2 |
(3100) 0x587cce ADD %RBX,%RDI |
(3100) 0x587cd1 VMOVSD %XMM2,(%R11) |
(3100) 0x587cd6 VADDSD (%RDI),%XMM2,%XMM3 |
(3100) 0x587cda ADD %RBX,%RDI |
(3100) 0x587cdd VMOVSD %XMM3,(%R11) |
(3100) 0x587ce2 VADDSD (%RDI),%XMM3,%XMM6 |
(3100) 0x587ce6 ADD %RBX,%RDI |
(3100) 0x587ce9 VMOVSD %XMM6,(%R11) |
(3100) 0x587cee VADDSD (%RDI),%XMM6,%XMM14 |
(3100) 0x587cf2 ADD %RBX,%RDI |
(3100) 0x587cf5 VMOVSD %XMM14,(%R11) |
(3100) 0x587cfa CMP %RSI,%R13 |
(3100) 0x587cfd JNE 587c96 |
(3101) 0x587cff INC %RDX |
(3101) 0x587d02 ADD $0x8,%R11 |
(3101) 0x587d06 ADD $0x8,%R12 |
(3101) 0x587d0a CMP %RDX,%RAX |
(3101) 0x587d0d JNE 587be8 |
0x587d13 ADD $0x38,%RSP |
0x587d17 POP %RBX |
0x587d18 POP %R12 |
0x587d1a POP %R13 |
0x587d1c POP %R14 |
0x587d1e POP %R15 |
0x587d20 POP %RBP |
0x587d21 JMP 411290 |
0x587d26 INC %RAX |
0x587d29 XOR %EDX,%EDX |
0x587d2b JMP 587baf |
0x587d30 INC %RCX |
0x587d33 XOR %EDX,%EDX |
0x587d35 JMP 5878f4 |
0x587d3a NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | csr_matvec.c:554-579 |
Module | exec |
nb instructions | 87 |
nb uops | 99 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 16.50 cycles |
front end | 16.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 9.00 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
cycles | 6.10 | 12.43 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.18-20.23 |
Stall cycles | 4.18-4.25 |
ROB full (events) | 3.47-0.00 |
PRF_INT full (events) | 1.72-4.40 |
Front-end | 16.50 |
Dispatch | 12.43 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
Vectorization ratios |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Vector efficiency ratios |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 598830 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4110b0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4111f0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JL 587d30 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 587b8f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 411290 <GOMP_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 587d26 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 587d13 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 587d13 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R12,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RBX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 411290 <GOMP_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 587baf <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 5878f4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | csr_matvec.c:554-579 |
Module | exec |
nb instructions | 87 |
nb uops | 99 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 16.50 cycles |
front end | 16.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 9.00 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
cycles | 6.10 | 12.43 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.18-20.23 |
Stall cycles | 4.18-4.25 |
ROB full (events) | 3.47-0.00 |
PRF_INT full (events) | 1.72-4.40 |
Front-end | 16.50 |
Dispatch | 12.43 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
Vectorization ratios |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Vector efficiency ratios |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 598830 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4110b0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4111f0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JL 587d30 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 587b8f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 411290 <GOMP_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 587d26 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 587d13 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 587d13 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R12,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RBX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 411290 <GOMP_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 587baf <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 5878f4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecT._omp_fn.3– | 2.2 | 0.47 |
▼Loop 3102 - csr_matvec.c:564-567 - exec– | 2.09 | 0.32 |
○Loop 3103 - csr_matvec.c:564-567 - exec | 0.02 | 0 |
▼Loop 3101 - csr_matvec.c:577-579 - exec– | 0 | 0 |
○Loop 3100 - csr_matvec.c:577-579 - exec | 0.09 | 0.01 |