Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: exec | Source: csr_matvec.c:554-579 [...] | Coverage: 3.55% |
---|
Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: exec | Source: csr_matvec.c:554-579 [...] | Coverage: 3.55% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-8217/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 554 - 579 |
-------------------------------------------------------------------------------- |
554: #pragma omp parallel private(i,jj,j,my_thread_num,offset) |
555: #endif |
556: { |
557: my_thread_num = hypre_GetThreadNum(); |
558: offset = y_size*my_thread_num; |
[...] |
564: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
565: { |
566: j = A_j[jj]; |
567: y_data_expand[offset + j] += A_data[jj] * x_data[i]; |
[...] |
577: for (j = 0; j < num_threads; j++) |
578: { |
579: y_data[i] += y_data_expand[j*y_size + i]; |
0x581e30 PUSH %RBP |
0x581e31 MOV %RSP,%RBP |
0x581e34 PUSH %R15 |
0x581e36 PUSH %R14 |
0x581e38 PUSH %R13 |
0x581e3a PUSH %R12 |
0x581e3c PUSH %RBX |
0x581e3d SUB $0x38,%RSP |
0x581e41 MOV 0x28(%RDI),%RDX |
0x581e45 MOV 0x10(%RDI),%RSI |
0x581e49 MOV 0x20(%RDI),%R10 |
0x581e4d MOV 0x30(%RDI),%RAX |
0x581e51 MOV 0x18(%RDI),%R15 |
0x581e55 MOV 0x40(%RDI),%R13 |
0x581e59 MOV %RDX,-0x60(%RBP) |
0x581e5d MOV 0x38(%RDI),%RBX |
0x581e61 MOV 0x8(%RDI),%R14 |
0x581e65 MOV %RSI,-0x50(%RBP) |
0x581e69 MOV (%RDI),%RDI |
0x581e6c MOV %R10,-0x58(%RBP) |
0x581e70 MOV %RAX,-0x40(%RBP) |
0x581e74 MOV %RDI,-0x48(%RBP) |
0x581e78 CALL 592810 <hypre_GetThreadNum> |
0x581e7d MOV %RAX,%R12 |
0x581e80 CALL 4110b0 <omp_get_num_threads@plt> |
0x581e85 CLTQ |
0x581e87 MOV %RAX,-0x38(%RBP) |
0x581e8b CALL 4111f0 <omp_get_thread_num@plt> |
0x581e90 MOV -0x48(%RBP),%R11 |
0x581e94 MOV -0x50(%RBP),%RSI |
0x581e98 MOVSXD %EAX,%R9 |
0x581e9b MOV %R15,%RAX |
0x581e9e MOV -0x58(%RBP),%R8 |
0x581ea2 CQTO |
0x581ea4 IDIVQ -0x38(%RBP) |
0x581ea8 CMP %RDX,%R9 |
0x581eab MOV %RAX,%RCX |
0x581eae JL 5822f0 |
0x581eb4 MOV %RCX,%R10 |
0x581eb7 IMUL %R9,%R10 |
0x581ebb ADD %R10,%RDX |
0x581ebe ADD %RDX,%RCX |
0x581ec1 CMP %RCX,%RDX |
0x581ec4 JGE 58214f |
0x581eca MOV -0x40(%RBP),%R15 |
0x581ece SAL $0x3,%RDX |
0x581ed2 LEA (%R8,%RCX,8),%RDI |
0x581ed6 ADD %RDX,%R14 |
0x581ed9 ADD %R8,%RDX |
0x581edc IMUL %R15,%R12 |
(3098) 0x581ee0 MOV (%R14),%RAX |
(3098) 0x581ee3 MOV 0x8(%R14),%R8 |
(3098) 0x581ee7 CMP %R8,%RAX |
(3098) 0x581eea JGE 58213e |
(3098) 0x581ef0 MOV %R8,%RCX |
(3098) 0x581ef3 SUB %RAX,%RCX |
(3098) 0x581ef6 AND $0x7,%ECX |
(3098) 0x581ef9 JE 582029 |
(3098) 0x581eff CMP $0x1,%RCX |
(3098) 0x581f03 JE 581ffd |
(3098) 0x581f09 CMP $0x2,%RCX |
(3098) 0x581f0d JE 581fdc |
(3098) 0x581f13 CMP $0x3,%RCX |
(3098) 0x581f17 JE 581fb9 |
(3098) 0x581f1d CMP $0x4,%RCX |
(3098) 0x581f21 JE 581f96 |
(3098) 0x581f23 CMP $0x5,%RCX |
(3098) 0x581f27 JE 581f75 |
(3098) 0x581f29 CMP $0x6,%RCX |
(3098) 0x581f2d JE 581f52 |
(3098) 0x581f2f MOV (%RSI,%RAX,8),%R10 |
(3098) 0x581f33 VMOVSD (%R11,%RAX,8),%XMM0 |
(3098) 0x581f39 INC %RAX |
(3098) 0x581f3c ADD %R12,%R10 |
(3098) 0x581f3f LEA (%RBX,%R10,8),%R15 |
(3098) 0x581f43 VMOVSD (%R15),%XMM7 |
(3098) 0x581f48 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(3098) 0x581f4d VMOVSD %XMM0,(%R15) |
(3098) 0x581f52 MOV (%RSI,%RAX,8),%RCX |
(3098) 0x581f56 VMOVSD (%R11,%RAX,8),%XMM1 |
(3098) 0x581f5c INC %RAX |
(3098) 0x581f5f ADD %R12,%RCX |
(3098) 0x581f62 LEA (%RBX,%RCX,8),%R10 |
(3098) 0x581f66 VMOVSD (%R10),%XMM2 |
(3098) 0x581f6b VFMADD132SD (%RDX),%XMM2,%XMM1 |
(3098) 0x581f70 VMOVSD %XMM1,(%R10) |
(3098) 0x581f75 MOV (%RSI,%RAX,8),%R15 |
(3098) 0x581f79 VMOVSD (%R11,%RAX,8),%XMM3 |
(3098) 0x581f7f INC %RAX |
(3098) 0x581f82 ADD %R12,%R15 |
(3098) 0x581f85 LEA (%RBX,%R15,8),%RCX |
(3098) 0x581f89 VMOVSD (%RCX),%XMM6 |
(3098) 0x581f8d VFMADD132SD (%RDX),%XMM6,%XMM3 |
(3098) 0x581f92 VMOVSD %XMM3,(%RCX) |
(3098) 0x581f96 MOV (%RSI,%RAX,8),%R10 |
(3098) 0x581f9a VMOVSD (%R11,%RAX,8),%XMM4 |
(3098) 0x581fa0 INC %RAX |
(3098) 0x581fa3 ADD %R12,%R10 |
(3098) 0x581fa6 LEA (%RBX,%R10,8),%R15 |
(3098) 0x581faa VMOVSD (%R15),%XMM5 |
(3098) 0x581faf VFMADD132SD (%RDX),%XMM5,%XMM4 |
(3098) 0x581fb4 VMOVSD %XMM4,(%R15) |
(3098) 0x581fb9 MOV (%RSI,%RAX,8),%RCX |
(3098) 0x581fbd VMOVSD (%R11,%RAX,8),%XMM8 |
(3098) 0x581fc3 INC %RAX |
(3098) 0x581fc6 ADD %R12,%RCX |
(3098) 0x581fc9 LEA (%RBX,%RCX,8),%R10 |
(3098) 0x581fcd VMOVSD (%R10),%XMM9 |
(3098) 0x581fd2 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(3098) 0x581fd7 VMOVSD %XMM8,(%R10) |
(3098) 0x581fdc MOV (%RSI,%RAX,8),%R15 |
(3098) 0x581fe0 VMOVSD (%R11,%RAX,8),%XMM10 |
(3098) 0x581fe6 INC %RAX |
(3098) 0x581fe9 ADD %R12,%R15 |
(3098) 0x581fec LEA (%RBX,%R15,8),%RCX |
(3098) 0x581ff0 VMOVSD (%RCX),%XMM11 |
(3098) 0x581ff4 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(3098) 0x581ff9 VMOVSD %XMM10,(%RCX) |
(3098) 0x581ffd MOV (%RSI,%RAX,8),%R10 |
(3098) 0x582001 VMOVSD (%R11,%RAX,8),%XMM12 |
(3098) 0x582007 INC %RAX |
(3098) 0x58200a ADD %R12,%R10 |
(3098) 0x58200d LEA (%RBX,%R10,8),%R15 |
(3098) 0x582011 VMOVSD (%R15),%XMM13 |
(3098) 0x582016 VFMADD132SD (%RDX),%XMM13,%XMM12 |
(3098) 0x58201b VMOVSD %XMM12,(%R15) |
(3098) 0x582020 CMP %R8,%RAX |
(3098) 0x582023 JE 58213e |
(3099) 0x582029 MOV (%RSI,%RAX,8),%RCX |
(3099) 0x58202d VMOVSD (%R11,%RAX,8),%XMM14 |
(3099) 0x582033 MOV 0x8(%RSI,%RAX,8),%R15 |
(3099) 0x582038 ADD %R12,%RCX |
(3099) 0x58203b LEA (%RBX,%RCX,8),%R10 |
(3099) 0x58203f ADD %R12,%R15 |
(3099) 0x582042 VMOVSD (%R10),%XMM15 |
(3099) 0x582047 VFMADD132SD (%RDX),%XMM15,%XMM14 |
(3099) 0x58204c LEA (%RBX,%R15,8),%RCX |
(3099) 0x582050 VMOVSD %XMM14,(%R10) |
(3099) 0x582055 VMOVSD 0x8(%R11,%RAX,8),%XMM0 |
(3099) 0x58205c VMOVSD (%RCX),%XMM7 |
(3099) 0x582060 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(3099) 0x582065 MOV 0x10(%RSI,%RAX,8),%R10 |
(3099) 0x58206a ADD %R12,%R10 |
(3099) 0x58206d LEA (%RBX,%R10,8),%R15 |
(3099) 0x582071 VMOVSD %XMM0,(%RCX) |
(3099) 0x582075 VMOVSD 0x10(%R11,%RAX,8),%XMM1 |
(3099) 0x58207c VMOVSD (%R15),%XMM2 |
(3099) 0x582081 VFMADD132SD (%RDX),%XMM2,%XMM1 |
(3099) 0x582086 MOV 0x18(%RSI,%RAX,8),%RCX |
(3099) 0x58208b ADD %R12,%RCX |
(3099) 0x58208e LEA (%RBX,%RCX,8),%R10 |
(3099) 0x582092 VMOVSD %XMM1,(%R15) |
(3099) 0x582097 VMOVSD (%R10),%XMM6 |
(3099) 0x58209c MOV 0x20(%RSI,%RAX,8),%R15 |
(3099) 0x5820a1 VMOVSD 0x18(%R11,%RAX,8),%XMM3 |
(3099) 0x5820a8 VFMADD132SD (%RDX),%XMM6,%XMM3 |
(3099) 0x5820ad ADD %R12,%R15 |
(3099) 0x5820b0 LEA (%RBX,%R15,8),%RCX |
(3099) 0x5820b4 VMOVSD %XMM3,(%R10) |
(3099) 0x5820b9 VMOVSD 0x20(%R11,%RAX,8),%XMM4 |
(3099) 0x5820c0 VMOVSD (%RCX),%XMM5 |
(3099) 0x5820c4 VFMADD132SD (%RDX),%XMM5,%XMM4 |
(3099) 0x5820c9 MOV 0x28(%RSI,%RAX,8),%R10 |
(3099) 0x5820ce ADD %R12,%R10 |
(3099) 0x5820d1 LEA (%RBX,%R10,8),%R15 |
(3099) 0x5820d5 VMOVSD %XMM4,(%RCX) |
(3099) 0x5820d9 VMOVSD 0x28(%R11,%RAX,8),%XMM8 |
(3099) 0x5820e0 VMOVSD (%R15),%XMM9 |
(3099) 0x5820e5 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(3099) 0x5820ea MOV 0x30(%RSI,%RAX,8),%RCX |
(3099) 0x5820ef ADD %R12,%RCX |
(3099) 0x5820f2 LEA (%RBX,%RCX,8),%R10 |
(3099) 0x5820f6 VMOVSD %XMM8,(%R15) |
(3099) 0x5820fb VMOVSD 0x30(%R11,%RAX,8),%XMM10 |
(3099) 0x582102 VMOVSD (%R10),%XMM11 |
(3099) 0x582107 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(3099) 0x58210c MOV 0x38(%RSI,%RAX,8),%R15 |
(3099) 0x582111 ADD %R12,%R15 |
(3099) 0x582114 LEA (%RBX,%R15,8),%RCX |
(3099) 0x582118 VMOVSD %XMM10,(%R10) |
(3099) 0x58211d VMOVSD 0x38(%R11,%RAX,8),%XMM12 |
(3099) 0x582124 VMOVSD (%RCX),%XMM13 |
(3099) 0x582128 ADD $0x8,%RAX |
(3099) 0x58212c VFMADD132SD (%RDX),%XMM13,%XMM12 |
(3099) 0x582131 VMOVSD %XMM12,(%RCX) |
(3099) 0x582135 CMP %R8,%RAX |
(3099) 0x582138 JNE 582029 |
(3098) 0x58213e ADD $0x8,%RDX |
(3098) 0x582142 ADD $0x8,%R14 |
(3098) 0x582146 CMP %RDX,%RDI |
(3098) 0x582149 JNE 581ee0 |
0x58214f MOV %R9,-0x48(%RBP) |
0x582153 CALL 411290 <GOMP_barrier@plt> |
0x582158 MOV -0x40(%RBP),%RAX |
0x58215c MOV -0x48(%RBP),%R14 |
0x582160 CQTO |
0x582162 IDIVQ -0x38(%RBP) |
0x582166 CMP %RDX,%R14 |
0x582169 JL 5822e6 |
0x58216f IMUL %RAX,%R14 |
0x582173 ADD %R14,%RDX |
0x582176 ADD %RDX,%RAX |
0x582179 CMP %RAX,%RDX |
0x58217c JGE 5822d3 |
0x582182 TEST %R13,%R13 |
0x582185 JLE 5822d3 |
0x58218b MOV -0x60(%RBP),%R11 |
0x58218f LEA (,%RDX,8),%R12 |
0x582197 ADD %R12,%R11 |
0x58219a ADD %RBX,%R12 |
0x58219d MOV -0x40(%RBP),%RBX |
0x5821a1 SAL $0x3,%RBX |
0x5821a5 NOPL (%RAX) |
(3097) 0x5821a8 MOV %R13,%R9 |
(3097) 0x5821ab VMOVSD (%R11),%XMM14 |
(3097) 0x5821b0 MOV %R12,%RDI |
(3097) 0x5821b3 XOR %ESI,%ESI |
(3097) 0x5821b5 AND $0x7,%R9D |
(3097) 0x5821b9 JE 582256 |
(3097) 0x5821bf CMP $0x1,%R9 |
(3097) 0x5821c3 JE 582242 |
(3097) 0x5821c5 CMP $0x2,%R9 |
(3097) 0x5821c9 JE 582233 |
(3097) 0x5821cb CMP $0x3,%R9 |
(3097) 0x5821cf JE 582224 |
(3097) 0x5821d1 CMP $0x4,%R9 |
(3097) 0x5821d5 JE 582215 |
(3097) 0x5821d7 CMP $0x5,%R9 |
(3097) 0x5821db JE 582206 |
(3097) 0x5821dd CMP $0x6,%R9 |
(3097) 0x5821e1 JE 5821f7 |
(3097) 0x5821e3 VADDSD (%R12),%XMM14,%XMM14 |
(3097) 0x5821e9 MOV $0x1,%ESI |
(3097) 0x5821ee LEA (%R12,%RBX,1),%RDI |
(3097) 0x5821f2 VMOVSD %XMM14,(%R11) |
(3097) 0x5821f7 VADDSD (%RDI),%XMM14,%XMM14 |
(3097) 0x5821fb INC %RSI |
(3097) 0x5821fe ADD %RBX,%RDI |
(3097) 0x582201 VMOVSD %XMM14,(%R11) |
(3097) 0x582206 VADDSD (%RDI),%XMM14,%XMM14 |
(3097) 0x58220a INC %RSI |
(3097) 0x58220d ADD %RBX,%RDI |
(3097) 0x582210 VMOVSD %XMM14,(%R11) |
(3097) 0x582215 VADDSD (%RDI),%XMM14,%XMM14 |
(3097) 0x582219 INC %RSI |
(3097) 0x58221c ADD %RBX,%RDI |
(3097) 0x58221f VMOVSD %XMM14,(%R11) |
(3097) 0x582224 VADDSD (%RDI),%XMM14,%XMM14 |
(3097) 0x582228 INC %RSI |
(3097) 0x58222b ADD %RBX,%RDI |
(3097) 0x58222e VMOVSD %XMM14,(%R11) |
(3097) 0x582233 VADDSD (%RDI),%XMM14,%XMM14 |
(3097) 0x582237 INC %RSI |
(3097) 0x58223a ADD %RBX,%RDI |
(3097) 0x58223d VMOVSD %XMM14,(%R11) |
(3097) 0x582242 VADDSD (%RDI),%XMM14,%XMM14 |
(3097) 0x582246 INC %RSI |
(3097) 0x582249 ADD %RBX,%RDI |
(3097) 0x58224c VMOVSD %XMM14,(%R11) |
(3097) 0x582251 CMP %RSI,%R13 |
(3097) 0x582254 JE 5822bf |
(3096) 0x582256 VADDSD (%RDI),%XMM14,%XMM15 |
(3096) 0x58225a ADD %RBX,%RDI |
(3096) 0x58225d ADD $0x8,%RSI |
(3096) 0x582261 VMOVSD %XMM15,(%R11) |
(3096) 0x582266 VADDSD (%RDI),%XMM15,%XMM0 |
(3096) 0x58226a ADD %RBX,%RDI |
(3096) 0x58226d VMOVSD %XMM0,(%R11) |
(3096) 0x582272 VADDSD (%RDI),%XMM0,%XMM7 |
(3096) 0x582276 ADD %RBX,%RDI |
(3096) 0x582279 VMOVSD %XMM7,(%R11) |
(3096) 0x58227e VADDSD (%RDI),%XMM7,%XMM1 |
(3096) 0x582282 ADD %RBX,%RDI |
(3096) 0x582285 VMOVSD %XMM1,(%R11) |
(3096) 0x58228a VADDSD (%RDI),%XMM1,%XMM2 |
(3096) 0x58228e ADD %RBX,%RDI |
(3096) 0x582291 VMOVSD %XMM2,(%R11) |
(3096) 0x582296 VADDSD (%RDI),%XMM2,%XMM3 |
(3096) 0x58229a ADD %RBX,%RDI |
(3096) 0x58229d VMOVSD %XMM3,(%R11) |
(3096) 0x5822a2 VADDSD (%RDI),%XMM3,%XMM6 |
(3096) 0x5822a6 ADD %RBX,%RDI |
(3096) 0x5822a9 VMOVSD %XMM6,(%R11) |
(3096) 0x5822ae VADDSD (%RDI),%XMM6,%XMM14 |
(3096) 0x5822b2 ADD %RBX,%RDI |
(3096) 0x5822b5 VMOVSD %XMM14,(%R11) |
(3096) 0x5822ba CMP %RSI,%R13 |
(3096) 0x5822bd JNE 582256 |
(3097) 0x5822bf INC %RDX |
(3097) 0x5822c2 ADD $0x8,%R11 |
(3097) 0x5822c6 ADD $0x8,%R12 |
(3097) 0x5822ca CMP %RDX,%RAX |
(3097) 0x5822cd JNE 5821a8 |
0x5822d3 ADD $0x38,%RSP |
0x5822d7 POP %RBX |
0x5822d8 POP %R12 |
0x5822da POP %R13 |
0x5822dc POP %R14 |
0x5822de POP %R15 |
0x5822e0 POP %RBP |
0x5822e1 JMP 411290 |
0x5822e6 INC %RAX |
0x5822e9 XOR %EDX,%EDX |
0x5822eb JMP 58216f |
0x5822f0 INC %RCX |
0x5822f3 XOR %EDX,%EDX |
0x5822f5 JMP 581eb4 |
0x5822fa NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | csr_matvec.c:554-579 |
Module | exec |
nb instructions | 87 |
nb uops | 99 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 16.50 cycles |
front end | 16.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 9.00 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
cycles | 6.10 | 12.43 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.18-20.23 |
Stall cycles | 4.18-4.25 |
ROB full (events) | 3.47-0.00 |
PRF_INT full (events) | 1.72-4.40 |
Front-end | 16.50 |
Dispatch | 12.43 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 592810 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4110b0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4111f0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JL 5822f0 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 58214f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 411290 <GOMP_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 5822e6 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5822d3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 5822d3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R12,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RBX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 411290 <GOMP_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 58216f <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 581eb4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | csr_matvec.c:554-579 |
Module | exec |
nb instructions | 87 |
nb uops | 99 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 16.50 cycles |
front end | 16.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 9.00 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
cycles | 6.10 | 12.43 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.18-20.23 |
Stall cycles | 4.18-4.25 |
ROB full (events) | 3.47-0.00 |
PRF_INT full (events) | 1.72-4.40 |
Front-end | 16.50 |
Dispatch | 12.43 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 592810 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4110b0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4111f0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JL 5822f0 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 58214f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 411290 <GOMP_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 5822e6 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5822d3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 5822d3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R12,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RBX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 411290 <GOMP_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 58216f <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 581eb4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecT._omp_fn.3– | 3.55 | 0.54 |
▼Loop 3098 - csr_matvec.c:564-567 - exec– | 3.37 | 0.32 |
○Loop 3099 - csr_matvec.c:564-567 - exec | 0.04 | 0 |
▼Loop 3097 - csr_matvec.c:577-579 - exec– | 0.01 | 0 |
○Loop 3096 - csr_matvec.c:577-579 - exec | 0.12 | 0.01 |