Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: exec | Source: csr_matvec.c:554-579 [...] | Coverage: 1.53% |
---|
Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: exec | Source: csr_matvec.c:554-579 [...] | Coverage: 1.53% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-859-5251/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 554 - 579 |
-------------------------------------------------------------------------------- |
554: #pragma omp parallel private(i,jj,j,my_thread_num,offset) |
555: #endif |
556: { |
557: my_thread_num = hypre_GetThreadNum(); |
558: offset = y_size*my_thread_num; |
[...] |
564: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
565: { |
566: j = A_j[jj]; |
567: y_data_expand[offset + j] += A_data[jj] * x_data[i]; |
[...] |
577: for (j = 0; j < num_threads; j++) |
578: { |
579: y_data[i] += y_data_expand[j*y_size + i]; |
0x53e160 PUSH %RBP |
0x53e161 MOV %RSP,%RBP |
0x53e164 PUSH %R15 |
0x53e166 PUSH %R14 |
0x53e168 PUSH %R13 |
0x53e16a PUSH %R12 |
0x53e16c PUSH %RBX |
0x53e16d SUB $0x38,%RSP |
0x53e171 MOV 0x28(%RDI),%RDX |
0x53e175 MOV 0x10(%RDI),%RSI |
0x53e179 MOV 0x20(%RDI),%R10 |
0x53e17d MOV 0x30(%RDI),%RAX |
0x53e181 MOV 0x18(%RDI),%R15 |
0x53e185 MOV 0x40(%RDI),%R13 |
0x53e189 MOV %RDX,-0x60(%RBP) |
0x53e18d MOV 0x38(%RDI),%RBX |
0x53e191 MOV 0x8(%RDI),%R14 |
0x53e195 MOV %RSI,-0x50(%RBP) |
0x53e199 MOV (%RDI),%RDI |
0x53e19c MOV %R10,-0x58(%RBP) |
0x53e1a0 MOV %RAX,-0x40(%RBP) |
0x53e1a4 MOV %RDI,-0x48(%RBP) |
0x53e1a8 CALL 54d120 <hypre_GetThreadNum> |
0x53e1ad MOV %RAX,%R12 |
0x53e1b0 CALL 4110b0 <omp_get_num_threads@plt> |
0x53e1b5 CLTQ |
0x53e1b7 MOV %RAX,-0x38(%RBP) |
0x53e1bb CALL 4111f0 <omp_get_thread_num@plt> |
0x53e1c0 MOV -0x48(%RBP),%R11 |
0x53e1c4 MOV -0x50(%RBP),%RSI |
0x53e1c8 MOVSXD %EAX,%R9 |
0x53e1cb MOV %R15,%RAX |
0x53e1ce MOV -0x58(%RBP),%R8 |
0x53e1d2 CQTO |
0x53e1d4 IDIVQ -0x38(%RBP) |
0x53e1d8 CMP %RDX,%R9 |
0x53e1db MOV %RAX,%RCX |
0x53e1de JL 53e620 |
0x53e1e4 MOV %RCX,%R10 |
0x53e1e7 IMUL %R9,%R10 |
0x53e1eb ADD %R10,%RDX |
0x53e1ee ADD %RDX,%RCX |
0x53e1f1 CMP %RCX,%RDX |
0x53e1f4 JGE 53e47f |
0x53e1fa MOV -0x40(%RBP),%R15 |
0x53e1fe SAL $0x3,%RDX |
0x53e202 LEA (%R8,%RCX,8),%RDI |
0x53e206 ADD %RDX,%R14 |
0x53e209 ADD %R8,%RDX |
0x53e20c IMUL %R15,%R12 |
(2917) 0x53e210 MOV (%R14),%RAX |
(2917) 0x53e213 MOV 0x8(%R14),%R8 |
(2917) 0x53e217 CMP %R8,%RAX |
(2917) 0x53e21a JGE 53e46e |
(2917) 0x53e220 MOV %R8,%RCX |
(2917) 0x53e223 SUB %RAX,%RCX |
(2917) 0x53e226 AND $0x7,%ECX |
(2917) 0x53e229 JE 53e359 |
(2917) 0x53e22f CMP $0x1,%RCX |
(2917) 0x53e233 JE 53e32d |
(2917) 0x53e239 CMP $0x2,%RCX |
(2917) 0x53e23d JE 53e30c |
(2917) 0x53e243 CMP $0x3,%RCX |
(2917) 0x53e247 JE 53e2e9 |
(2917) 0x53e24d CMP $0x4,%RCX |
(2917) 0x53e251 JE 53e2c6 |
(2917) 0x53e253 CMP $0x5,%RCX |
(2917) 0x53e257 JE 53e2a5 |
(2917) 0x53e259 CMP $0x6,%RCX |
(2917) 0x53e25d JE 53e282 |
(2917) 0x53e25f MOV (%RSI,%RAX,8),%R10 |
(2917) 0x53e263 VMOVSD (%R11,%RAX,8),%XMM0 |
(2917) 0x53e269 INC %RAX |
(2917) 0x53e26c ADD %R12,%R10 |
(2917) 0x53e26f LEA (%RBX,%R10,8),%R15 |
(2917) 0x53e273 VMOVSD (%R15),%XMM7 |
(2917) 0x53e278 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(2917) 0x53e27d VMOVSD %XMM0,(%R15) |
(2917) 0x53e282 MOV (%RSI,%RAX,8),%RCX |
(2917) 0x53e286 VMOVSD (%R11,%RAX,8),%XMM1 |
(2917) 0x53e28c INC %RAX |
(2917) 0x53e28f ADD %R12,%RCX |
(2917) 0x53e292 LEA (%RBX,%RCX,8),%R10 |
(2917) 0x53e296 VMOVSD (%R10),%XMM2 |
(2917) 0x53e29b VFMADD132SD (%RDX),%XMM2,%XMM1 |
(2917) 0x53e2a0 VMOVSD %XMM1,(%R10) |
(2917) 0x53e2a5 MOV (%RSI,%RAX,8),%R15 |
(2917) 0x53e2a9 VMOVSD (%R11,%RAX,8),%XMM3 |
(2917) 0x53e2af INC %RAX |
(2917) 0x53e2b2 ADD %R12,%R15 |
(2917) 0x53e2b5 LEA (%RBX,%R15,8),%RCX |
(2917) 0x53e2b9 VMOVSD (%RCX),%XMM6 |
(2917) 0x53e2bd VFMADD132SD (%RDX),%XMM6,%XMM3 |
(2917) 0x53e2c2 VMOVSD %XMM3,(%RCX) |
(2917) 0x53e2c6 MOV (%RSI,%RAX,8),%R10 |
(2917) 0x53e2ca VMOVSD (%R11,%RAX,8),%XMM4 |
(2917) 0x53e2d0 INC %RAX |
(2917) 0x53e2d3 ADD %R12,%R10 |
(2917) 0x53e2d6 LEA (%RBX,%R10,8),%R15 |
(2917) 0x53e2da VMOVSD (%R15),%XMM5 |
(2917) 0x53e2df VFMADD132SD (%RDX),%XMM5,%XMM4 |
(2917) 0x53e2e4 VMOVSD %XMM4,(%R15) |
(2917) 0x53e2e9 MOV (%RSI,%RAX,8),%RCX |
(2917) 0x53e2ed VMOVSD (%R11,%RAX,8),%XMM8 |
(2917) 0x53e2f3 INC %RAX |
(2917) 0x53e2f6 ADD %R12,%RCX |
(2917) 0x53e2f9 LEA (%RBX,%RCX,8),%R10 |
(2917) 0x53e2fd VMOVSD (%R10),%XMM9 |
(2917) 0x53e302 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(2917) 0x53e307 VMOVSD %XMM8,(%R10) |
(2917) 0x53e30c MOV (%RSI,%RAX,8),%R15 |
(2917) 0x53e310 VMOVSD (%R11,%RAX,8),%XMM10 |
(2917) 0x53e316 INC %RAX |
(2917) 0x53e319 ADD %R12,%R15 |
(2917) 0x53e31c LEA (%RBX,%R15,8),%RCX |
(2917) 0x53e320 VMOVSD (%RCX),%XMM11 |
(2917) 0x53e324 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(2917) 0x53e329 VMOVSD %XMM10,(%RCX) |
(2917) 0x53e32d MOV (%RSI,%RAX,8),%R10 |
(2917) 0x53e331 VMOVSD (%R11,%RAX,8),%XMM12 |
(2917) 0x53e337 INC %RAX |
(2917) 0x53e33a ADD %R12,%R10 |
(2917) 0x53e33d LEA (%RBX,%R10,8),%R15 |
(2917) 0x53e341 VMOVSD (%R15),%XMM13 |
(2917) 0x53e346 VFMADD132SD (%RDX),%XMM13,%XMM12 |
(2917) 0x53e34b VMOVSD %XMM12,(%R15) |
(2917) 0x53e350 CMP %R8,%RAX |
(2917) 0x53e353 JE 53e46e |
(2918) 0x53e359 MOV (%RSI,%RAX,8),%RCX |
(2918) 0x53e35d VMOVSD (%R11,%RAX,8),%XMM14 |
(2918) 0x53e363 MOV 0x8(%RSI,%RAX,8),%R15 |
(2918) 0x53e368 ADD %R12,%RCX |
(2918) 0x53e36b LEA (%RBX,%RCX,8),%R10 |
(2918) 0x53e36f ADD %R12,%R15 |
(2918) 0x53e372 VMOVSD (%R10),%XMM15 |
(2918) 0x53e377 VFMADD132SD (%RDX),%XMM15,%XMM14 |
(2918) 0x53e37c LEA (%RBX,%R15,8),%RCX |
(2918) 0x53e380 VMOVSD %XMM14,(%R10) |
(2918) 0x53e385 VMOVSD 0x8(%R11,%RAX,8),%XMM0 |
(2918) 0x53e38c VMOVSD (%RCX),%XMM7 |
(2918) 0x53e390 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(2918) 0x53e395 MOV 0x10(%RSI,%RAX,8),%R10 |
(2918) 0x53e39a ADD %R12,%R10 |
(2918) 0x53e39d LEA (%RBX,%R10,8),%R15 |
(2918) 0x53e3a1 VMOVSD %XMM0,(%RCX) |
(2918) 0x53e3a5 VMOVSD 0x10(%R11,%RAX,8),%XMM1 |
(2918) 0x53e3ac VMOVSD (%R15),%XMM2 |
(2918) 0x53e3b1 VFMADD132SD (%RDX),%XMM2,%XMM1 |
(2918) 0x53e3b6 MOV 0x18(%RSI,%RAX,8),%RCX |
(2918) 0x53e3bb ADD %R12,%RCX |
(2918) 0x53e3be LEA (%RBX,%RCX,8),%R10 |
(2918) 0x53e3c2 VMOVSD %XMM1,(%R15) |
(2918) 0x53e3c7 VMOVSD (%R10),%XMM6 |
(2918) 0x53e3cc MOV 0x20(%RSI,%RAX,8),%R15 |
(2918) 0x53e3d1 VMOVSD 0x18(%R11,%RAX,8),%XMM3 |
(2918) 0x53e3d8 VFMADD132SD (%RDX),%XMM6,%XMM3 |
(2918) 0x53e3dd ADD %R12,%R15 |
(2918) 0x53e3e0 LEA (%RBX,%R15,8),%RCX |
(2918) 0x53e3e4 VMOVSD %XMM3,(%R10) |
(2918) 0x53e3e9 VMOVSD 0x20(%R11,%RAX,8),%XMM4 |
(2918) 0x53e3f0 VMOVSD (%RCX),%XMM5 |
(2918) 0x53e3f4 VFMADD132SD (%RDX),%XMM5,%XMM4 |
(2918) 0x53e3f9 MOV 0x28(%RSI,%RAX,8),%R10 |
(2918) 0x53e3fe ADD %R12,%R10 |
(2918) 0x53e401 LEA (%RBX,%R10,8),%R15 |
(2918) 0x53e405 VMOVSD %XMM4,(%RCX) |
(2918) 0x53e409 VMOVSD 0x28(%R11,%RAX,8),%XMM8 |
(2918) 0x53e410 VMOVSD (%R15),%XMM9 |
(2918) 0x53e415 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(2918) 0x53e41a MOV 0x30(%RSI,%RAX,8),%RCX |
(2918) 0x53e41f ADD %R12,%RCX |
(2918) 0x53e422 LEA (%RBX,%RCX,8),%R10 |
(2918) 0x53e426 VMOVSD %XMM8,(%R15) |
(2918) 0x53e42b VMOVSD 0x30(%R11,%RAX,8),%XMM10 |
(2918) 0x53e432 VMOVSD (%R10),%XMM11 |
(2918) 0x53e437 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(2918) 0x53e43c MOV 0x38(%RSI,%RAX,8),%R15 |
(2918) 0x53e441 ADD %R12,%R15 |
(2918) 0x53e444 LEA (%RBX,%R15,8),%RCX |
(2918) 0x53e448 VMOVSD %XMM10,(%R10) |
(2918) 0x53e44d VMOVSD 0x38(%R11,%RAX,8),%XMM12 |
(2918) 0x53e454 VMOVSD (%RCX),%XMM13 |
(2918) 0x53e458 ADD $0x8,%RAX |
(2918) 0x53e45c VFMADD132SD (%RDX),%XMM13,%XMM12 |
(2918) 0x53e461 VMOVSD %XMM12,(%RCX) |
(2918) 0x53e465 CMP %R8,%RAX |
(2918) 0x53e468 JNE 53e359 |
(2917) 0x53e46e ADD $0x8,%RDX |
(2917) 0x53e472 ADD $0x8,%R14 |
(2917) 0x53e476 CMP %RDX,%RDI |
(2917) 0x53e479 JNE 53e210 |
0x53e47f MOV %R9,-0x48(%RBP) |
0x53e483 CALL 411290 <GOMP_barrier@plt> |
0x53e488 MOV -0x40(%RBP),%RAX |
0x53e48c MOV -0x48(%RBP),%R14 |
0x53e490 CQTO |
0x53e492 IDIVQ -0x38(%RBP) |
0x53e496 CMP %RDX,%R14 |
0x53e499 JL 53e616 |
0x53e49f IMUL %RAX,%R14 |
0x53e4a3 ADD %R14,%RDX |
0x53e4a6 ADD %RDX,%RAX |
0x53e4a9 CMP %RAX,%RDX |
0x53e4ac JGE 53e603 |
0x53e4b2 TEST %R13,%R13 |
0x53e4b5 JLE 53e603 |
0x53e4bb MOV -0x60(%RBP),%R11 |
0x53e4bf LEA (,%RDX,8),%R12 |
0x53e4c7 ADD %R12,%R11 |
0x53e4ca ADD %RBX,%R12 |
0x53e4cd MOV -0x40(%RBP),%RBX |
0x53e4d1 SAL $0x3,%RBX |
0x53e4d5 NOPL (%RAX) |
(2916) 0x53e4d8 MOV %R13,%R9 |
(2916) 0x53e4db VMOVSD (%R11),%XMM14 |
(2916) 0x53e4e0 MOV %R12,%RDI |
(2916) 0x53e4e3 XOR %ESI,%ESI |
(2916) 0x53e4e5 AND $0x7,%R9D |
(2916) 0x53e4e9 JE 53e586 |
(2916) 0x53e4ef CMP $0x1,%R9 |
(2916) 0x53e4f3 JE 53e572 |
(2916) 0x53e4f5 CMP $0x2,%R9 |
(2916) 0x53e4f9 JE 53e563 |
(2916) 0x53e4fb CMP $0x3,%R9 |
(2916) 0x53e4ff JE 53e554 |
(2916) 0x53e501 CMP $0x4,%R9 |
(2916) 0x53e505 JE 53e545 |
(2916) 0x53e507 CMP $0x5,%R9 |
(2916) 0x53e50b JE 53e536 |
(2916) 0x53e50d CMP $0x6,%R9 |
(2916) 0x53e511 JE 53e527 |
(2916) 0x53e513 VADDSD (%R12),%XMM14,%XMM14 |
(2916) 0x53e519 MOV $0x1,%ESI |
(2916) 0x53e51e LEA (%R12,%RBX,1),%RDI |
(2916) 0x53e522 VMOVSD %XMM14,(%R11) |
(2916) 0x53e527 VADDSD (%RDI),%XMM14,%XMM14 |
(2916) 0x53e52b INC %RSI |
(2916) 0x53e52e ADD %RBX,%RDI |
(2916) 0x53e531 VMOVSD %XMM14,(%R11) |
(2916) 0x53e536 VADDSD (%RDI),%XMM14,%XMM14 |
(2916) 0x53e53a INC %RSI |
(2916) 0x53e53d ADD %RBX,%RDI |
(2916) 0x53e540 VMOVSD %XMM14,(%R11) |
(2916) 0x53e545 VADDSD (%RDI),%XMM14,%XMM14 |
(2916) 0x53e549 INC %RSI |
(2916) 0x53e54c ADD %RBX,%RDI |
(2916) 0x53e54f VMOVSD %XMM14,(%R11) |
(2916) 0x53e554 VADDSD (%RDI),%XMM14,%XMM14 |
(2916) 0x53e558 INC %RSI |
(2916) 0x53e55b ADD %RBX,%RDI |
(2916) 0x53e55e VMOVSD %XMM14,(%R11) |
(2916) 0x53e563 VADDSD (%RDI),%XMM14,%XMM14 |
(2916) 0x53e567 INC %RSI |
(2916) 0x53e56a ADD %RBX,%RDI |
(2916) 0x53e56d VMOVSD %XMM14,(%R11) |
(2916) 0x53e572 VADDSD (%RDI),%XMM14,%XMM14 |
(2916) 0x53e576 INC %RSI |
(2916) 0x53e579 ADD %RBX,%RDI |
(2916) 0x53e57c VMOVSD %XMM14,(%R11) |
(2916) 0x53e581 CMP %RSI,%R13 |
(2916) 0x53e584 JE 53e5ef |
(2915) 0x53e586 VADDSD (%RDI),%XMM14,%XMM15 |
(2915) 0x53e58a ADD %RBX,%RDI |
(2915) 0x53e58d ADD $0x8,%RSI |
(2915) 0x53e591 VMOVSD %XMM15,(%R11) |
(2915) 0x53e596 VADDSD (%RDI),%XMM15,%XMM0 |
(2915) 0x53e59a ADD %RBX,%RDI |
(2915) 0x53e59d VMOVSD %XMM0,(%R11) |
(2915) 0x53e5a2 VADDSD (%RDI),%XMM0,%XMM7 |
(2915) 0x53e5a6 ADD %RBX,%RDI |
(2915) 0x53e5a9 VMOVSD %XMM7,(%R11) |
(2915) 0x53e5ae VADDSD (%RDI),%XMM7,%XMM1 |
(2915) 0x53e5b2 ADD %RBX,%RDI |
(2915) 0x53e5b5 VMOVSD %XMM1,(%R11) |
(2915) 0x53e5ba VADDSD (%RDI),%XMM1,%XMM2 |
(2915) 0x53e5be ADD %RBX,%RDI |
(2915) 0x53e5c1 VMOVSD %XMM2,(%R11) |
(2915) 0x53e5c6 VADDSD (%RDI),%XMM2,%XMM3 |
(2915) 0x53e5ca ADD %RBX,%RDI |
(2915) 0x53e5cd VMOVSD %XMM3,(%R11) |
(2915) 0x53e5d2 VADDSD (%RDI),%XMM3,%XMM6 |
(2915) 0x53e5d6 ADD %RBX,%RDI |
(2915) 0x53e5d9 VMOVSD %XMM6,(%R11) |
(2915) 0x53e5de VADDSD (%RDI),%XMM6,%XMM14 |
(2915) 0x53e5e2 ADD %RBX,%RDI |
(2915) 0x53e5e5 VMOVSD %XMM14,(%R11) |
(2915) 0x53e5ea CMP %RSI,%R13 |
(2915) 0x53e5ed JNE 53e586 |
(2916) 0x53e5ef INC %RDX |
(2916) 0x53e5f2 ADD $0x8,%R11 |
(2916) 0x53e5f6 ADD $0x8,%R12 |
(2916) 0x53e5fa CMP %RDX,%RAX |
(2916) 0x53e5fd JNE 53e4d8 |
0x53e603 ADD $0x38,%RSP |
0x53e607 POP %RBX |
0x53e608 POP %R12 |
0x53e60a POP %R13 |
0x53e60c POP %R14 |
0x53e60e POP %R15 |
0x53e610 POP %RBP |
0x53e611 JMP 411290 |
0x53e616 INC %RAX |
0x53e619 XOR %EDX,%EDX |
0x53e61b JMP 53e49f |
0x53e620 INC %RCX |
0x53e623 XOR %EDX,%EDX |
0x53e625 JMP 53e1e4 |
0x53e62a NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | csr_matvec.c:554-579 |
Module | exec |
nb instructions | 87 |
nb uops | 90 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
| | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 8.00 | 10.00 | 10.00 | 10.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 8.00 | 10.00 | 10.00 | 10.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 14.00-24.00 |
Front-end | 15.00 |
Dispatch | 10.00 |
DIV/SQRT | 14.00-24.00 |
Overall L1 | 15.00-24.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 54d120 <hypre_GetThreadNum> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 4110b0 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CLTQ | |||||||||||||||||
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 4111f0 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CQTO | |||||||||||||||||
IDIVQ -0x38(%RBP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 |
CMP %RDX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JL 53e620 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 53e47f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RDX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 411290 <GOMP_barrier@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CQTO | |||||||||||||||||
IDIVQ -0x38(%RBP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 |
CMP %RDX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 53e616 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 53e603 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %R13,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 53e603 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RBX,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x3,%RBX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 411290 <GOMP_barrier@plt> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 53e49f <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
INC %RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 53e1e4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | csr_matvec.c:554-579 |
Module | exec |
nb instructions | 87 |
nb uops | 90 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
| | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 8.00 | 10.00 | 10.00 | 10.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 8.00 | 10.00 | 10.00 | 10.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 14.00-24.00 |
Front-end | 15.00 |
Dispatch | 10.00 |
DIV/SQRT | 14.00-24.00 |
Overall L1 | 15.00-24.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 54d120 <hypre_GetThreadNum> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 4110b0 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CLTQ | |||||||||||||||||
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 4111f0 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CQTO | |||||||||||||||||
IDIVQ -0x38(%RBP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 |
CMP %RDX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JL 53e620 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 53e47f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RDX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 411290 <GOMP_barrier@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CQTO | |||||||||||||||||
IDIVQ -0x38(%RBP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 |
CMP %RDX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 53e616 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 53e603 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %R13,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 53e603 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RBX,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x3,%RBX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JMP 411290 <GOMP_barrier@plt> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 53e49f <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
INC %RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 53e1e4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecT._omp_fn.3– | 1.53 | 0.22 |
▼Loop 2917 - csr_matvec.c:564-567 - exec– | 1.28 | 0.14 |
○Loop 2918 - csr_matvec.c:564-567 - exec | 0.04 | 0 |
▼Loop 2916 - csr_matvec.c:577-579 - exec– | 0 | 0 |
○Loop 2915 - csr_matvec.c:577-579 - exec | 0.21 | 0.02 |