Function: hypre_CSRMatrixMatvecOutOfPlace.extracted.19 | Module: libseq_mv.so | Source: csr_matvec.c:178-206 [...] | Coverage: 0.03% |
---|
Function: hypre_CSRMatrixMatvecOutOfPlace.extracted.19 | Module: libseq_mv.so | Source: csr_matvec.c:178-206 [...] | Coverage: 0.03% |
---|
/scratch_na/users/xoserete/qaas_runs/171-587-0005/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 178 - 206 |
-------------------------------------------------------------------------------- |
178: #pragma omp parallel for private(i,j,jj,m,tempx) HYPRE_SMP_SCHEDULE |
179: #endif |
180: |
181: for (i = 0; i < num_rownnz; i++) |
182: { |
183: m = A_rownnz[i]; |
[...] |
191: if ( num_vectors==1 ) |
192: { |
193: tempx = 0; |
194: for (jj = A_i[m]; jj < A_i[m+1]; jj++) |
195: tempx += A_data[jj] * x_data[A_j[jj]]; |
196: y_data[m] += tempx; |
197: } |
198: else |
199: for ( j=0; j<num_vectors; ++j ) |
200: { |
201: tempx = 0; |
202: for (jj = A_i[m]; jj < A_i[m+1]; jj++) |
203: tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ]; |
204: y_data[ j*vecstride_y + m*idxstride_y] += tempx; |
205: } |
206: } |
0x62a0 PUSH %RBP |
0x62a1 MOV %RSP,%RBP |
0x62a4 PUSH %R15 |
0x62a6 PUSH %R14 |
0x62a8 PUSH %R13 |
0x62aa PUSH %R12 |
0x62ac PUSH %RBX |
0x62ad SUB $0xc8,%RSP |
0x62b4 MOV %R9,-0x98(%RBP) |
0x62bb MOV %R8,-0x70(%RBP) |
0x62bf MOV %RCX,-0x50(%RBP) |
0x62c3 MOV %RDX,-0x68(%RBP) |
0x62c7 MOV 0x40(%RBP),%R13 |
0x62cb MOV 0x38(%RBP),%R15 |
0x62cf MOV 0x30(%RBP),%RAX |
0x62d3 MOV %RAX,-0xe0(%RBP) |
0x62da MOV 0x28(%RBP),%RAX |
0x62de MOV %RAX,-0x80(%RBP) |
0x62e2 MOV 0x20(%RBP),%RAX |
0x62e6 MOV %RAX,-0x58(%RBP) |
0x62ea MOV 0x18(%RBP),%RAX |
0x62ee MOV %RAX,-0x48(%RBP) |
0x62f2 MOV 0x10(%RBP),%RAX |
0x62f6 MOV %RAX,-0x60(%RBP) |
0x62fa MOVL $0,-0x40(%RBP) |
0x6301 MOV (%RDI),%ESI |
0x6303 MOVQ $0,-0x78(%RBP) |
0x630b MOVQ $0x1,-0xe8(%RBP) |
0x6316 SUB $0x8,%RSP |
0x631a LEA -0xe8(%RBP),%RAX |
0x6321 LEA 0x209138(%RIP),%RDI |
0x6328 LEA -0x40(%RBP),%RCX |
0x632c LEA -0x78(%RBP),%R8 |
0x6330 LEA 0x50(%RBP),%R9 |
0x6334 MOV %ESI,-0x3c(%RBP) |
0x6337 MOV $0x22,%EDX |
0x633c PUSH $0x1 |
0x633e PUSH $0x1 |
0x6340 PUSH %RAX |
0x6341 CALL 2240 <__kmpc_for_static_init_8@plt> |
0x6346 ADD $0x20,%RSP |
0x634a MOV -0x78(%RBP),%RAX |
0x634e MOV 0x50(%RBP),%RCX |
0x6352 MOV %RAX,-0x90(%RBP) |
0x6359 SUB %RAX,%RCX |
0x635c MOV %RCX,-0x88(%RBP) |
0x6363 JAE 6388 |
0x6365 LEA 0x209114(%RIP),%RDI |
0x636c MOV -0x3c(%RBP),%ESI |
0x636f ADD $0xc8,%RSP |
0x6376 POP %RBX |
0x6377 POP %R12 |
0x6379 POP %R13 |
0x637b POP %R14 |
0x637d POP %R15 |
0x637f POP %RBP |
0x6380 VZEROUPPER |
0x6383 JMP 21c0 |
0x6388 MOV -0x58(%RBP),%RAX |
0x638c DEC %RAX |
0x638f MOV %RAX,-0xd8(%RBP) |
0x6396 XOR %ECX,%ECX |
0x6398 JMP 63ca |
0x639a NOPW (%RAX,%RAX,1) |
(102) 0x63a0 MOV -0x48(%RBP),%RAX |
(102) 0x63a4 MOV (%RAX),%RAX |
(102) 0x63a7 VADDSD (%RAX,%R12,8),%XMM0,%XMM0 |
(102) 0x63ad VMOVSD %XMM0,(%RAX,%R12,8) |
(102) 0x63b3 MOV -0xa0(%RBP),%RCX |
(102) 0x63ba LEA 0x1(%RCX),%RAX |
(102) 0x63be CMP -0x88(%RBP),%RCX |
(102) 0x63c5 MOV %RAX,%RCX |
(102) 0x63c8 JE 6365 |
(102) 0x63ca MOV -0x90(%RBP),%RAX |
(102) 0x63d1 MOV %RCX,-0xa0(%RBP) |
(102) 0x63d8 ADD %RCX,%RAX |
(102) 0x63db MOV -0x98(%RBP),%RCX |
(102) 0x63e2 MOV (%RCX,%RAX,8),%R12 |
(102) 0x63e6 CMPQ $0x1,-0x58(%RBP) |
(102) 0x63eb JNE 64a0 |
(102) 0x63f1 MOV -0x50(%RBP),%RAX |
(102) 0x63f5 MOV (%RAX),%RAX |
(102) 0x63f8 MOV (%RAX,%R12,8),%R10 |
(102) 0x63fc MOV 0x8(%RAX,%R12,8),%RAX |
(102) 0x6401 VXORPD %XMM0,%XMM0,%XMM0 |
(102) 0x6405 MOV %RAX,%RDX |
(102) 0x6408 SUB %R10,%RDX |
(102) 0x640b JLE 63a0 |
(102) 0x640d MOV -0x68(%RBP),%RCX |
(102) 0x6411 MOV (%RCX),%RCX |
(102) 0x6414 MOV -0x60(%RBP),%RSI |
(102) 0x6418 MOV (%RSI),%RSI |
(102) 0x641b MOV -0x70(%RBP),%RDI |
(102) 0x641f MOV (%RDI),%R8 |
(102) 0x6422 MOV %RDX,%R9 |
(102) 0x6425 AND $-0x2,%R9 |
(102) 0x6429 JE 666b |
(102) 0x642f MOV %R12,-0x30(%RBP) |
(102) 0x6433 LEA -0x1(%R9),%RDI |
(102) 0x6437 LEA (%RCX,%R10,8),%R11 |
(102) 0x643b MOV %R10,-0x38(%RBP) |
(102) 0x643f LEA (%R8,%R10,8),%RBX |
(102) 0x6443 VXORPD %XMM0,%XMM0,%XMM0 |
(102) 0x6447 XOR %R14D,%R14D |
(102) 0x644a NOPW (%RAX,%RAX,1) |
(107) 0x6450 MOV (%RBX,%R14,8),%R12 |
(107) 0x6454 MOV 0x8(%RBX,%R14,8),%R10 |
(107) 0x6459 VMOVSD (%RSI,%R12,8),%XMM1 |
(107) 0x645f VMOVHPD (%RSI,%R10,8),%XMM1,%XMM1 |
(107) 0x6465 VFMADD231PD (%R11,%R14,8),%XMM1,%XMM0 |
(107) 0x646b ADD $0x2,%R14 |
(107) 0x646f CMP %RDI,%R14 |
(107) 0x6472 JBE 6450 |
(102) 0x6474 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
(102) 0x6479 VADDSD %XMM1,%XMM0,%XMM0 |
(102) 0x647d CMP %R9,%RDX |
(102) 0x6480 MOV -0x30(%RBP),%R12 |
(102) 0x6484 MOV -0x38(%RBP),%R10 |
(102) 0x6488 JE 63a0 |
(102) 0x648e JMP 666e |
0x6493 NOPW %CS:(%RAX,%RAX,1) |
(102) 0x64a0 JL 63b3 |
(102) 0x64a6 MOV -0x50(%RBP),%RAX |
(102) 0x64aa MOV (%RAX),%RAX |
(102) 0x64ad MOV (%RAX,%R12,8),%RDX |
(102) 0x64b1 MOV 0x8(%RAX,%R12,8),%RSI |
(102) 0x64b6 MOV -0x48(%RBP),%RAX |
(102) 0x64ba MOV (%RAX),%RDI |
(102) 0x64bd MOV %RSI,%R9 |
(102) 0x64c0 SUB %RDX,%R9 |
(102) 0x64c3 IMUL -0x80(%RBP),%R12 |
(102) 0x64c8 LEA (,%RDX,8),%RAX |
(102) 0x64d0 MOV %RAX,-0xa8(%RBP) |
(102) 0x64d7 XOR %R8D,%R8D |
(102) 0x64da MOV %R12,-0x30(%RBP) |
(102) 0x64de MOV %RDX,-0xc8(%RBP) |
(102) 0x64e5 MOV %RSI,-0xc0(%RBP) |
(102) 0x64ec MOV %RDI,-0xb8(%RBP) |
(102) 0x64f3 MOV %R9,-0xb0(%RBP) |
(102) 0x64fa JMP 652c |
0x64fc NOPL (%RAX) |
(103) 0x6500 MOV %R8,%RAX |
(103) 0x6503 IMUL -0xe0(%RBP),%RAX |
(103) 0x650b ADD %R12,%RAX |
(103) 0x650e VADDSD (%RDI,%RAX,8),%XMM0,%XMM0 |
(103) 0x6513 VMOVSD %XMM0,(%RDI,%RAX,8) |
(103) 0x6518 LEA 0x1(%R8),%RAX |
(103) 0x651c CMP -0xd8(%RBP),%R8 |
(103) 0x6523 MOV %RAX,%R8 |
(103) 0x6526 JE 63b3 |
(103) 0x652c VXORPD %XMM0,%XMM0,%XMM0 |
(103) 0x6530 CMP %RDX,%RSI |
(103) 0x6533 JLE 6500 |
(103) 0x6535 MOV -0x68(%RBP),%RAX |
(103) 0x6539 MOV (%RAX),%RBX |
(103) 0x653c MOV -0x60(%RBP),%RAX |
(103) 0x6540 MOV (%RAX),%R14 |
(103) 0x6543 MOV -0x70(%RBP),%RAX |
(103) 0x6547 MOV (%RAX),%R10 |
(103) 0x654a MOV %R9,%R11 |
(103) 0x654d AND $-0x4,%R11 |
(103) 0x6551 JE 6632 |
(103) 0x6557 LEA -0x1(%R11),%RDX |
(103) 0x655b MOV -0xa8(%RBP),%RCX |
(103) 0x6562 MOV %RBX,-0x38(%RBP) |
(103) 0x6566 LEA (%RBX,%RCX,1),%RAX |
(103) 0x656a MOV %R10,-0xd0(%RBP) |
(103) 0x6571 LEA (%R10,%RCX,1),%RDI |
(103) 0x6575 VXORPD %XMM0,%XMM0,%XMM0 |
(103) 0x6579 XOR %EBX,%EBX |
(103) 0x657b NOPL (%RAX,%RAX,1) |
(105) 0x6580 MOV %R8,%RCX |
(105) 0x6583 IMUL %R13,%RCX |
(105) 0x6587 MOV (%RDI,%RBX,8),%R10 |
(105) 0x658b IMUL %R15,%R10 |
(105) 0x658f ADD %RCX,%R10 |
(105) 0x6592 MOV %R13,%RSI |
(105) 0x6595 MOV 0x8(%RDI,%RBX,8),%R13 |
(105) 0x659a IMUL %R15,%R13 |
(105) 0x659e ADD %RCX,%R13 |
(105) 0x65a1 MOV 0x10(%RDI,%RBX,8),%R9 |
(105) 0x65a6 IMUL %R15,%R9 |
(105) 0x65aa ADD %RCX,%R9 |
(105) 0x65ad MOV 0x18(%RDI,%RBX,8),%R12 |
(105) 0x65b2 IMUL %R15,%R12 |
(105) 0x65b6 ADD %RCX,%R12 |
(105) 0x65b9 VMOVSD (%R14,%R9,8),%XMM1 |
(105) 0x65bf VMOVHPD (%R14,%R12,8),%XMM1,%XMM1 |
(105) 0x65c5 VMOVSD (%R14,%R10,8),%XMM2 |
(105) 0x65cb VMOVHPD (%R14,%R13,8),%XMM2,%XMM2 |
(105) 0x65d1 MOV %RSI,%R13 |
(105) 0x65d4 VINSERTF128 $0x1,%XMM1,%YMM2,%YMM1 |
(105) 0x65da VFMADD231PD (%RAX,%RBX,8),%YMM1,%YMM0 |
(105) 0x65e0 ADD $0x4,%RBX |
(105) 0x65e4 CMP %RDX,%RBX |
(105) 0x65e7 JBE 6580 |
(103) 0x65e9 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
(103) 0x65ef VADDPD %XMM1,%XMM0,%XMM0 |
(103) 0x65f3 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
(103) 0x65f8 VADDSD %XMM1,%XMM0,%XMM0 |
(103) 0x65fc MOV -0xb0(%RBP),%R9 |
(103) 0x6603 CMP %R11,%R9 |
(103) 0x6606 MOV -0x30(%RBP),%R12 |
(103) 0x660a MOV -0xc8(%RBP),%RDX |
(103) 0x6611 MOV -0xc0(%RBP),%RSI |
(103) 0x6618 MOV -0xb8(%RBP),%RDI |
(103) 0x661f MOV -0x38(%RBP),%RBX |
(103) 0x6623 MOV -0xd0(%RBP),%R10 |
(103) 0x662a JE 6500 |
(103) 0x6630 JMP 6635 |
(103) 0x6632 XOR %R11D,%R11D |
(103) 0x6635 ADD %RDX,%R11 |
(103) 0x6638 NOPL (%RAX,%RAX,1) |
(104) 0x6640 MOV (%R10,%R11,8),%RAX |
(104) 0x6644 IMUL %R15,%RAX |
(104) 0x6648 MOV %R8,%RCX |
(104) 0x664b IMUL %R13,%RCX |
(104) 0x664f ADD %RAX,%RCX |
(104) 0x6652 VMOVSD (%R14,%RCX,8),%XMM1 |
(104) 0x6658 VFMADD231SD (%RBX,%R11,8),%XMM1,%XMM0 |
(104) 0x665e INC %R11 |
(104) 0x6661 CMP %R11,%RSI |
(104) 0x6664 JNE 6640 |
(103) 0x6666 JMP 6500 |
(102) 0x666b XOR %R9D,%R9D |
(102) 0x666e ADD %R10,%R9 |
(102) 0x6671 NOPW %CS:(%RAX,%RAX,1) |
(106) 0x6680 MOV (%R8,%R9,8),%RDX |
(106) 0x6684 VMOVSD (%RSI,%RDX,8),%XMM1 |
(106) 0x6689 VFMADD231SD (%RCX,%R9,8),%XMM1,%XMM0 |
(106) 0x668f INC %R9 |
(106) 0x6692 CMP %R9,%RAX |
(106) 0x6695 JNE 6680 |
(102) 0x6697 JMP 63a0 |
0x669c NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | csr_matvec.c:178-206 |
Module | libseq_mv.so |
nb instructions | 67 |
nb uops | 69 |
loop length | 277 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 11.50 cycles |
front end | 11.50 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.10 | 1.00 | 6.00 | 6.00 | 13.00 | 1.00 | 0.90 | 13.00 | 13.00 | 13.00 | 1.00 | 6.00 |
cycles | 1.10 | 1.00 | 6.00 | 6.00 | 13.00 | 1.00 | 0.90 | 13.00 | 13.00 | 13.00 | 1.00 | 6.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 13.09 |
Stall cycles | 1.46-1.46 |
RS full (events) | 5.26-5.25 |
Front-end | 11.50 |
Dispatch | 13.00 |
Overall L1 | 13.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 11% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xc8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0xe8(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x209138(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x78(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x50(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ESI,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 2240 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JAE 6388 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0xe8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x209114(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x3c(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0xc8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 21c0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 63ca <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x12a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | csr_matvec.c:178-206 |
Module | libseq_mv.so |
nb instructions | 67 |
nb uops | 69 |
loop length | 277 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 11.50 cycles |
front end | 11.50 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.10 | 1.00 | 6.00 | 6.00 | 13.00 | 1.00 | 0.90 | 13.00 | 13.00 | 13.00 | 1.00 | 6.00 |
cycles | 1.10 | 1.00 | 6.00 | 6.00 | 13.00 | 1.00 | 0.90 | 13.00 | 13.00 | 13.00 | 1.00 | 6.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 13.09 |
Stall cycles | 1.46-1.46 |
RS full (events) | 5.26-5.25 |
Front-end | 11.50 |
Dispatch | 13.00 |
Overall L1 | 13.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 11% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xc8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0xe8(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x209138(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x78(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x50(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ESI,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 2240 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JAE 6388 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0xe8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x209114(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x3c(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0xc8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 21c0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 63ca <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x12a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecOutOfPlace.extracted.19– | 0.03 | 0.01 |
▼Loop 102 - csr_matvec.c:178-206 - libseq_mv.so– | 0 | 0 |
○Loop 107 - csr_matvec.c:194-195 - libseq_mv.so | 0.02 | 0.01 |
○Loop 106 - csr_matvec.c:194-195 - libseq_mv.so | 0 | 0 |
▼Loop 103 - csr_matvec.c:178-204 - libseq_mv.so– | 0 | 0 |
○Loop 105 - csr_matvec.c:202-203 - libseq_mv.so | 0 | 0 |
○Loop 104 - csr_matvec.c:202-203 - libseq_mv.so | 0 | 0 |