Function: hypre_IJMatrixSetValuesOMPParCSR.extracted.28 | Module: libIJ_mv.so | Source: IJMatrix_parcsr.c:3240-3484 [...] | Coverage: 0.42% |
---|
Function: hypre_IJMatrixSetValuesOMPParCSR.extracted.28 | Module: libIJ_mv.so | Source: IJMatrix_parcsr.c:3240-3484 [...] | Coverage: 0.42% |
---|
/scratch_na/users/xoserete/qaas_runs/171-587-0005/intel/AMG/build/AMG/AMG/IJ_mv/IJMatrix_parcsr.c: 3240 - 3484 |
-------------------------------------------------------------------------------- |
3240: #pragma omp parallel |
[...] |
3256: num_threads = hypre_NumActiveThreads(); |
3257: my_thread_num = hypre_GetThreadNum(); |
3258: |
3259: len = nrows/num_threads; |
3260: rest = nrows - len*num_threads; |
3261: |
3262: if (my_thread_num < rest) |
3263: { |
3264: ns = my_thread_num*(len+1); |
3265: ne = (my_thread_num+1)*(len+1); |
3266: } |
3267: else |
3268: { |
3269: ns = my_thread_num*len+rest; |
3270: ne = (my_thread_num+1)*len+rest; |
3271: } |
3272: |
3273: value_start[my_thread_num] = 0; |
3274: for (ii=ns; ii < ne; ii++) |
3275: value_start[my_thread_num] += ncols[ii]; |
3276: |
3277: #ifdef HYPRE_USING_OPENMP |
3278: #pragma omp barrier |
3279: #endif |
3280: if (my_thread_num == 0) |
3281: { |
3282: for (i=0; i < max_num_threads; i++) |
3283: value_start[i+1] += value_start[i]; |
[...] |
3289: if (my_thread_num) |
3290: indx = value_start[my_thread_num-1]; |
3291: for (ii=ns; ii < ne; ii++) |
3292: { |
3293: row = rows[ii]; |
3294: n = ncols[ii]; |
3295: /* processor owns the row */ |
3296: if (row >= row_partitioning[pstart] && row < row_partitioning[pstart+1]) |
3297: { |
3298: row_local = row - row_partitioning[pstart]; |
3299: /* compute local row number */ |
3300: if (need_aux) |
3301: { |
3302: local_j = aux_j[row_local]; |
3303: local_data = aux_data[row_local]; |
3304: space = row_space[row_local]; |
3305: old_size = row_length[row_local]; |
3306: size = space - old_size; |
3307: if (size < n) |
3308: { |
3309: size = n - size; |
3310: tmp_j = hypre_CTAlloc(HYPRE_Int,size); |
3311: tmp_data = hypre_CTAlloc(HYPRE_Complex,size); |
3312: } |
3313: tmp_indx = 0; |
3314: not_found = 1; |
3315: size = old_size; |
3316: for (i=0; i < n; i++) |
3317: { |
3318: for (j=0; j < old_size; j++) |
3319: { |
3320: if (local_j[j] == cols[indx]) |
3321: { |
3322: local_data[j] = values[indx]; |
[...] |
3329: if (size < space) |
3330: { |
3331: local_j[size] = cols[indx]; |
3332: local_data[size++] = values[indx]; |
3333: } |
3334: else |
3335: { |
3336: tmp_j[tmp_indx] = cols[indx]; |
3337: tmp_data[tmp_indx++] = values[indx]; |
[...] |
3344: row_length[row_local] = size+tmp_indx; |
3345: |
3346: if (tmp_indx) |
3347: { |
3348: aux_j[row_local] = hypre_TReAlloc(aux_j[row_local],HYPRE_Int, |
3349: size+tmp_indx); |
3350: aux_data[row_local] = hypre_TReAlloc(aux_data[row_local], |
3351: HYPRE_Complex,size+tmp_indx); |
3352: row_space[row_local] = size+tmp_indx; |
3353: local_j = aux_j[row_local]; |
[...] |
3359: for (i=0; i < tmp_indx; i++) |
3360: { |
3361: local_j[cnt] = tmp_j[i]; |
3362: local_data[cnt++] = tmp_data[i]; |
3363: } |
3364: |
3365: if (tmp_j) |
3366: { |
3367: hypre_TFree(tmp_j); |
3368: hypre_TFree(tmp_data); |
[...] |
3376: offd_indx = hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local]; |
3377: diag_indx = hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local]; |
3378: cnt_diag = diag_indx; |
3379: cnt_offd = offd_indx; |
3380: diag_space = diag_i[row_local+1]; |
3381: offd_space = offd_i[row_local+1]; |
3382: not_found = 1; |
3383: for (i=0; i < n; i++) |
3384: { |
3385: if (cols[indx] < col_0 || cols[indx] > col_n) |
3386: /* insert into offd */ |
3387: { |
3388: for (j=offd_i[row_local]; j < offd_indx; j++) |
3389: { |
3390: if (offd_j[j] == cols[indx]) |
3391: { |
3392: offd_data[j] = values[indx]; |
[...] |
3399: if (cnt_offd < offd_space) |
3400: { |
3401: offd_j[cnt_offd] = cols[indx]; |
3402: offd_data[cnt_offd++] = values[indx]; |
3403: } |
3404: else |
3405: { |
3406: hypre_error(HYPRE_ERROR_GENERIC); |
3407: #ifdef HYPRE_USING_OPENMP |
3408: #pragma omp atomic |
3409: #endif |
3410: error_flag++; |
3411: if (print_level) |
3412: hypre_printf("Error in row %d ! Too many elements!\n", |
[...] |
3422: for (j=diag_i[row_local]; j < diag_indx; j++) |
3423: { |
3424: if (diag_j[j] == cols[indx]) |
3425: { |
3426: diag_data[j] = values[indx]; |
[...] |
3433: if (cnt_diag < diag_space) |
3434: { |
3435: diag_j[cnt_diag] = cols[indx]; |
3436: diag_data[cnt_diag++] = values[indx]; |
3437: } |
3438: else |
3439: { |
3440: hypre_error(HYPRE_ERROR_GENERIC); |
3441: #ifdef HYPRE_USING_OPENMP |
3442: #pragma omp atomic |
3443: #endif |
3444: error_flag++; |
3445: if (print_level) |
3446: hypre_printf("Error in row %d ! Too many elements !\n", |
[...] |
3454: indx++; |
3455: } |
3456: |
3457: hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local] = cnt_diag; |
3458: hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local] = cnt_offd; |
[...] |
3466: indx += n; |
3467: if (aux_matrix) |
3468: { |
3469: col_indx = 0; |
3470: for (i=0; i < off_proc_i_indx; i=i+2) |
3471: { |
3472: row_len = off_proc_i[i+1]; |
3473: if (off_proc_i[i] == row) |
3474: { |
3475: for (j=0; j < n; j++) |
3476: { |
3477: cnt1 = col_indx; |
3478: for (k=0; k < row_len; k++) |
3479: { |
3480: if (off_proc_j[cnt1] == cols[j]) |
3481: { |
3482: off_proc_j[cnt1++] = -1; |
3483: /*cancel_indx++;*/ |
3484: offproc_cnt[my_thread_num]++; |
0xe060 PUSH %RBP |
0xe061 MOV %RSP,%RBP |
0xe064 PUSH %R15 |
0xe066 PUSH %R14 |
0xe068 PUSH %R13 |
0xe06a PUSH %R12 |
0xe06c PUSH %RBX |
0xe06d SUB $0x88,%RSP |
0xe074 MOV %R9,%RBX |
0xe077 MOV %R8,-0xa0(%RBP) |
0xe07e MOV %RCX,-0x38(%RBP) |
0xe082 MOV %RDX,%R15 |
0xe085 MOV %RDI,%R14 |
0xe088 CALL 2d90 <hypre_NumActiveThreads@plt> |
0xe08d MOV %RAX,%R13 |
0xe090 CALL 2b50 <hypre_GetThreadNum@plt> |
0xe095 MOV %RAX,%R12 |
0xe098 MOV %R13,%RAX |
0xe09b OR %R15,%RAX |
0xe09e SHR $0x20,%RAX |
0xe0a2 JE e0ae |
0xe0a4 MOV %R15,%RAX |
0xe0a7 CQTO |
0xe0a9 IDIV %R13 |
0xe0ac JMP e0b6 |
0xe0ae MOV %R15D,%EAX |
0xe0b1 XOR %EDX,%EDX |
0xe0b3 DIV %R13D |
0xe0b6 MOV 0xb8(%RBP),%R15 |
0xe0bd LEA 0x1(%R12),%RCX |
0xe0c2 CMP %RDX,%R12 |
0xe0c5 MOV -0x38(%RBP),%R8 |
0xe0c9 JGE e0dc |
0xe0cb LEA 0x1(%RAX),%RDI |
0xe0cf MOV %RDI,%RSI |
0xe0d2 IMUL %R12,%RSI |
0xe0d6 IMUL %RCX,%RDI |
0xe0da JMP e0f0 |
0xe0dc MOV %RAX,%RSI |
0xe0df IMUL %R12,%RSI |
0xe0e3 ADD %RDX,%RSI |
0xe0e6 IMUL %RAX,%RCX |
0xe0ea ADD %RDX,%RCX |
0xe0ed MOV %RCX,%RDI |
0xe0f0 MOVQ $0,(%R15,%R12,8) |
0xe0f8 MOV %RSI,%R13 |
0xe0fb CMP %RDI,%RSI |
0xe0fe MOV %RDI,-0x30(%RBP) |
0xe102 JGE e1df |
0xe108 LEA (%R15,%R12,8),%RCX |
0xe10c CMP %RDX,%R12 |
0xe10f CMOVL %R12,%RDX |
0xe113 LEA (%R8,%RDI,8),%RDI |
0xe117 ADD $-0x8,%RDI |
0xe11b IMUL %R12,%RAX |
0xe11f LEA (%RDX,%RAX,1),%RSI |
0xe123 CMP %RCX,%RDI |
0xe126 JB e154 |
0xe128 LEA (%R8,%RSI,8),%RDI |
0xe12c CMP %RDI,%RCX |
0xe12f JB e154 |
0xe131 XOR %EAX,%EAX |
0xe133 MOV %R13,%RDX |
0xe136 MOV -0x30(%RBP),%RSI |
0xe13a NOPW (%RAX,%RAX,1) |
(215) 0xe140 ADD (%R8,%RDX,8),%RAX |
(215) 0xe144 MOV %RAX,(%RCX) |
(215) 0xe147 INC %RDX |
(215) 0xe14a CMP %RDX,%RSI |
(215) 0xe14d JNE e140 |
0xe14f JMP e1df |
0xe154 MOV -0x30(%RBP),%R8 |
0xe158 SUB %RSI,%R8 |
0xe15b MOV %R8,%RSI |
0xe15e AND $-0x4,%RSI |
0xe162 JE e1b6 |
0xe164 LEA -0x1(%RSI),%RDI |
0xe168 MOV -0x38(%RBP),%R9 |
0xe16c LEA (%R9,%R13,8),%R9 |
0xe170 VPXOR %XMM0,%XMM0,%XMM0 |
0xe174 XOR %R10D,%R10D |
0xe177 NOPW (%RAX,%RAX,1) |
(214) 0xe180 VPADDQ (%R9,%R10,8),%YMM0,%YMM0 |
(214) 0xe186 ADD $0x4,%R10 |
(214) 0xe18a CMP %RDI,%R10 |
(214) 0xe18d JBE e180 |
0xe18f VEXTRACTI128 $0x1,%YMM0,%XMM1 |
0xe195 VPADDQ %XMM1,%XMM0,%XMM0 |
0xe199 VPSHUFD $-0x12,%XMM0,%XMM1 |
0xe19e VPADDQ %XMM1,%XMM0,%XMM0 |
0xe1a2 VMOVQ %XMM0,%RDI |
0xe1a7 CMP %RSI,%R8 |
0xe1aa MOV -0x38(%RBP),%R8 |
0xe1ae MOV -0x30(%RBP),%R9 |
0xe1b2 JNE e1c2 |
0xe1b4 JMP e1dc |
0xe1b6 XOR %ESI,%ESI |
0xe1b8 XOR %EDI,%EDI |
0xe1ba MOV -0x38(%RBP),%R8 |
0xe1be MOV -0x30(%RBP),%R9 |
0xe1c2 ADD %RDX,%RSI |
0xe1c5 ADD %RAX,%RSI |
0xe1c8 NOPL (%RAX,%RAX,1) |
(213) 0xe1d0 ADD (%R8,%RSI,8),%RDI |
(213) 0xe1d4 INC %RSI |
(213) 0xe1d7 CMP %RSI,%R9 |
(213) 0xe1da JNE e1d0 |
0xe1dc MOV %RDI,(%RCX) |
0xe1df MOV (%R14),%ESI |
0xe1e2 LEA 0x2090a7(%RIP),%RDI |
0xe1e9 VZEROUPPER |
0xe1ec CALL 2e10 <__kmpc_barrier@plt> |
0xe1f1 TEST %R12,%R12 |
0xe1f4 JNE e296 |
0xe1fa MOV 0xc8(%RBP),%RAX |
0xe201 TEST %RAX,%RAX |
0xe204 JLE e296 |
0xe20a MOV (%R15),%RCX |
0xe20d CMP $0x8,%RAX |
0xe211 JB e267 |
0xe213 MOV %RAX,%RDX |
0xe216 SHR $0x3,%RDX |
0xe21a LEA 0x40(%R15),%RSI |
0xe21e XCHG %AX,%AX |
(212) 0xe220 ADD -0x38(%RSI),%RCX |
(212) 0xe224 MOV %RCX,-0x38(%RSI) |
(212) 0xe228 ADD -0x30(%RSI),%RCX |
(212) 0xe22c MOV %RCX,-0x30(%RSI) |
(212) 0xe230 ADD -0x28(%RSI),%RCX |
(212) 0xe234 MOV %RCX,-0x28(%RSI) |
(212) 0xe238 ADD -0x20(%RSI),%RCX |
(212) 0xe23c MOV %RCX,-0x20(%RSI) |
(212) 0xe240 ADD -0x18(%RSI),%RCX |
(212) 0xe244 MOV %RCX,-0x18(%RSI) |
(212) 0xe248 ADD -0x10(%RSI),%RCX |
(212) 0xe24c MOV %RCX,-0x10(%RSI) |
(212) 0xe250 ADD -0x8(%RSI),%RCX |
(212) 0xe254 MOV %RCX,-0x8(%RSI) |
(212) 0xe258 ADD (%RSI),%RCX |
(212) 0xe25b MOV %RCX,(%RSI) |
(212) 0xe25e ADD $0x40,%RSI |
(212) 0xe262 DEC %RDX |
(212) 0xe265 JNE e220 |
0xe267 MOV %RAX,%RDX |
0xe26a AND $-0x8,%RDX |
0xe26e CMP %RAX,%RDX |
0xe271 JE e296 |
0xe273 NOPW %CS:(%RAX,%RAX,1) |
(211) 0xe280 ADD 0x8(%R15,%RDX,8),%RCX |
(211) 0xe285 MOV %RCX,0x8(%R15,%RDX,8) |
(211) 0xe28a LEA 0x1(%RDX),%RSI |
(211) 0xe28e MOV %RSI,%RDX |
(211) 0xe291 CMP %RSI,%RAX |
(211) 0xe294 JNE e280 |
0xe296 MOV (%R14),%ESI |
0xe299 LEA 0x209010(%RIP),%RDI |
0xe2a0 CALL 2e10 <__kmpc_barrier@plt> |
0xe2a5 TEST %R12,%R12 |
0xe2a8 JE e2b1 |
0xe2aa MOV -0x8(%R15,%R12,8),%RCX |
0xe2af JMP e2b3 |
0xe2b1 XOR %ECX,%ECX |
0xe2b3 MOV -0x38(%RBP),%RDX |
0xe2b7 MOV %R13,%RSI |
0xe2ba MOV -0x30(%RBP),%RDI |
0xe2be CMP %RDI,%R13 |
0xe2c1 JGE ee9b |
0xe2c7 MOV 0xb0(%RBP),%RAX |
0xe2ce MOV 0x98(%RBP),%R8 |
0xe2d5 LEA (%RAX,%R12,8),%R15 |
0xe2d9 LEA -0x1(%R8),%RAX |
0xe2dd SHR $0x1,%RAX |
0xe2e0 MOV %RAX,-0xa8(%RBP) |
0xe2e7 VPCMPEQD %YMM7,%YMM7,%YMM7 |
0xe2eb MOV 0x20(%RBP),%R8 |
0xe2ef MOVQ $0,-0x60(%RBP) |
0xe2f7 JMP e320 |
0xe2f9 NOPL (%RAX) |
(189) 0xe300 MOV -0x38(%RBP),%RDX |
(189) 0xe304 MOV -0x48(%RBP),%RSI |
(189) 0xe308 MOV -0x30(%RBP),%RDI |
(189) 0xe30c MOV -0x78(%RBP),%RCX |
(189) 0xe310 MOV 0x20(%RBP),%R8 |
(189) 0xe314 INC %RSI |
(189) 0xe317 CMP %RDI,%RSI |
(189) 0xe31a JGE ee9b |
(189) 0xe320 MOV -0xa0(%RBP),%RAX |
(189) 0xe327 MOV (%RAX,%RSI,8),%R9 |
(189) 0xe32b MOV (%RDX,%RSI,8),%R12 |
(189) 0xe32f MOV %R9,-0x70(%RBP) |
(189) 0xe333 SUB (%R8),%R9 |
(189) 0xe336 MOV %R12,-0x40(%RBP) |
(189) 0xe33a JL e3d0 |
(189) 0xe340 MOV -0x70(%RBP),%RAX |
(189) 0xe344 CMP 0x8(%R8),%RAX |
(189) 0xe348 JGE e3d0 |
(189) 0xe34e CMPQ $0,0x58(%RBP) |
(189) 0xe353 MOV %R9,-0x68(%RBP) |
(189) 0xe357 JE e5cf |
(189) 0xe35d MOV 0x38(%RBP),%RAX |
(189) 0xe361 MOV %R12,%R13 |
(189) 0xe364 MOV (%RAX,%R9,8),%R12 |
(189) 0xe368 MOV 0x40(%RBP),%RAX |
(189) 0xe36c MOV (%RAX,%R9,8),%RAX |
(189) 0xe370 MOV %RAX,-0x50(%RBP) |
(189) 0xe374 MOV 0x50(%RBP),%RAX |
(189) 0xe378 MOV (%RAX,%R9,8),%RDX |
(189) 0xe37c MOV 0x48(%RBP),%RAX |
(189) 0xe380 MOV (%RAX,%R9,8),%R14 |
(189) 0xe384 MOV %RDX,-0x88(%RBP) |
(189) 0xe38b MOV %RDX,%RAX |
(189) 0xe38e SUB %R14,%RAX |
(189) 0xe391 SUB %RAX,%R13 |
(189) 0xe394 MOV %RSI,-0x48(%RBP) |
(189) 0xe398 MOV %RCX,-0x78(%RBP) |
(189) 0xe39c JLE e638 |
(189) 0xe3a2 MOV $0x8,%ESI |
(189) 0xe3a7 MOV %R13,%RDI |
(189) 0xe3aa VZEROUPPER |
(189) 0xe3ad CALL 2de0 <hypre_CAlloc@plt> |
(189) 0xe3b2 MOV %RAX,-0x58(%RBP) |
(189) 0xe3b6 MOV $0x8,%ESI |
(189) 0xe3bb MOV %R13,%RDI |
(189) 0xe3be CALL 2de0 <hypre_CAlloc@plt> |
(189) 0xe3c3 MOV -0x58(%RBP),%R11 |
(189) 0xe3c7 MOV %RAX,-0x60(%RBP) |
(189) 0xe3cb JMP e63b |
(189) 0xe3d0 ADD %R12,%RCX |
(189) 0xe3d3 MOV 0x18(%RBP),%RAX |
(189) 0xe3d7 CMPQ $0,(%RAX) |
(189) 0xe3db JE e314 |
(189) 0xe3e1 CMPQ $0,0x98(%RBP) |
(189) 0xe3e9 JLE e314 |
(189) 0xe3ef MOV %RCX,-0x78(%RBP) |
(189) 0xe3f3 MOV %RSI,-0x48(%RBP) |
(189) 0xe3f7 LEA -0x1(%R12),%RAX |
(189) 0xe3fc LEA (%RBX,%R12,8),%RCX |
(189) 0xe400 ADD $-0x8,%RCX |
(189) 0xe404 XOR %ESI,%ESI |
(189) 0xe406 XOR %EDX,%EDX |
(189) 0xe408 JMP e424 |
0xe40a NOPW (%RAX,%RAX,1) |
(190) 0xe410 LEA 0x1(%RDX),%RDI |
(190) 0xe414 CMP -0xa8(%RBP),%RDX |
(190) 0xe41b MOV %RDI,%RDX |
(190) 0xe41e JE e300 |
(190) 0xe424 MOV %RSI,%R8 |
(190) 0xe427 MOV %RDX,%R9 |
(190) 0xe42a SAL $0x4,%R9 |
(190) 0xe42e MOV 0xa0(%RBP),%R10 |
(190) 0xe435 MOV 0x8(%R10,%R9,1),%RDI |
(190) 0xe43a ADD %RDI,%RSI |
(190) 0xe43d MOV -0x70(%RBP),%R11 |
(190) 0xe441 CMP %R11,(%R10,%R9,1) |
(190) 0xe445 JNE e410 |
(190) 0xe447 TEST %R12,%R12 |
(190) 0xe44a JLE e410 |
(190) 0xe44c MOV 0xa8(%RBP),%R10 |
(190) 0xe453 LEA (%R10,%RSI,8),%R9 |
(190) 0xe457 ADD $-0x8,%R9 |
(190) 0xe45b CMP %R15,%R9 |
(190) 0xe45e SETAE %R13B |
(190) 0xe462 LEA (%R10,%R8,8),%R8 |
(190) 0xe466 CMP %R8,%R15 |
(190) 0xe469 SETAE %R12B |
(190) 0xe46d CMP %RBX,%R9 |
(190) 0xe470 SETB %R10B |
(190) 0xe474 CMP %R8,%RCX |
(190) 0xe477 SETB %R14B |
(190) 0xe47b CMP %R15,%RCX |
(190) 0xe47e SETB %R9B |
(190) 0xe482 CMP %RBX,%R15 |
(190) 0xe485 SETB %R11B |
(190) 0xe489 TEST %R12B,%R13B |
(190) 0xe48c JNE e580 |
(190) 0xe492 OR %R14B,%R10B |
(190) 0xe495 JE e580 |
(190) 0xe49b OR %R11B,%R9B |
(190) 0xe49e JE e580 |
(190) 0xe4a4 XOR %R9D,%R9D |
(190) 0xe4a7 MOV -0x40(%RBP),%R12 |
(190) 0xe4ab JMP e4c0 |
0xe4ad NOPL (%RAX) |
(193) 0xe4b0 LEA 0x1(%R9),%R10 |
(193) 0xe4b4 CMP %RAX,%R9 |
(193) 0xe4b7 MOV %R10,%R9 |
(193) 0xe4ba JE e410 |
(193) 0xe4c0 TEST %RDI,%RDI |
(193) 0xe4c3 JLE e4b0 |
(193) 0xe4c5 MOV (%RBX,%R9,8),%R10 |
(193) 0xe4c9 MOV %RDI,%R11 |
(193) 0xe4cc AND $-0x4,%R11 |
(193) 0xe4d0 JE e550 |
(193) 0xe4d2 LEA -0x1(%R11),%R14 |
(193) 0xe4d6 VMOVQ %R10,%XMM0 |
(193) 0xe4db VPBROADCASTQ %XMM0,%YMM1 |
(193) 0xe4e0 VPXOR %XMM0,%XMM0,%XMM0 |
(193) 0xe4e4 XOR %R13D,%R13D |
(193) 0xe4e7 NOPW (%RAX,%RAX,1) |
(196) 0xe4f0 VPCMPEQQ (%R8,%R13,8),%YMM1,%YMM2 |
(196) 0xe4f6 VPMASKMOVQ %YMM7,%YMM2,(%R8,%R13,8) |
(196) 0xe4fc VPSUBQ %YMM2,%YMM0,%YMM0 |
(196) 0xe500 ADD $0x4,%R13 |
(196) 0xe504 CMP %R14,%R13 |
(196) 0xe507 JLE e4f0 |
(193) 0xe509 VEXTRACTI128 $0x1,%YMM0,%XMM1 |
(193) 0xe50f VPADDQ %XMM1,%XMM0,%XMM0 |
(193) 0xe513 VPSHUFD $-0x12,%XMM0,%XMM1 |
(193) 0xe518 VPADDQ %XMM1,%XMM0,%XMM0 |
(193) 0xe51c VMOVQ %XMM0,%R14 |
(193) 0xe521 CMP %R11,%RDI |
(193) 0xe524 JNE e560 |
(193) 0xe526 NOPW %CS:(%RAX,%RAX,1) |
(193) 0xe530 TEST %R14,%R14 |
(193) 0xe533 JE e4b0 |
(193) 0xe539 ADD %R14,(%R15) |
(193) 0xe53c JMP e4b0 |
0xe541 NOPW %CS:(%RAX,%RAX,1) |
(193) 0xe550 XOR %R14D,%R14D |
(193) 0xe553 XOR %R11D,%R11D |
(193) 0xe556 CMP %R10,(%R8,%R11,8) |
(193) 0xe55a JE e566 |
(193) 0xe55c JMP e571 |
0xe55e XCHG %AX,%AX |
(194) 0xe560 CMP %R10,(%R8,%R11,8) |
(194) 0xe564 JNE e571 |
(195) 0xe566 MOVQ $-0x1,(%R8,%R11,8) |
(195) 0xe56e INC %R14 |
(194) 0xe571 INC %R11 |
(194) 0xe574 CMP %R11,%RDI |
(194) 0xe577 JNE e560 |
(193) 0xe579 JMP e530 |
0xe57b NOPL (%RAX,%RAX,1) |
(190) 0xe580 XOR %R9D,%R9D |
(190) 0xe583 MOV -0x40(%RBP),%R12 |
(190) 0xe587 JMP e5a0 |
0xe589 NOPL (%RAX) |
(191) 0xe590 LEA 0x1(%R9),%R10 |
(191) 0xe594 CMP %RAX,%R9 |
(191) 0xe597 MOV %R10,%R9 |
(191) 0xe59a JE e410 |
(191) 0xe5a0 TEST %RDI,%RDI |
(191) 0xe5a3 JLE e590 |
(191) 0xe5a5 XOR %R10D,%R10D |
(191) 0xe5a8 JMP e5b8 |
0xe5aa NOPW (%RAX,%RAX,1) |
(192) 0xe5b0 INC %R10 |
(192) 0xe5b3 CMP %R10,%RDI |
(192) 0xe5b6 JE e590 |
(192) 0xe5b8 MOV (%R8,%R10,8),%R11 |
(192) 0xe5bc CMP (%RBX,%R9,8),%R11 |
(192) 0xe5c0 JNE e5b0 |
(192) 0xe5c2 MOVQ $-0x1,(%R8,%R10,8) |
(192) 0xe5ca INCQ (%R15) |
(192) 0xe5cd JMP e5b0 |
(189) 0xe5cf MOV %RCX,%R13 |
(189) 0xe5d2 MOV 0x18(%RBP),%RAX |
(189) 0xe5d6 MOV (%RAX),%RAX |
(189) 0xe5d9 MOV 0x38(%RAX),%RCX |
(189) 0xe5dd MOV 0x40(%RAX),%RAX |
(189) 0xe5e1 MOV (%RAX,%R9,8),%R10 |
(189) 0xe5e5 MOV (%RCX,%R9,8),%R14 |
(189) 0xe5e9 TEST %R12,%R12 |
(189) 0xe5ec JLE edd9 |
(189) 0xe5f2 MOV %RSI,-0x48(%RBP) |
(189) 0xe5f6 MOV 0x60(%RBP),%RAX |
(189) 0xe5fa MOV 0x8(%RAX,%R9,8),%RAX |
(189) 0xe5ff MOV %RAX,-0x58(%RBP) |
(189) 0xe603 MOV 0x78(%RBP),%RAX |
(189) 0xe607 MOV 0x8(%RAX,%R9,8),%RAX |
(189) 0xe60c MOV %RAX,-0x90(%RBP) |
(189) 0xe613 MOV %R13,%RCX |
(189) 0xe616 LEA (%R12,%R13,1),%RAX |
(189) 0xe61a MOV %RAX,-0x98(%RBP) |
(189) 0xe621 XOR %R8D,%R8D |
(189) 0xe624 MOV %R10,-0x80(%RBP) |
(189) 0xe628 MOV %R10,-0x50(%RBP) |
(189) 0xe62c MOV %R14,-0x88(%RBP) |
(189) 0xe633 JMP e9d1 |
(189) 0xe638 XOR %R11D,%R11D |
(189) 0xe63b MOV -0x40(%RBP),%RDX |
(189) 0xe63f TEST %RDX,%RDX |
(189) 0xe642 JLE e952 |
(189) 0xe648 MOV %R12D,%ECX |
(189) 0xe64b AND $0x7f,%ECX |
(189) 0xe64e MOV $0x80,%EAX |
(189) 0xe653 SUB %ECX,%EAX |
(189) 0xe655 SHR $0x3,%EAX |
(189) 0xe658 CMP %RAX,%R14 |
(189) 0xe65b CMOVB %R14,%RAX |
(189) 0xe65f LEA -0x1(%RDX),%RCX |
(189) 0xe663 MOV %RCX,-0x70(%RBP) |
(189) 0xe667 MOV %EAX,%EDX |
(189) 0xe669 LEA (%R12,%RDX,8),%RDX |
(189) 0xe66d MOV %R14,%RCX |
(189) 0xe670 SUB %RAX,%RCX |
(189) 0xe673 AND $-0x10,%RCX |
(189) 0xe677 ADD %RAX,%RCX |
(189) 0xe67a MOV %RCX,-0x90(%RBP) |
(189) 0xe681 MOVQ $0,-0x80(%RBP) |
(189) 0xe689 XOR %EDI,%EDI |
(189) 0xe68b MOV %R14,%RCX |
(189) 0xe68e MOV %R11,-0x58(%RBP) |
(189) 0xe692 JMP e6d4 |
0xe694 NOPW %CS:(%RAX,%RAX,1) |
(207) 0xe6a0 MOV -0x80(%RBP),%R9 |
(207) 0xe6a4 MOV %R8,(%R11,%R9,8) |
(207) 0xe6a8 MOV 0x10(%RBP),%R8 |
(207) 0xe6ac VMOVQ (%R8,%RSI,8),%XMM0 |
(207) 0xe6b2 MOV -0x60(%RBP),%RSI |
(207) 0xe6b6 VMOVQ %XMM0,(%RSI,%R9,8) |
(207) 0xe6bc INC %R9 |
(207) 0xe6bf MOV %R9,-0x80(%RBP) |
(207) 0xe6c3 LEA 0x1(%RDI),%RSI |
(207) 0xe6c7 CMP -0x70(%RBP),%RDI |
(207) 0xe6cb MOV %RSI,%RDI |
(207) 0xe6ce JE e839 |
(207) 0xe6d4 TEST %R14,%R14 |
(207) 0xe6d7 JLE e7b0 |
(207) 0xe6dd MOV -0x78(%RBP),%RSI |
(207) 0xe6e1 LEA (%RSI,%RDI,1),%R8 |
(207) 0xe6e5 MOV (%RBX,%R8,8),%R9 |
(207) 0xe6e9 MOV %R12D,%R10D |
(207) 0xe6ec AND $0x7f,%R10D |
(207) 0xe6f0 MOV $0x80,%ESI |
(207) 0xe6f5 SUB %R10D,%ESI |
(207) 0xe6f8 SHR $0x3,%ESI |
(207) 0xe6fb CMP %RSI,%R14 |
(207) 0xe6fe MOV %R14,%R13 |
(207) 0xe701 CMOVA %RSI,%R13 |
(207) 0xe705 TEST %R13,%R13 |
(207) 0xe708 JE e722 |
(207) 0xe70a XOR %R10D,%R10D |
(207) 0xe70d NOPL (%RAX) |
(210) 0xe710 CMP %R9,(%R12,%R10,8) |
(210) 0xe714 JE e820 |
(210) 0xe71a INC %R10 |
(210) 0xe71d CMP %R10,%RAX |
(210) 0xe720 JNE e710 |
(207) 0xe722 CMP %RSI,%R14 |
(207) 0xe725 JBE e7b0 |
(207) 0xe72b MOV %R14,%R11 |
(207) 0xe72e SUB %R13,%R11 |
(207) 0xe731 MOV %R11,%RSI |
(207) 0xe734 AND $-0x10,%RSI |
(207) 0xe738 JE e787 |
(207) 0xe73a LEA -0x1(%RSI),%R13 |
(207) 0xe73e VMOVQ %R9,%XMM0 |
(207) 0xe743 VPBROADCASTQ %XMM0,%YMM0 |
(207) 0xe748 XOR %R10D,%R10D |
(207) 0xe74b NOPL (%RAX,%RAX,1) |
(209) 0xe750 VPCMPEQQ (%RDX,%R10,8),%YMM0,%YMM1 |
(209) 0xe756 VPCMPEQQ 0x20(%RDX,%R10,8),%YMM0,%YMM3 |
(209) 0xe75d VPCMPEQQ 0x40(%RDX,%R10,8),%YMM0,%YMM2 |
(209) 0xe764 VPCMPEQQ 0x60(%RDX,%R10,8),%YMM0,%YMM4 |
(209) 0xe76b VPOR %YMM3,%YMM1,%YMM5 |
(209) 0xe76f VPOR %YMM4,%YMM2,%YMM6 |
(209) 0xe773 VPOR %YMM5,%YMM6,%YMM5 |
(209) 0xe777 VTESTPD %YMM5,%YMM5 |
(209) 0xe77c JNE e7e7 |
(209) 0xe77e ADD $0x10,%R10 |
(209) 0xe782 CMP %R13,%R10 |
(209) 0xe785 JBE e750 |
(207) 0xe787 CMP %R11,%RSI |
(207) 0xe78a MOV -0x58(%RBP),%R11 |
(207) 0xe78e JE e7b0 |
(207) 0xe790 MOV -0x90(%RBP),%R10 |
(207) 0xe797 NOPW (%RAX,%RAX,1) |
(208) 0xe7a0 CMP %R9,(%R12,%R10,8) |
(208) 0xe7a4 JE e820 |
(208) 0xe7a6 INC %R10 |
(208) 0xe7a9 CMP %R10,%R14 |
(208) 0xe7ac JNE e7a0 |
(207) 0xe7ae XCHG %AX,%AX |
(207) 0xe7b0 MOV -0x78(%RBP),%RSI |
(207) 0xe7b4 ADD %RDI,%RSI |
(207) 0xe7b7 MOV (%RBX,%RSI,8),%R8 |
(207) 0xe7bb CMP -0x88(%RBP),%RCX |
(207) 0xe7c2 JGE e6a0 |
(207) 0xe7c8 MOV %R8,(%R12,%RCX,8) |
(207) 0xe7cc MOV 0x10(%RBP),%R8 |
(207) 0xe7d0 VMOVQ (%R8,%RSI,8),%XMM0 |
(207) 0xe7d6 MOV -0x50(%RBP),%RSI |
(207) 0xe7da VMOVQ %XMM0,(%RSI,%RCX,8) |
(207) 0xe7df INC %RCX |
(207) 0xe7e2 JMP e6c3 |
(207) 0xe7e7 VPACKSSDW %YMM3,%YMM1,%YMM0 |
(207) 0xe7eb VPERMQ $-0x28,%YMM0,%YMM0 |
(207) 0xe7f1 VPACKSSDW %YMM4,%YMM2,%YMM1 |
(207) 0xe7f5 VPERMQ $-0x28,%YMM1,%YMM1 |
(207) 0xe7fb VPACKSSDW %YMM1,%YMM0,%YMM0 |
(207) 0xe7ff VEXTRACTI128 $0x1,%YMM0,%XMM1 |
(207) 0xe805 VPACKSSWB %XMM1,%XMM0,%XMM0 |
(207) 0xe809 VPSHUFD $-0x28,%XMM0,%XMM0 |
(207) 0xe80e VPMOVMSKB %XMM0,%ESI |
(207) 0xe812 TZCNT %ESI,%ESI |
(207) 0xe816 ADD %RAX,%R10 |
(207) 0xe819 ADD %RSI,%R10 |
(207) 0xe81c MOV -0x58(%RBP),%R11 |
(207) 0xe820 MOV 0x10(%RBP),%RSI |
(207) 0xe824 VMOVQ (%RSI,%R8,8),%XMM0 |
(207) 0xe82a MOV -0x50(%RBP),%RSI |
(207) 0xe82e VMOVQ %XMM0,(%RSI,%R10,8) |
(207) 0xe834 JMP e6c3 |
(189) 0xe839 MOV -0x40(%RBP),%RAX |
(189) 0xe83d ADD %RAX,-0x78(%RBP) |
(189) 0xe841 MOV -0x80(%RBP),%R12 |
(189) 0xe845 LEA (%RCX,%R12,1),%R13 |
(189) 0xe849 MOV 0x48(%RBP),%RAX |
(189) 0xe84d MOV -0x68(%RBP),%R14 |
(189) 0xe851 MOV %R13,(%RAX,%R14,8) |
(189) 0xe855 TEST %R12,%R12 |
(189) 0xe858 JE e95e |
(189) 0xe85e MOV %RCX,-0x40(%RBP) |
(189) 0xe862 MOV 0x38(%RBP),%RAX |
(189) 0xe866 MOV (%RAX,%R14,8),%RDI |
(189) 0xe86a LEA (,%R13,8),%RSI |
(189) 0xe872 MOV %RSI,-0x70(%RBP) |
(189) 0xe876 VZEROUPPER |
(189) 0xe879 CALL 2b70 <hypre_ReAlloc@plt> |
(189) 0xe87e MOV 0x38(%RBP),%RCX |
(189) 0xe882 MOV %RAX,(%RCX,%R14,8) |
(189) 0xe886 MOV 0x40(%RBP),%R14 |
(189) 0xe88a MOV -0x68(%RBP),%RAX |
(189) 0xe88e MOV (%R14,%RAX,8),%RDI |
(189) 0xe892 MOV -0x70(%RBP),%RSI |
(189) 0xe896 CALL 2b70 <hypre_ReAlloc@plt> |
(189) 0xe89b MOV -0x68(%RBP),%RCX |
(189) 0xe89f MOV %RAX,(%R14,%RCX,8) |
(189) 0xe8a3 MOV -0x68(%RBP),%RDX |
(189) 0xe8a7 MOV 0x50(%RBP),%RCX |
(189) 0xe8ab MOV %R13,(%RCX,%RDX,8) |
(189) 0xe8af TEST %R12,%R12 |
(189) 0xe8b2 JLE ed03 |
(189) 0xe8b8 MOV %R12,%R14 |
(189) 0xe8bb MOV 0x38(%RBP),%RCX |
(189) 0xe8bf MOV (%RCX,%RDX,8),%RCX |
(189) 0xe8c3 MOV -0x58(%RBP),%R11 |
(189) 0xe8c7 LEA (%R11,%R12,8),%RDX |
(189) 0xe8cb ADD $-0x8,%RDX |
(189) 0xe8cf MOV -0x40(%RBP),%R9 |
(189) 0xe8d3 LEA (%RCX,%R9,8),%RDI |
(189) 0xe8d7 CMP %RDI,%RDX |
(189) 0xe8da SETAE %DL |
(189) 0xe8dd LEA (%RCX,%R13,8),%RCX |
(189) 0xe8e1 ADD $-0x8,%RCX |
(189) 0xe8e5 CMP %R11,%RCX |
(189) 0xe8e8 SETAE %SIL |
(189) 0xe8ec MOV -0x60(%RBP),%R8 |
(189) 0xe8f0 LEA (%R8,%R12,8),%RCX |
(189) 0xe8f4 ADD $-0x8,%RCX |
(189) 0xe8f8 LEA (%RAX,%R9,8),%R12 |
(189) 0xe8fc CMP %R12,%RCX |
(189) 0xe8ff SETB %CL |
(189) 0xe902 LEA (%RAX,%R13,8),%RAX |
(189) 0xe906 ADD $-0x8,%RAX |
(189) 0xe90a CMP %R8,%RAX |
(189) 0xe90d SETB %AL |
(189) 0xe910 TEST %SIL,%DL |
(189) 0xe913 JNE ed15 |
(189) 0xe919 OR %AL,%CL |
(189) 0xe91b JE ed15 |
(189) 0xe921 CMP $0xd,%R14 |
(189) 0xe925 JB edfc |
(189) 0xe92b SAL $0x3,%R14 |
(189) 0xe92f MOV %R11,%RSI |
(189) 0xe932 MOV %R14,%RDX |
(189) 0xe935 CALL 2ae0 <_intel_fast_memcpy@plt> |
(189) 0xe93a MOV %R12,%RDI |
(189) 0xe93d MOV -0x60(%RBP),%RSI |
(189) 0xe941 MOV %R14,%RDX |
(189) 0xe944 CALL 2ae0 <_intel_fast_memcpy@plt> |
(189) 0xe949 MOV -0x58(%RBP),%R11 |
(189) 0xe94d JMP ee7d |
(189) 0xe952 MOV 0x48(%RBP),%RAX |
(189) 0xe956 MOV -0x68(%RBP),%RCX |
(189) 0xe95a MOV %R14,(%RAX,%RCX,8) |
(189) 0xe95e TEST %R11,%R11 |
(189) 0xe961 JNE ee7d |
(189) 0xe967 JMP ee39 |
(197) 0xe96c VPACKSSDW %YMM3,%YMM1,%YMM0 |
(197) 0xe970 VPERMQ $-0x28,%YMM0,%YMM0 |
(197) 0xe976 VPACKSSDW %YMM4,%YMM2,%YMM1 |
(197) 0xe97a VPERMQ $-0x28,%YMM1,%YMM1 |
(197) 0xe980 VPACKSSDW %YMM1,%YMM0,%YMM0 |
(197) 0xe984 VEXTRACTI128 $0x1,%YMM0,%XMM1 |
(197) 0xe98a VPACKSSWB %XMM1,%XMM0,%XMM0 |
(197) 0xe98e VPSHUFD $-0x28,%XMM0,%XMM0 |
(197) 0xe993 VPMOVMSKB %XMM0,%EAX |
(197) 0xe997 TZCNT %EAX,%EAX |
(197) 0xe99b ADD %RDI,%R11 |
(197) 0xe99e ADD %RAX,%R11 |
(197) 0xe9a1 MOV -0x30(%RBP),%RDI |
(197) 0xe9a5 MOV %R12,%RCX |
(197) 0xe9a8 MOV -0x40(%RBP),%R12 |
(197) 0xe9ac MOV 0x10(%RBP),%RAX |
(197) 0xe9b0 VMOVQ (%RAX,%RCX,8),%XMM0 |
(197) 0xe9b5 MOV 0x88(%RBP),%RAX |
(197) 0xe9bc VMOVQ %XMM0,(%RAX,%R11,8) |
(197) 0xe9c2 INC %RCX |
(197) 0xe9c5 INC %R8 |
(197) 0xe9c8 CMP %R12,%R8 |
(197) 0xe9cb JE ecef |
(197) 0xe9d1 MOV (%RBX,%RCX,8),%R9 |
(197) 0xe9d5 CMP 0x28(%RBP),%R9 |
(197) 0xe9d9 JL eb30 |
(197) 0xe9df CMP 0x30(%RBP),%R9 |
(197) 0xe9e3 JG eb30 |
(197) 0xe9e9 MOV 0x60(%RBP),%RAX |
(197) 0xe9ed MOV -0x68(%RBP),%RDX |
(197) 0xe9f1 MOV (%RAX,%RDX,8),%R10 |
(197) 0xe9f5 MOV -0x88(%RBP),%R13 |
(197) 0xe9fc SUB %R10,%R13 |
(197) 0xe9ff JLE eb00 |
(197) 0xea05 MOV 0x68(%RBP),%RAX |
(197) 0xea09 LEA (%RAX,%R10,8),%EAX |
(197) 0xea0d AND $0x7f,%EAX |
(197) 0xea10 MOV $0x80,%EDX |
(197) 0xea15 SUB %EAX,%EDX |
(197) 0xea17 SHR $0x3,%EDX |
(197) 0xea1a CMP %RDX,%R13 |
(197) 0xea1d MOV %R13,%RSI |
(197) 0xea20 CMOVA %RDX,%RSI |
(197) 0xea24 TEST %RSI,%RSI |
(197) 0xea27 JE ea46 |
(197) 0xea29 MOV %R10,%R11 |
(197) 0xea2c MOV %RSI,%RAX |
(197) 0xea2f NOP |
(203) 0xea30 MOV 0x68(%RBP),%R12 |
(203) 0xea34 CMP %R9,(%R12,%R11,8) |
(203) 0xea38 JE ecd9 |
(203) 0xea3e INC %R11 |
(203) 0xea41 DEC %RAX |
(203) 0xea44 JNE ea30 |
(197) 0xea46 CMP %RDX,%R13 |
(197) 0xea49 MOV -0x40(%RBP),%R12 |
(197) 0xea4d JBE eb00 |
(197) 0xea53 MOV %RCX,%R12 |
(197) 0xea56 SUB %RSI,%R13 |
(197) 0xea59 MOV %R13,%RDX |
(197) 0xea5c AND $-0x10,%RDX |
(197) 0xea60 JE eabb |
(197) 0xea62 LEA -0x1(%RDX),%RAX |
(197) 0xea66 VMOVQ %R9,%XMM0 |
(197) 0xea6b VPBROADCASTQ %XMM0,%YMM0 |
(197) 0xea70 LEA (%R10,%RSI,1),%R11 |
(197) 0xea74 MOV 0x68(%RBP),%RCX |
(197) 0xea78 LEA (%RCX,%R11,8),%RCX |
(197) 0xea7c XOR %EDI,%EDI |
(197) 0xea7e XCHG %AX,%AX |
(202) 0xea80 VPCMPEQQ (%RCX,%RDI,8),%YMM0,%YMM1 |
(202) 0xea86 VPCMPEQQ 0x20(%RCX,%RDI,8),%YMM0,%YMM3 |
(202) 0xea8d VPCMPEQQ 0x40(%RCX,%RDI,8),%YMM0,%YMM2 |
(202) 0xea94 VPCMPEQQ 0x60(%RCX,%RDI,8),%YMM0,%YMM4 |
(202) 0xea9b VPOR %YMM3,%YMM1,%YMM5 |
(202) 0xea9f VPOR %YMM4,%YMM2,%YMM6 |
(202) 0xeaa3 VPOR %YMM5,%YMM6,%YMM5 |
(202) 0xeaa7 VTESTPD %YMM5,%YMM5 |
(202) 0xeaac JNE ec9d |
(202) 0xeab2 ADD $0x10,%RDI |
(202) 0xeab6 CMP %RAX,%RDI |
(202) 0xeab9 JBE ea80 |
(197) 0xeabb CMP %R13,%RDX |
(197) 0xeabe MOV -0x30(%RBP),%RDI |
(197) 0xeac2 MOV %R12,%RCX |
(197) 0xeac5 MOV -0x40(%RBP),%R12 |
(197) 0xeac9 JE eb00 |
(197) 0xeacb ADD %RSI,%R10 |
(197) 0xeace ADD %RDX,%R10 |
(197) 0xead1 MOV %R10,%R11 |
(197) 0xead4 NOPW %CS:(%RAX,%RAX,1) |
(201) 0xeae0 MOV 0x68(%RBP),%RAX |
(201) 0xeae4 CMP %R9,(%RAX,%R11,8) |
(201) 0xeae8 JE ecdd |
(201) 0xeaee INC %R11 |
(201) 0xeaf1 CMP %R11,-0x88(%RBP) |
(201) 0xeaf8 JNE eae0 |
(197) 0xeafa NOPW (%RAX,%RAX,1) |
(197) 0xeb00 CMP -0x58(%RBP),%R14 |
(197) 0xeb04 JGE ed7c |
(197) 0xeb0a MOV 0x68(%RBP),%RAX |
(197) 0xeb0e MOV %R9,(%RAX,%R14,8) |
(197) 0xeb12 MOV 0x10(%RBP),%RAX |
(197) 0xeb16 VMOVQ (%RAX,%RCX,8),%XMM0 |
(197) 0xeb1b MOV 0x70(%RBP),%RAX |
(197) 0xeb1f VMOVQ %XMM0,(%RAX,%R14,8) |
(197) 0xeb25 INC %R14 |
(197) 0xeb28 JMP e9c2 |
0xeb2d NOPL (%RAX) |
(197) 0xeb30 MOV 0x78(%RBP),%RAX |
(197) 0xeb34 MOV -0x68(%RBP),%RDX |
(197) 0xeb38 MOV (%RAX,%RDX,8),%R10 |
(197) 0xeb3c MOV -0x80(%RBP),%R13 |
(197) 0xeb40 SUB %R10,%R13 |
(197) 0xeb43 JLE ec60 |
(197) 0xeb49 MOV 0x80(%RBP),%RAX |
(197) 0xeb50 LEA (%RAX,%R10,8),%EAX |
(197) 0xeb54 AND $0x7f,%EAX |
(197) 0xeb57 MOV $0x80,%EDX |
(197) 0xeb5c SUB %EAX,%EDX |
(197) 0xeb5e SHR $0x3,%EDX |
(197) 0xeb61 CMP %RDX,%R13 |
(197) 0xeb64 MOV %R13,%RSI |
(197) 0xeb67 CMOVA %RDX,%RSI |
(197) 0xeb6b TEST %RSI,%RSI |
(197) 0xeb6e JE eb99 |
(197) 0xeb70 MOV %R10,%R11 |
(197) 0xeb73 MOV %RSI,%RAX |
(197) 0xeb76 NOPW %CS:(%RAX,%RAX,1) |
(200) 0xeb80 MOV 0x80(%RBP),%R12 |
(200) 0xeb87 CMP %R9,(%R12,%R11,8) |
(200) 0xeb8b JE e9a8 |
(200) 0xeb91 INC %R11 |
(200) 0xeb94 DEC %RAX |
(200) 0xeb97 JNE eb80 |
(197) 0xeb99 CMP %RDX,%R13 |
(197) 0xeb9c MOV -0x40(%RBP),%R12 |
(197) 0xeba0 JBE ec60 |
(197) 0xeba6 MOV %RCX,%R12 |
(197) 0xeba9 SUB %RSI,%R13 |
(197) 0xebac MOV %R13,%RDX |
(197) 0xebaf AND $-0x10,%RDX |
(197) 0xebb3 JE ec1b |
(197) 0xebb5 LEA -0x1(%RDX),%RAX |
(197) 0xebb9 VMOVQ %R9,%XMM0 |
(197) 0xebbe VPBROADCASTQ %XMM0,%YMM0 |
(197) 0xebc3 LEA (%R10,%RSI,1),%R11 |
(197) 0xebc7 MOV 0x80(%RBP),%RCX |
(197) 0xebce LEA (%RCX,%R11,8),%RCX |
(197) 0xebd2 XOR %EDI,%EDI |
(197) 0xebd4 NOPW %CS:(%RAX,%RAX,1) |
(199) 0xebe0 VPCMPEQQ (%RCX,%RDI,8),%YMM0,%YMM1 |
(199) 0xebe6 VPCMPEQQ 0x20(%RCX,%RDI,8),%YMM0,%YMM3 |
(199) 0xebed VPCMPEQQ 0x40(%RCX,%RDI,8),%YMM0,%YMM2 |
(199) 0xebf4 VPCMPEQQ 0x60(%RCX,%RDI,8),%YMM0,%YMM4 |
(199) 0xebfb VPOR %YMM3,%YMM1,%YMM5 |
(199) 0xebff VPOR %YMM4,%YMM2,%YMM6 |
(199) 0xec03 VPOR %YMM5,%YMM6,%YMM5 |
(199) 0xec07 VTESTPD %YMM5,%YMM5 |
(199) 0xec0c JNE e96c |
(199) 0xec12 ADD $0x10,%RDI |
(199) 0xec16 CMP %RAX,%RDI |
(199) 0xec19 JBE ebe0 |
(197) 0xec1b CMP %R13,%RDX |
(197) 0xec1e MOV -0x30(%RBP),%RDI |
(197) 0xec22 MOV %R12,%RCX |
(197) 0xec25 MOV -0x40(%RBP),%R12 |
(197) 0xec29 JE ec60 |
(197) 0xec2b ADD %RSI,%R10 |
(197) 0xec2e ADD %RDX,%R10 |
(197) 0xec31 MOV %R10,%R11 |
(197) 0xec34 NOPW %CS:(%RAX,%RAX,1) |
(198) 0xec40 MOV 0x80(%RBP),%RAX |
(198) 0xec47 CMP %R9,(%RAX,%R11,8) |
(198) 0xec4b JE e9ac |
(198) 0xec51 INC %R11 |
(198) 0xec54 CMP %R11,-0x80(%RBP) |
(198) 0xec58 JNE ec40 |
(197) 0xec5a NOPW (%RAX,%RAX,1) |
(197) 0xec60 MOV -0x50(%RBP),%RDX |
(197) 0xec64 CMP -0x90(%RBP),%RDX |
(197) 0xec6b JGE ed40 |
(197) 0xec71 MOV 0x80(%RBP),%RAX |
(197) 0xec78 MOV %R9,(%RAX,%RDX,8) |
(197) 0xec7c MOV 0x10(%RBP),%RAX |
(197) 0xec80 VMOVQ (%RAX,%RCX,8),%XMM0 |
(197) 0xec85 MOV 0x88(%RBP),%RAX |
(197) 0xec8c VMOVQ %XMM0,(%RAX,%RDX,8) |
(197) 0xec91 INC %RDX |
(197) 0xec94 MOV %RDX,-0x50(%RBP) |
(197) 0xec98 JMP e9c2 |
(197) 0xec9d VPACKSSDW %YMM3,%YMM1,%YMM0 |
(197) 0xeca1 VPERMQ $-0x28,%YMM0,%YMM0 |
(197) 0xeca7 VPACKSSDW %YMM4,%YMM2,%YMM1 |
(197) 0xecab VPERMQ $-0x28,%YMM1,%YMM1 |
(197) 0xecb1 VPACKSSDW %YMM1,%YMM0,%YMM0 |
(197) 0xecb5 VEXTRACTI128 $0x1,%YMM0,%XMM1 |
(197) 0xecbb VPACKSSWB %XMM1,%XMM0,%XMM0 |
(197) 0xecbf VPSHUFD $-0x28,%XMM0,%XMM0 |
(197) 0xecc4 VPMOVMSKB %XMM0,%EAX |
(197) 0xecc8 TZCNT %EAX,%EAX |
(197) 0xeccc ADD %RDI,%R11 |
(197) 0xeccf ADD %RAX,%R11 |
(197) 0xecd2 MOV -0x30(%RBP),%RDI |
(197) 0xecd6 MOV %R12,%RCX |
(197) 0xecd9 MOV -0x40(%RBP),%R12 |
(197) 0xecdd MOV 0x10(%RBP),%RAX |
(197) 0xece1 VMOVQ (%RAX,%RCX,8),%XMM0 |
(197) 0xece6 MOV 0x70(%RBP),%RAX |
(197) 0xecea JMP e9bc |
(189) 0xecef MOV -0x98(%RBP),%R13 |
(189) 0xecf6 MOV -0x38(%RBP),%RDX |
(189) 0xecfa MOV -0x48(%RBP),%RSI |
(189) 0xecfe JMP edd1 |
(189) 0xed03 MOV -0x58(%RBP),%R11 |
(189) 0xed07 TEST %R11,%R11 |
(189) 0xed0a JNE ee7d |
(189) 0xed10 JMP ee39 |
(189) 0xed15 XOR %EAX,%EAX |
(189) 0xed17 MOV -0x60(%RBP),%RDX |
(189) 0xed1b NOPL (%RAX,%RAX,1) |
(204) 0xed20 MOV (%R11,%RAX,8),%RCX |
(204) 0xed24 MOV %RCX,(%RDI,%RAX,8) |
(204) 0xed28 VMOVQ (%RDX,%RAX,8),%XMM0 |
(204) 0xed2d VMOVQ %XMM0,(%R12,%RAX,8) |
(204) 0xed33 INC %RAX |
(204) 0xed36 CMP %RAX,%R14 |
(204) 0xed39 JNE ed20 |
(189) 0xed3b JMP ee7d |
(189) 0xed40 MOV %RCX,%R13 |
(189) 0xed43 MOV $0xd4e,%ESI |
(189) 0xed48 MOV $0x1,%EDX |
(189) 0xed4d LEA 0x458b(%RIP),%RDI |
(189) 0xed54 XOR %ECX,%ECX |
(189) 0xed56 VZEROUPPER |
(189) 0xed59 CALL 2ea0 <hypre_error_handler@plt> |
(189) 0xed5e MOV 0xd0(%RBP),%RAX |
(189) 0xed65 LOCK INCQ (%RAX) |
(189) 0xed69 CMPQ $0,0xc0(%RBP) |
(189) 0xed71 JE edc1 |
(189) 0xed73 LEA 0x46c1(%RIP),%RDI |
(189) 0xed7a JMP edb6 |
(189) 0xed7c MOV %RCX,%R13 |
(189) 0xed7f MOV $0xd70,%ESI |
(189) 0xed84 MOV $0x1,%EDX |
(189) 0xed89 LEA 0x454f(%RIP),%RDI |
(189) 0xed90 XOR %ECX,%ECX |
(189) 0xed92 VZEROUPPER |
(189) 0xed95 CALL 2ea0 <hypre_error_handler@plt> |
(189) 0xed9a MOV 0xd0(%RBP),%RAX |
(189) 0xeda1 LOCK INCQ (%RAX) |
(189) 0xeda5 CMPQ $0,0xc0(%RBP) |
(189) 0xedad JE edc1 |
(189) 0xedaf LEA 0x46ab(%RIP),%RDI |
(189) 0xedb6 MOV -0x70(%RBP),%RSI |
(189) 0xedba XOR %EAX,%EAX |
(189) 0xedbc CALL 2e90 <hypre_printf@plt> |
(189) 0xedc1 MOV -0x38(%RBP),%RDX |
(189) 0xedc5 MOV -0x48(%RBP),%RSI |
(189) 0xedc9 MOV -0x30(%RBP),%RDI |
(189) 0xedcd VPCMPEQD %YMM7,%YMM7,%YMM7 |
(189) 0xedd1 MOV 0x20(%RBP),%R8 |
(189) 0xedd5 MOV -0x50(%RBP),%R10 |
(189) 0xedd9 MOV 0x18(%RBP),%RAX |
(189) 0xeddd MOV (%RAX),%RAX |
(189) 0xede0 MOV 0x38(%RAX),%RCX |
(189) 0xede4 MOV -0x68(%RBP),%R9 |
(189) 0xede8 MOV %R14,(%RCX,%R9,8) |
(189) 0xedec MOV 0x40(%RAX),%RAX |
(189) 0xedf0 MOV %R10,(%RAX,%R9,8) |
(189) 0xedf4 MOV %R13,%RCX |
(189) 0xedf7 JMP e314 |
(189) 0xedfc MOV %R14,%RAX |
(189) 0xedff AND $-0x4,%RAX |
(189) 0xee03 JE ee31 |
(189) 0xee05 LEA -0x1(%RAX),%RCX |
(189) 0xee09 XOR %EDX,%EDX |
(189) 0xee0b MOV -0x60(%RBP),%RSI |
(189) 0xee0f NOP |
(206) 0xee10 VMOVUPS (%R11,%RDX,8),%YMM0 |
(206) 0xee16 VMOVUPS %YMM0,(%RDI,%RDX,8) |
(206) 0xee1b VMOVDQU (%RSI,%RDX,8),%YMM0 |
(206) 0xee20 VMOVDQU %YMM0,(%R12,%RDX,8) |
(206) 0xee26 ADD $0x4,%RDX |
(206) 0xee2a CMP %RCX,%RDX |
(206) 0xee2d JLE ee10 |
(189) 0xee2f JMP ee60 |
(189) 0xee31 XOR %EAX,%EAX |
(189) 0xee33 MOV -0x60(%RBP),%RSI |
(189) 0xee37 JMP ee65 |
(189) 0xee39 MOV -0x38(%RBP),%RDX |
(189) 0xee3d MOV -0x48(%RBP),%RSI |
(189) 0xee41 MOV -0x30(%RBP),%RDI |
(189) 0xee45 MOV -0x78(%RBP),%RCX |
(189) 0xee49 VPCMPEQD %YMM7,%YMM7,%YMM7 |
(189) 0xee4d JMP e310 |
0xee52 NOPW %CS:(%RAX,%RAX,1) |
(205) 0xee60 CMP %RAX,%R14 |
(205) 0xee63 JE ee7d |
(205) 0xee65 MOV (%R11,%RAX,8),%RCX |
(205) 0xee69 MOV %RCX,(%RDI,%RAX,8) |
(205) 0xee6d VMOVQ (%RSI,%RAX,8),%XMM0 |
(205) 0xee72 VMOVQ %XMM0,(%R12,%RAX,8) |
(205) 0xee78 INC %RAX |
(205) 0xee7b JMP ee60 |
(189) 0xee7d MOV %R11,%RDI |
(189) 0xee80 VZEROUPPER |
(189) 0xee83 CALL 2c40 <hypre_Free@plt> |
(189) 0xee88 MOV -0x60(%RBP),%RDI |
(189) 0xee8c CALL 2c40 <hypre_Free@plt> |
(189) 0xee91 MOVQ $0,-0x60(%RBP) |
(189) 0xee99 JMP ee39 |
0xee9b ADD $0x88,%RSP |
0xeea2 POP %RBX |
0xeea3 POP %R12 |
0xeea5 POP %R13 |
0xeea7 POP %R14 |
0xeea9 POP %R15 |
0xeeab POP %RBP |
0xeeac VZEROUPPER |
0xeeaf RET |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | IJMatrix_parcsr.c:3240-3484 |
Module | libIJ_mv.so |
nb instructions | 159 |
nb uops | 172 |
loop length | 631 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 28.67 cycles |
front end | 28.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.70 | 11.60 | 8.67 | 8.67 | 8.50 | 11.60 | 11.50 | 8.50 | 8.50 | 8.50 | 11.60 | 8.67 |
cycles | 11.70 | 15.40 | 8.67 | 8.67 | 8.50 | 11.60 | 11.50 | 8.50 | 8.50 | 8.50 | 11.60 | 8.67 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 27.35-27.39 |
Stall cycles | 0.00 |
Front-end | 28.67 |
Dispatch | 15.40 |
DIV/SQRT | 16.00 |
Overall L1 | 28.67 |
all | 24% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 27% |
all | 15% |
load | NA (no load vectorizable/vectorized instructions) |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2d90 <hypre_NumActiveThreads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2b50 <hypre_GetThreadNum@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE e0ae <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x4e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
JMP e0b6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x56> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE e0dc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x7c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RAX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JMP e0f0 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x90> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,(%R15,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE e1df <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R15,%R12,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVL %R12,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%RDI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $-0x8,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RAX,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB e154 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB e154 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP e1df <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE e1b6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x156> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R13,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPSHUFD $-0x12,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM0,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE e1c2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x162> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP e1dc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x2090a7(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 2e10 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE e296 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x236> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE e296 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x236> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB e267 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x207> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x40(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE e296 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x236> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x209010(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2e10 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE e2b1 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x251> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x8(%R15,%R12,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP e2b3 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x253> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDI,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE ee9b <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xe3b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R12,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPEQD %YMM7,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x20(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP e320 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x2c0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
Source file and lines | IJMatrix_parcsr.c:3240-3484 |
Module | libIJ_mv.so |
nb instructions | 159 |
nb uops | 172 |
loop length | 631 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 28.67 cycles |
front end | 28.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.70 | 11.60 | 8.67 | 8.67 | 8.50 | 11.60 | 11.50 | 8.50 | 8.50 | 8.50 | 11.60 | 8.67 |
cycles | 11.70 | 15.40 | 8.67 | 8.67 | 8.50 | 11.60 | 11.50 | 8.50 | 8.50 | 8.50 | 11.60 | 8.67 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 27.35-27.39 |
Stall cycles | 0.00 |
Front-end | 28.67 |
Dispatch | 15.40 |
DIV/SQRT | 16.00 |
Overall L1 | 28.67 |
all | 24% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 27% |
all | 15% |
load | NA (no load vectorizable/vectorized instructions) |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2d90 <hypre_NumActiveThreads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2b50 <hypre_GetThreadNum@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE e0ae <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x4e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
JMP e0b6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x56> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE e0dc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x7c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RAX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JMP e0f0 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x90> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,(%R15,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE e1df <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R15,%R12,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVL %R12,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%RDI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $-0x8,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RAX,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB e154 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB e154 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP e1df <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE e1b6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x156> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R13,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPSHUFD $-0x12,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM0,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE e1c2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x162> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP e1dc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x2090a7(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 2e10 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE e296 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x236> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE e296 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x236> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB e267 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x207> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x40(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE e296 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x236> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x209010(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2e10 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE e2b1 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x251> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x8(%R15,%R12,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP e2b3 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x253> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDI,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE ee9b <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xe3b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R12,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPEQD %YMM7,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x20(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP e320 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x2c0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_IJMatrixSetValuesOMPParCSR.extracted.28– | 0.42 | 0.17 |
▼Loop 189 - IJMatrix_parcsr.c:3262-3484 - libIJ_mv.so– | 0.04 | 0.01 |
▼Loop 207 - IJMatrix_parcsr.c:3262-3337 - libIJ_mv.so– | 1.21 | 0.41 |
○Loop 210 - IJMatrix_parcsr.c:3318-3320 - libIJ_mv.so | 0.65 | 0.22 |
○Loop 209 - IJMatrix_parcsr.c:3262-3318 - libIJ_mv.so | 0 | 0 |
○Loop 208 - IJMatrix_parcsr.c:3318-3320 - libIJ_mv.so | 0 | 0 |
▼Loop 197 - IJMatrix_parcsr.c:3262-3454 - libIJ_mv.so– | 0.38 | 0.13 |
○Loop 199 - IJMatrix_parcsr.c:3262-3424 - libIJ_mv.so | 0.2 | 0.07 |
○Loop 198 - IJMatrix_parcsr.c:3388-3390 - libIJ_mv.so | 0 | 0 |
○Loop 203 - IJMatrix_parcsr.c:3422-3424 - libIJ_mv.so | 0 | 0 |
○Loop 202 - IJMatrix_parcsr.c:3262-3422 - libIJ_mv.so | 0 | 0 |
○Loop 200 - IJMatrix_parcsr.c:3388-3390 - libIJ_mv.so | 0 | 0 |
○Loop 201 - IJMatrix_parcsr.c:3422-3424 - libIJ_mv.so | 0 | 0 |
○Loop 205 - IJMatrix_parcsr.c:3359-3482 - libIJ_mv.so | 0.24 | 0.08 |
○Loop 204 - IJMatrix_parcsr.c:3359-3362 - libIJ_mv.so | 0 | 0 |
○Loop 206 - IJMatrix_parcsr.c:3359-3362 - libIJ_mv.so | 0 | 0 |
▼Loop 190 - IJMatrix_parcsr.c:3262-3484 - libIJ_mv.so– | 0 | 0 |
▼Loop 193 - IJMatrix_parcsr.c:3262-3484 - libIJ_mv.so– | 0 | 0 |
▼Loop 195 - IJMatrix_parcsr.c:3262-3482 - libIJ_mv.so– | 0 | 0 |
○Loop 194 - IJMatrix_parcsr.c:3478-3480 - libIJ_mv.so | 0 | 0 |
○Loop 196 - IJMatrix_parcsr.c:3262-3482 - libIJ_mv.so | 0 | 0 |
▼Loop 191 - IJMatrix_parcsr.c:3475-3484 - libIJ_mv.so– | 0 | 0 |
○Loop 192 - IJMatrix_parcsr.c:3478-3484 - libIJ_mv.so | 0 | 0 |
○Loop 214 - IJMatrix_parcsr.c:3274-3275 - libIJ_mv.so | 0.01 | 0 |
○Loop 215 - IJMatrix_parcsr.c:3274-3275 - libIJ_mv.so | 0 | 0 |
○Loop 212 - IJMatrix_parcsr.c:3282-3283 - libIJ_mv.so | 0 | 0 |
○Loop 211 - IJMatrix_parcsr.c:3282-3283 - libIJ_mv.so | 0 | 0 |
○Loop 213 - IJMatrix_parcsr.c:3274-3275 - libIJ_mv.so | 0 | 0 |