| Function: s4malla3 | Module: engine_linuxa64_gf_ompi | Source: s4mall3.F:29-116 [...] | Coverage (incl. loops): 0.01% | (excl. loops): 0.00% |
|---|
| Function: s4malla3 | Module: engine_linuxa64_gf_ompi | Source: s4mall3.F:29-116 [...] | Coverage (incl. loops): 0.01% | (excl. loops): 0.00% |
|---|
/home/hbollore/pop3/openradioss/OpenRadioss/engine/source/elements/solid/solide4/s4mall3.F: 29 - 116 |
-------------------------------------------------------------------------------- |
29: 1 SAV, OFFG, OFF, WXX, |
[...] |
60: IF(ISMSTR==11)THEN |
61: DO I=1,NEL |
62: C----------------------------------------------- |
63: IF(OFFG(I)==ZERO) CYCLE |
64: X=SAV(I,1) |
65: Y=SAV(I,4) |
66: Z=SAV(I,7) |
67: SAV(I,1) = X - Y*WZZ(I) + Z*WYY(I) |
68: SAV(I,4) = Y - Z*WXX(I) + X*WZZ(I) |
69: SAV(I,7) = Z - X*WYY(I) + Y*WXX(I) |
70: C |
71: X=SAV(I,2) |
72: Y=SAV(I,5) |
73: Z=SAV(I,8) |
74: SAV(I,2) = X - Y*WZZ(I) + Z*WYY(I) |
75: SAV(I,5) = Y - Z*WXX(I) + X*WZZ(I) |
76: SAV(I,8) = Z - X*WYY(I) + Y*WXX(I) |
77: C |
78: X=SAV(I,3) |
79: Y=SAV(I,6) |
80: Z=SAV(I,9) |
81: SAV(I,3) = X - Y*WZZ(I) + Z*WYY(I) |
82: SAV(I,6) = Y - Z*WXX(I) + X*WZZ(I) |
83: SAV(I,9) = Z - X*WYY(I) + Y*WXX(I) |
84: C |
85: ENDDO |
86: ELSEIF(ISMSTR<=4.AND.JLAG>0)THEN |
87: DO I=1,NEL |
88: IF(OFFG(I)>ONE)THEN |
89: C----------------------------------------------- |
90: X=SAV(I,1) |
91: Y=SAV(I,2) |
92: Z=SAV(I,3) |
93: SAV(I,1) = X - Y*WZZ(I) + Z*WYY(I) |
94: SAV(I,2) = Y - Z*WXX(I) + X*WZZ(I) |
95: SAV(I,3) = Z - X*WYY(I) + Y*WXX(I) |
96: C |
97: X=SAV(I,4) |
98: Y=SAV(I,5) |
99: Z=SAV(I,6) |
100: SAV(I,4) = X - Y*WZZ(I) + Z*WYY(I) |
101: SAV(I,5) = Y - Z*WXX(I) + X*WZZ(I) |
102: SAV(I,6) = Z - X*WYY(I) + Y*WXX(I) |
103: C |
104: X=SAV(I,7) |
105: Y=SAV(I,8) |
106: Z=SAV(I,9) |
107: SAV(I,7) = X - Y*WZZ(I) + Z*WYY(I) |
108: SAV(I,8) = Y - Z*WXX(I) + X*WZZ(I) |
109: SAV(I,9) = Z - X*WYY(I) + Y*WXX(I) |
[...] |
116: END |
0xa02660 LDR W8, [X6] |
0xa02664 LDR W2, [X7] |
0xa02668 SBFM X6, X8, #0, #31 |
0xa0266c BIC X6, X6, X6,ASR #63 |
0xa02670 ORN X16, XZR, X6 |
0xa02674 CMP W2, #11 |
0xa02678 B.EQ a02728 |
0xa0267c CMP W2, #4 |
0xa02680 B.GT a02724 |
0xa02684 LDR X2, [SP] |
0xa02688 LDR W2, [X2] |
0xa0268c CMP W2, #0 |
0xa02690 B.LE a02724 |
0xa02694 CMP W8, #0 |
0xa02698 B.LE a02724 |
0xa0269c ADD X13, X16, W6,UXTW #1 |
0xa026a0 ADD X9, X13, X6,LSL #1 |
0xa026a4 MOVZ W7, #9 |
0xa026a8 ADD X14, X13, X6 |
0xa026ac FMOV D31, #1.0000000 |
0xa026b0 ADD X14, X14, #1 |
0xa026b4 ADD X13, X13, #1 |
0xa026b8 ADD X11, X9, X6,LSL #1 |
0xa026bc ADD X12, X6, X9 |
0xa026c0 ADD X9, X9, #1 |
0xa026c4 UMADDL X7, W6, W7, X16 |
0xa026c8 ADD X12, X12, #1 |
0xa026cc MOVZ X2, #0 |
0xa026d0 ADD X10, X6, X11 |
0xa026d4 ADD X11, X11, #1 |
0xa026d8 ADD X12, X0, X12,LSL #3 |
0xa026dc ADD X6, X6, X10 |
0xa026e0 ADD X10, X10, #1 |
0xa026e4 ADD X11, X0, X11,LSL #3 |
0xa026e8 ADD X6, X6, #1 |
0xa026ec ADD X7, X7, #1 |
0xa026f0 ADD X10, X0, X10,LSL #3 |
0xa026f4 ADD X6, X0, X6,LSL #3 |
0xa026f8 UBFM X8, X8, #61, #31 |
0xa026fc ADD X9, X0, X9,LSL #3 |
0xa02700 ADD X14, X0, X14,LSL #3 |
0xa02704 ADD X13, X0, X13,LSL #3 |
0xa02708 ADD X7, X0, X7,LSL #3 |
(14916) 0xa0270c LDR D0, [X1, X2] |
(14916) 0xa02710 FCMPE D0, D31 |
(14916) 0xa02714 B.GT a028ac |
(14916) 0xa02718 ADD X2, X2, #8 |
(14916) 0xa0271c CMP X2, X8 |
(14916) 0xa02720 B.NE a0270c |
(14913) 0xa02724 RET |
(14913) 0xa02728 CMP W8, #0 |
(14913) 0xa0272c B.LE a02724 |
(14913) 0xa02730 ADD X14, X16, W6,UXTW #2 |
(14913) 0xa02734 UBFM X7, X6, #63, #31 |
(14913) 0xa02738 ADD X7, X7, W6,UXTW |
(14913) 0xa0273c ADD X13, X7, X14 |
(14913) 0xa02740 UBFM X9, X6, #62, #31 |
(14913) 0xa02744 MOVZ W10, #9 |
(14913) 0xa02748 ADD X9, X9, W6,UXTW |
(14913) 0xa0274c SUB X11, X13, X9 |
(14913) 0xa02750 ADD X14, X14, #1 |
(14913) 0xa02754 ADD X12, X11, X7,LSL #1 |
(14913) 0xa02758 ADD X15, X7, X11 |
(14913) 0xa0275c ADD X13, X13, #1 |
(14913) 0xa02760 UMADDL X10, W6, W10, X16 |
(14913) 0xa02764 ADD X15, X15, #1 |
(14913) 0xa02768 ADD X11, X11, #1 |
(14913) 0xa0276c SUB X9, X12, X9 |
(14913) 0xa02770 ADD X12, X12, #1 |
(14913) 0xa02774 ADD X14, X0, X14,LSL #3 |
(14913) 0xa02778 ADD X6, X7, X9 |
(14913) 0xa0277c ADD X7, X9, #1 |
(14913) 0xa02780 ADD X13, X0, X13,LSL #3 |
(14913) 0xa02784 ADD X9, X10, #1 |
(14913) 0xa02788 ADD X6, X6, #1 |
(14913) 0xa0278c ADD X12, X0, X12,LSL #3 |
(14913) 0xa02790 ADD X6, X0, X6,LSL #3 |
(14913) 0xa02794 MOVZ X2, #0 |
(14913) 0xa02798 UBFM X8, X8, #61, #31 |
(14913) 0xa0279c ADD X7, X0, X7,LSL #3 |
(14913) 0xa027a0 ADD X15, X0, X15,LSL #3 |
(14913) 0xa027a4 ADD X10, X0, X11,LSL #3 |
(14913) 0xa027a8 ADD X9, X0, X9,LSL #3 |
(14914) 0xa027ac LDR D31, [X1, X2] |
(14914) 0xa027b0 FCMP D31, #0 |
(14914) 0xa027b4 B.EQ a0289c |
(14914) 0xa027b8 LDR D7, [X14, X2] |
(14914) 0xa027bc LDR D27, [X5, X2] |
(14914) 0xa027c0 LDR D28, [X13, X2] |
(14914) 0xa027c4 LDR D23, [X3, X2] |
(14914) 0xa027c8 LDR D16, [X0, X2] |
(14914) 0xa027cc FMUL D20, D27, D7 |
(14914) 0xa027d0 LDR D26, [X4, X2] |
(14914) 0xa027d4 FMUL D18, D23, D28 |
(14914) 0xa027d8 FMUL D17, D27, D16 |
(14914) 0xa027dc FSUB D20, D16, D20
(14914) 0xa027e0 FMUL D16, D26, D16
(14914) 0xa027e4 FMUL D19, D26, D28
(14914) 0xa027e8 FSUB D18, D7, D18
(14914) 0xa027ec FMUL D7, D23, D7
(14914) 0xa027f0 FSUB D16, D28, D16
(14914) 0xa027f4 FADD D19, D20, D19 |
(14914) 0xa027f8 FADD D17, D18, D17 |
(14914) 0xa027fc FADD D7, D16, D7 |
(14914) 0xa02800 STR D19, [X0, X2] |
(14914) 0xa02804 STR D17, [X14, X2] |
(14914) 0xa02808 STR D7, [X13, X2] |
(14914) 0xa0280c LDR D1, [X15, X2] |
(14914) 0xa02810 LDR D22, [X12, X2] |
(14914) 0xa02814 LDR D2, [X10, X2] |
(14914) 0xa02818 FMUL D6, D27, D1 |
(14914) 0xa0281c FMUL D4, D23, D22 |
(14914) 0xa02820 FMUL D5, D26, D22 |
(14914) 0xa02824 FMUL D3, D27, D2 |
(14914) 0xa02828 FSUB D6, D2, D6
(14914) 0xa0282c FMUL D2, D26, D2
(14914) 0xa02830 FSUB D4, D1, D4
(14914) 0xa02834 FMUL D1, D23, D1
(14914) 0xa02838 FSUB D2, D22, D2
(14914) 0xa0283c FADD D5, D6, D5 |
(14914) 0xa02840 FADD D3, D4, D3 |
(14914) 0xa02844 FADD D1, D2, D1 |
(14914) 0xa02848 STR D5, [X10, X2] |
(14914) 0xa0284c STR D3, [X15, X2] |
(14914) 0xa02850 STR D1, [X12, X2] |
(14914) 0xa02854 LDR D29, [X6, X2] |
(14914) 0xa02858 LDR D21, [X9, X2] |
(14914) 0xa0285c LDR D30, [X7, X2] |
(14914) 0xa02860 FMUL D25, D27, D29 |
(14914) 0xa02864 FMUL D24, D23, D21 |
(14914) 0xa02868 FMUL D0, D26, D21 |
(14914) 0xa0286c FMUL D27, D27, D30 |
(14914) 0xa02870 FSUB D25, D30, D25
(14914) 0xa02874 FMUL D30, D26, D30
(14914) 0xa02878 FSUB D24, D29, D24
(14914) 0xa0287c FMUL D29, D23, D29
(14914) 0xa02880 FSUB D30, D21, D30
(14914) 0xa02884 FADD D0, D25, D0 |
(14914) 0xa02888 FADD D27, D24, D27 |
(14914) 0xa0288c FADD D29, D30, D29 |
(14914) 0xa02890 STR D0, [X7, X2] |
(14914) 0xa02894 STR D27, [X6, X2] |
(14914) 0xa02898 STR D29, [X9, X2] |
(14914) 0xa0289c ADD X2, X2, #8 |
(14914) 0xa028a0 CMP X8, X2 |
(14914) 0xa028a4 B.NE a027ac |
(14913) 0xa028a8 RET |
(14915) 0xa028ac LDR D7, [X13, X2] |
(14915) 0xa028b0 LDR D26, [X5, X2] |
(14915) 0xa028b4 LDR D27, [X14, X2] |
(14915) 0xa028b8 LDR D22, [X3, X2] |
(14915) 0xa028bc LDR D16, [X0, X2] |
(14915) 0xa028c0 FMUL D20, D26, D7 |
(14915) 0xa028c4 LDR D25, [X4, X2] |
(14915) 0xa028c8 FMUL D18, D22, D27 |
(14915) 0xa028cc FMUL D17, D26, D16 |
(14915) 0xa028d0 FSUB D20, D16, D20
(14915) 0xa028d4 FMUL D16, D25, D16
(14915) 0xa028d8 FMUL D19, D25, D27
(14915) 0xa028dc FSUB D18, D7, D18
(14915) 0xa028e0 FMUL D7, D22, D7
(14915) 0xa028e4 FSUB D16, D27, D16
(14915) 0xa028e8 FADD D19, D20, D19 |
(14915) 0xa028ec FADD D17, D18, D17 |
(14915) 0xa028f0 FADD D7, D16, D7 |
(14915) 0xa028f4 STR D19, [X0, X2] |
(14915) 0xa028f8 STR D17, [X13, X2] |
(14915) 0xa028fc STR D7, [X14, X2] |
(14915) 0xa02900 LDR D1, [X12, X2] |
(14915) 0xa02904 LDR D21, [X11, X2] |
(14915) 0xa02908 LDR D2, [X9, X2] |
(14915) 0xa0290c FMUL D6, D26, D1 |
(14915) 0xa02910 FMUL D4, D22, D21 |
(14915) 0xa02914 FMUL D5, D25, D21 |
(14915) 0xa02918 FMUL D3, D26, D2 |
(14915) 0xa0291c FSUB D6, D2, D6
(14915) 0xa02920 FMUL D2, D25, D2
(14915) 0xa02924 FSUB D4, D1, D4
(14915) 0xa02928 FMUL D1, D22, D1
(14915) 0xa0292c FSUB D2, D21, D2
(14915) 0xa02930 FADD D5, D6, D5 |
(14915) 0xa02934 FADD D3, D4, D3 |
(14915) 0xa02938 FADD D1, D2, D1 |
(14915) 0xa0293c STR D5, [X9, X2] |
(14915) 0xa02940 STR D3, [X12, X2] |
(14915) 0xa02944 STR D1, [X11, X2] |
(14915) 0xa02948 LDR D28, [X6, X2] |
(14915) 0xa0294c LDR D30, [X7, X2] |
(14915) 0xa02950 LDR D29, [X10, X2] |
(14915) 0xa02954 FMUL D24, D26, D28 |
(14915) 0xa02958 FMUL D23, D22, D30 |
(14915) 0xa0295c FMUL D0, D25, D30 |
(14915) 0xa02960 FMUL D26, D26, D29 |
(14915) 0xa02964 FSUB D24, D29, D24
(14915) 0xa02968 FMUL D29, D25, D29
(14915) 0xa0296c FSUB D23, D28, D23
(14915) 0xa02970 FMUL D28, D22, D28
(14915) 0xa02974 FSUB D29, D30, D29
(14915) 0xa02978 FADD D0, D24, D0 |
(14915) 0xa0297c FADD D26, D23, D26 |
(14915) 0xa02980 FADD D28, D29, D28 |
(14915) 0xa02984 STR D0, [X10, X2] |
(14915) 0xa02988 STR D26, [X6, X2] |
(14915) 0xa0298c STR D28, [X7, X2] |
(14915) 0xa02990 B a02718 |
0xa02994 HINT #0 |
0xa02998 HINT #0 |
0xa0299c HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►50.59+ | forint | forint.F:390 | engine_linuxa64_gf_ompi |
| ○ | resol_._omp_fn.15 | resol.F:4587 | engine_linuxa64_gf_ompi |
| ○ | gomp_thread_start | gomp_thread_start | libgomp.so.1.0.0 |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►49.41+ | forint | forint.F:390 | engine_linuxa64_gf_ompi |
| ○ | resol_._omp_fn.15 | resol.F:4587 | engine_linuxa64_gf_ompi |
| ○ | GOMP_parallel | libgomp.h:980 | libgomp.so.1.0.0 |
| ○ | resol | resol.F:4537 | engine_linuxa64_gf_ompi |
| ○ | resol_head | resol_head.F:284 | engine_linuxa64_gf_ompi |
| ○ | radioss2 | radioss2.F:2178 | engine_linuxa64_gf_ompi |
| ○ | radioss0 | radioss0.F:95 | engine_linuxa64_gf_ompi |
| ○ | main | radioss.F:38 | engine_linuxa64_gf_ompi |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | engine_linuxa64_gf_ompi |
| Path / |
The code analyzed by CQA in this panel excludes loops and represents 0.00% of application time for run mpi_48_ranks_omp_2_threads.
| Source file and lines | s4mall3.F:29-116 |
| Module | engine_linuxa64_gf_ompi |
| nb instructions | 46 |
| loop length | 184 |
| nb stack references | 0 |
| front end | 5.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | P16 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.00 | 2.00 | 5.67 | 5.67 | 5.67 | 5.67 | 5.67 | 5.67 | 0.25 | 0.25 | 0.25 | 0.25 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 |
| cycles | 2.00 | 2.00 | 5.67 | 5.67 | 5.67 | 5.67 | 5.67 | 5.67 | 0.25 | 0.25 | 0.25 | 0.25 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 5.38 |
| Overall L1 | 5.67 |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | P16 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR W8, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W2, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SBFM X6, X8, #0, #31 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| BIC X6, X6, X6,ASR #63 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ORN X16, XZR, X6 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| CMP W2, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.EQ a02728 <s4malla3_+0xc8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.GT a02724 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X2, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.LE a02724 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.LE a02724 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X13, X16, W6,UXTW #1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (50.0%) |
| ADD X9, X13, X6,LSL #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| MOVZ W7, #9 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X14, X13, X6 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| FMOV D31, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| ADD X14, X14, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X13, X13, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X11, X9, X6,LSL #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X12, X6, X9 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X9, X9, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| UMADDL X7, W6, W7, X16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| ADD X12, X12, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| MOVZ X2, #0 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X10, X6, X11 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X11, X11, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X12, X0, X12,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X6, X6, X10 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X10, X10, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X11, X0, X11,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X6, X6, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X7, X7, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X10, X0, X10,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X6, X0, X6,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| UBFM X8, X8, #61, #31 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X9, X0, X9,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X14, X0, X14,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X13, X0, X13,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X7, X0, X7,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| HINT #0 | N/A | ||||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in this panel excludes loops and represents 0.00% of application time for run mpi_48_ranks_omp_2_threads.
| Source file and lines | s4mall3.F:29-116 |
| Module | engine_linuxa64_gf_ompi |
| nb instructions | 46 |
| loop length | 184 |
| nb stack references | 0 |
| front end | 5.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | P16 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.00 | 2.00 | 5.67 | 5.67 | 5.67 | 5.67 | 5.67 | 5.67 | 0.25 | 0.25 | 0.25 | 0.25 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 |
| cycles | 2.00 | 2.00 | 5.67 | 5.67 | 5.67 | 5.67 | 5.67 | 5.67 | 0.25 | 0.25 | 0.25 | 0.25 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 5.38 |
| Overall L1 | 5.67 |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | P16 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR W8, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| LDR W2, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SBFM X6, X8, #0, #31 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| BIC X6, X6, X6,ASR #63 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ORN X16, XZR, X6 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| CMP W2, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.EQ a02728 <s4malla3_+0xc8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.GT a02724 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X2, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.LE a02724 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.LE a02724 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X13, X16, W6,UXTW #1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (50.0%) |
| ADD X9, X13, X6,LSL #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| MOVZ W7, #9 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X14, X13, X6 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| FMOV D31, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| ADD X14, X14, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X13, X13, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X11, X9, X6,LSL #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X12, X6, X9 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X9, X9, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| UMADDL X7, W6, W7, X16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| ADD X12, X12, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| MOVZ X2, #0 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X10, X6, X11 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X11, X11, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X12, X0, X12,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X6, X6, X10 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X10, X10, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X11, X0, X11,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X6, X6, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X7, X7, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD X10, X0, X10,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X6, X0, X6,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| UBFM X8, X8, #61, #31 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X9, X0, X9,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X14, X0, X14,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X13, X0, X13,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (50.0%) |
| ADD X7, X0, X7,LSL #3 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| HINT #0 | N/A | ||||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼s4malla3– | 0.01 | 0.79 |
| ▼Loop 14913 - s4mall3.F:61-116 - engine_linuxa64_gf_ompi– | 0.00 | 0.00 |
| ▼Loop 14915 - s4mall3.F:87-109 - engine_linuxa64_gf_ompi– | 0.01 | 8.09 |
| ○Loop 14916 - s4mall3.F:87-88 - engine_linuxa64_gf_ompi | 0.00 | 0.33 |
| ○Loop 14914 - s4mall3.F:61-83 - engine_linuxa64_gf_ompi | 0.00 | 0.00 |
