| Function: s4malla3 | Module: engine_linuxa64_gf_ompi | Source: s4mall3.F:29-116 [...] | Coverage (incl. loops): 0.01% | (excl. loops): 0.00% |
|---|
| Function: s4malla3 | Module: engine_linuxa64_gf_ompi | Source: s4mall3.F:29-116 [...] | Coverage (incl. loops): 0.01% | (excl. loops): 0.00% |
|---|
/home/eoseret/Applications/OpenRadioss/engine/source/elements/solid/solide4/s4mall3.F: 29 - 116 |
-------------------------------------------------------------------------------- |
29: 1 SAV, OFFG, OFF, WXX, |
[...] |
60: IF(ISMSTR==11)THEN |
61: DO I=1,NEL |
62: C----------------------------------------------- |
63: IF(OFFG(I)==ZERO) CYCLE |
64: X=SAV(I,1) |
65: Y=SAV(I,4) |
66: Z=SAV(I,7) |
67: SAV(I,1) = X - Y*WZZ(I) + Z*WYY(I) |
68: SAV(I,4) = Y - Z*WXX(I) + X*WZZ(I) |
69: SAV(I,7) = Z - X*WYY(I) + Y*WXX(I) |
70: C |
71: X=SAV(I,2) |
72: Y=SAV(I,5) |
73: Z=SAV(I,8) |
74: SAV(I,2) = X - Y*WZZ(I) + Z*WYY(I) |
75: SAV(I,5) = Y - Z*WXX(I) + X*WZZ(I) |
76: SAV(I,8) = Z - X*WYY(I) + Y*WXX(I) |
77: C |
78: X=SAV(I,3) |
79: Y=SAV(I,6) |
80: Z=SAV(I,9) |
81: SAV(I,3) = X - Y*WZZ(I) + Z*WYY(I) |
82: SAV(I,6) = Y - Z*WXX(I) + X*WZZ(I) |
83: SAV(I,9) = Z - X*WYY(I) + Y*WXX(I) |
84: C |
85: ENDDO |
86: ELSEIF(ISMSTR<=4.AND.JLAG>0)THEN |
87: DO I=1,NEL |
88: IF(OFFG(I)>ONE)THEN |
89: C----------------------------------------------- |
90: X=SAV(I,1) |
91: Y=SAV(I,2) |
92: Z=SAV(I,3) |
93: SAV(I,1) = X - Y*WZZ(I) + Z*WYY(I) |
94: SAV(I,2) = Y - Z*WXX(I) + X*WZZ(I) |
95: SAV(I,3) = Z - X*WYY(I) + Y*WXX(I) |
96: C |
97: X=SAV(I,4) |
98: Y=SAV(I,5) |
99: Z=SAV(I,6) |
100: SAV(I,4) = X - Y*WZZ(I) + Z*WYY(I) |
101: SAV(I,5) = Y - Z*WXX(I) + X*WZZ(I) |
102: SAV(I,6) = Z - X*WYY(I) + Y*WXX(I) |
103: C |
104: X=SAV(I,7) |
105: Y=SAV(I,8) |
106: Z=SAV(I,9) |
107: SAV(I,7) = X - Y*WZZ(I) + Z*WYY(I) |
108: SAV(I,8) = Y - Z*WXX(I) + X*WZZ(I) |
109: SAV(I,9) = Z - X*WYY(I) + Y*WXX(I) |
[...] |
116: END |
0x8cac20 LDR W8, [X6] |
0x8cac24 LDR W2, [X7] |
0x8cac28 SBFM X6, X8, #0, #31 |
0x8cac2c BIC X6, X6, X6,ASR #63 |
0x8cac30 ORN X16, XZR, X6 |
0x8cac34 CMP W2, #11 |
0x8cac38 B.EQ 8cace8 |
0x8cac3c CMP W2, #4 |
0x8cac40 B.GT 8cace4 |
0x8cac44 LDR X2, [SP] |
0x8cac48 LDR W2, [X2] |
0x8cac4c CMP W2, #0 |
0x8cac50 B.LE 8cace4 |
0x8cac54 CMP W8, #0 |
0x8cac58 B.LE 8cace4 |
0x8cac5c ADD X9, X16, W6,UXTW #1 |
0x8cac60 ADD X10, X9, X6,LSL #1 |
0x8cac64 MOVZ W7, #9 |
0x8cac68 ADD X13, X9, X6 |
0x8cac6c FMOV D31, #1.0000000 |
0x8cac70 ADD X9, X9, #1 |
0x8cac74 ADD X12, X10, X6,LSL #1 |
0x8cac78 ADD X14, X6, X10 |
0x8cac7c ADD X13, X13, #1 |
0x8cac80 UMADDL X7, W6, W7, X16 |
0x8cac84 ADD X10, X10, #1 |
0x8cac88 ADD X14, X14, #1 |
0x8cac8c ADD X11, X6, X12 |
0x8cac90 ADD X12, X12, #1 |
0x8cac94 ADD X14, X0, X14,LSL #3 |
0x8cac98 ADD X6, X6, X11 |
0x8cac9c ADD X11, X11, #1 |
0x8caca0 ADD X12, X0, X12,LSL #3 |
0x8caca4 ADD X6, X6, #1 |
0x8caca8 ADD X7, X7, #1 |
0x8cacac ADD X11, X0, X11,LSL #3 |
0x8cacb0 ADD X6, X0, X6,LSL #3 |
0x8cacb4 MOVZ X2, #0 |
0x8cacb8 UBFM X8, X8, #61, #31 |
0x8cacbc ADD X10, X0, X10,LSL #3 |
0x8cacc0 ADD X13, X0, X13,LSL #3 |
0x8cacc4 ADD X9, X0, X9,LSL #3 |
0x8cacc8 ADD X7, X0, X7,LSL #3 |
(14516) 0x8caccc LDR D0, [X1, X2] |
(14516) 0x8cacd0 FCMPE D0, D31 |
(14516) 0x8cacd4 B.GT 8cae6c |
(14516) 0x8cacd8 ADD X2, X2, #8 |
(14516) 0x8cacdc CMP X2, X8 |
(14516) 0x8cace0 B.NE 8caccc |
(14513) 0x8cace4 RET |
(14513) 0x8cace8 CMP W8, #0 |
(14513) 0x8cacec B.LE 8cace4 |
(14513) 0x8cacf0 ADD X14, X16, W6,UXTW #2 |
(14513) 0x8cacf4 UBFM X7, X6, #63, #31 |
(14513) 0x8cacf8 ADD X7, X7, W6,UXTW |
(14513) 0x8cacfc ADD X13, X14, X7 |
(14513) 0x8cad00 UBFM X9, X6, #62, #31 |
(14513) 0x8cad04 ADD X9, X9, W6,UXTW |
(14513) 0x8cad08 MOVZ W10, #9 |
(14513) 0x8cad0c SUB X11, X13, X9 |
(14513) 0x8cad10 ADD X12, X11, X7,LSL #1 |
(14513) 0x8cad14 ADD X15, X7, X11 |
(14513) 0x8cad18 ADD X14, X14, #1 |
(14513) 0x8cad1c UMADDL X10, W6, W10, X16 |
(14513) 0x8cad20 ADD X13, X13, #1 |
(14513) 0x8cad24 ADD X15, X15, #1 |
(14513) 0x8cad28 SUB X9, X12, X9 |
(14513) 0x8cad2c ADD X11, X11, #1 |
(14513) 0x8cad30 ADD X14, X0, X14,LSL #3 |
(14513) 0x8cad34 ADD X6, X7, X9 |
(14513) 0x8cad38 ADD X12, X12, #1 |
(14513) 0x8cad3c ADD X13, X0, X13,LSL #3 |
(14513) 0x8cad40 ADD X7, X9, #1 |
(14513) 0x8cad44 ADD X6, X6, #1 |
(14513) 0x8cad48 ADD X12, X0, X12,LSL #3 |
(14513) 0x8cad4c ADD X9, X10, #1 |
(14513) 0x8cad50 ADD X6, X0, X6,LSL #3 |
(14513) 0x8cad54 MOVZ X2, #0 |
(14513) 0x8cad58 ADD X7, X0, X7,LSL #3 |
(14513) 0x8cad5c UBFM X8, X8, #61, #31 |
(14513) 0x8cad60 ADD X15, X0, X15,LSL #3 |
(14513) 0x8cad64 ADD X10, X0, X11,LSL #3 |
(14513) 0x8cad68 ADD X9, X0, X9,LSL #3 |
(14514) 0x8cad6c LDR D31, [X1, X2] |
(14514) 0x8cad70 FCMP D31, #0 |
(14514) 0x8cad74 B.EQ 8cae5c |
(14514) 0x8cad78 LDR D7, [X14, X2] |
(14514) 0x8cad7c LDR D27, [X5, X2] |
(14514) 0x8cad80 LDR D28, [X13, X2] |
(14514) 0x8cad84 LDR D23, [X3, X2] |
(14514) 0x8cad88 LDR D16, [X0, X2] |
(14514) 0x8cad8c FMUL D20, D7, D27 |
(14514) 0x8cad90 LDR D26, [X4, X2] |
(14514) 0x8cad94 FMUL D18, D28, D23 |
(14514) 0x8cad98 FMUL D17, D16, D27 |
(14514) 0x8cad9c FSUB D20, D16, D20 |
(14514) 0x8cada0 FMUL D16, D16, D26 |
(14514) 0x8cada4 FMUL D19, D28, D26 |
(14514) 0x8cada8 FSUB D18, D7, D18 |
(14514) 0x8cadac FMUL D7, D7, D23 |
(14514) 0x8cadb0 FSUB D16, D28, D16 |
(14514) 0x8cadb4 FADD D19, D20, D19 |
(14514) 0x8cadb8 FADD D17, D18, D17 |
(14514) 0x8cadbc FADD D7, D16, D7 |
(14514) 0x8cadc0 STR D19, [X0, X2] |
(14514) 0x8cadc4 STR D17, [X14, X2] |
(14514) 0x8cadc8 STR D7, [X13, X2] |
(14514) 0x8cadcc LDR D1, [X15, X2] |
(14514) 0x8cadd0 LDR D22, [X12, X2] |
(14514) 0x8cadd4 LDR D2, [X10, X2] |
(14514) 0x8cadd8 FMUL D6, D1, D27 |
(14514) 0x8caddc FMUL D4, D23, D22 |
(14514) 0x8cade0 FMUL D5, D22, D26 |
(14514) 0x8cade4 FMUL D3, D2, D27 |
(14514) 0x8cade8 FSUB D6, D2, D6 |
(14514) 0x8cadec FMUL D2, D2, D26 |
(14514) 0x8cadf0 FSUB D4, D1, D4 |
(14514) 0x8cadf4 FMUL D1, D23, D1 |
(14514) 0x8cadf8 FSUB D2, D22, D2 |
(14514) 0x8cadfc FADD D5, D6, D5 |
(14514) 0x8cae00 FADD D3, D4, D3 |
(14514) 0x8cae04 FADD D1, D2, D1 |
(14514) 0x8cae08 STR D5, [X10, X2] |
(14514) 0x8cae0c STR D3, [X15, X2] |
(14514) 0x8cae10 STR D1, [X12, X2] |
(14514) 0x8cae14 LDR D29, [X6, X2] |
(14514) 0x8cae18 LDR D21, [X9, X2] |
(14514) 0x8cae1c LDR D30, [X7, X2] |
(14514) 0x8cae20 FMUL D25, D27, D29 |
(14514) 0x8cae24 FMUL D24, D23, D21 |
(14514) 0x8cae28 FMUL D0, D26, D21 |
(14514) 0x8cae2c FMUL D27, D27, D30 |
(14514) 0x8cae30 FSUB D25, D30, D25 |
(14514) 0x8cae34 FMUL D30, D26, D30 |
(14514) 0x8cae38 FSUB D24, D29, D24 |
(14514) 0x8cae3c FMUL D29, D23, D29 |
(14514) 0x8cae40 FSUB D30, D21, D30 |
(14514) 0x8cae44 FADD D0, D25, D0 |
(14514) 0x8cae48 FADD D27, D24, D27 |
(14514) 0x8cae4c FADD D29, D30, D29 |
(14514) 0x8cae50 STR D0, [X7, X2] |
(14514) 0x8cae54 STR D27, [X6, X2] |
(14514) 0x8cae58 STR D29, [X9, X2] |
(14514) 0x8cae5c ADD X2, X2, #8 |
(14514) 0x8cae60 CMP X2, X8 |
(14514) 0x8cae64 B.NE 8cad6c |
(14513) 0x8cae68 RET |
(14515) 0x8cae6c LDR D7, [X9, X2] |
(14515) 0x8cae70 LDR D26, [X5, X2] |
(14515) 0x8cae74 LDR D27, [X13, X2] |
(14515) 0x8cae78 LDR D22, [X3, X2] |
(14515) 0x8cae7c LDR D16, [X0, X2] |
(14515) 0x8cae80 FMUL D20, D7, D26 |
(14515) 0x8cae84 LDR D25, [X4, X2] |
(14515) 0x8cae88 FMUL D18, D27, D22 |
(14515) 0x8cae8c FMUL D17, D16, D26 |
(14515) 0x8cae90 FSUB D20, D16, D20 |
(14515) 0x8cae94 FMUL D16, D16, D25 |
(14515) 0x8cae98 FMUL D19, D27, D25 |
(14515) 0x8cae9c FSUB D18, D7, D18 |
(14515) 0x8caea0 FMUL D7, D7, D22 |
(14515) 0x8caea4 FSUB D16, D27, D16 |
(14515) 0x8caea8 FADD D19, D20, D19 |
(14515) 0x8caeac FADD D17, D18, D17 |
(14515) 0x8caeb0 FADD D7, D16, D7 |
(14515) 0x8caeb4 STR D19, [X0, X2] |
(14515) 0x8caeb8 STR D17, [X9, X2] |
(14515) 0x8caebc STR D7, [X13, X2] |
(14515) 0x8caec0 LDR D1, [X14, X2] |
(14515) 0x8caec4 LDR D21, [X12, X2] |
(14515) 0x8caec8 LDR D2, [X10, X2] |
(14515) 0x8caecc FMUL D6, D26, D1 |
(14515) 0x8caed0 FMUL D4, D22, D21 |
(14515) 0x8caed4 FMUL D5, D25, D21 |
(14515) 0x8caed8 FMUL D3, D26, D2 |
(14515) 0x8caedc FSUB D6, D2, D6 |
(14515) 0x8caee0 FMUL D2, D25, D2 |
(14515) 0x8caee4 FSUB D4, D1, D4 |
(14515) 0x8caee8 FMUL D1, D22, D1 |
(14515) 0x8caeec FSUB D2, D21, D2 |
(14515) 0x8caef0 FADD D5, D6, D5 |
(14515) 0x8caef4 FADD D3, D4, D3 |
(14515) 0x8caef8 FADD D1, D2, D1 |
(14515) 0x8caefc STR D5, [X10, X2] |
(14515) 0x8caf00 STR D3, [X14, X2] |
(14515) 0x8caf04 STR D1, [X12, X2] |
(14515) 0x8caf08 LDR D28, [X6, X2] |
(14515) 0x8caf0c LDR D30, [X7, X2] |
(14515) 0x8caf10 LDR D29, [X11, X2] |
(14515) 0x8caf14 FMUL D24, D26, D28 |
(14515) 0x8caf18 FMUL D23, D22, D30 |
(14515) 0x8caf1c FMUL D0, D25, D30 |
(14515) 0x8caf20 FMUL D26, D26, D29 |
(14515) 0x8caf24 FSUB D24, D29, D24 |
(14515) 0x8caf28 FMUL D29, D25, D29 |
(14515) 0x8caf2c FSUB D23, D28, D23 |
(14515) 0x8caf30 FMUL D28, D22, D28 |
(14515) 0x8caf34 FSUB D29, D30, D29 |
(14515) 0x8caf38 FADD D0, D24, D0 |
(14515) 0x8caf3c FADD D26, D23, D26 |
(14515) 0x8caf40 FADD D28, D29, D28 |
(14515) 0x8caf44 STR D0, [X11, X2] |
(14515) 0x8caf48 STR D26, [X6, X2] |
(14515) 0x8caf4c STR D28, [X7, X2] |
(14515) 0x8caf50 B 8cacd8 |
0x8caf54 HINT #0 |
0x8caf58 HINT #0 |
0x8caf5c HINT #0 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►50.09+ | forint | forint.F:390 | engine_linuxa64_gf_ompi |
| ○ | resol_._omp_fn.15 | resol.F:4587 | engine_linuxa64_gf_ompi |
| ○ | GOMP_parallel | libgomp.h:980 | libgomp.so.1.0.0 |
| ○ | resol | resol.F:4537 | engine_linuxa64_gf_ompi |
| ○ | resol_head | resol_head.F:284 | engine_linuxa64_gf_ompi |
| ○ | radioss2 | radioss2.F:2178 | engine_linuxa64_gf_ompi |
| ○ | radioss0 | radioss0.F:95 | engine_linuxa64_gf_ompi |
| ○ | main | radioss.F:38 | engine_linuxa64_gf_ompi |
| ○ | __libc_start_main | libc-2.31.so | |
| ○ | _start | engine_linuxa64_gf_ompi | |
| ►49.91+ | forint | forint.F:390 | engine_linuxa64_gf_ompi |
| ○ | resol_._omp_fn.15 | resol.F:4587 | engine_linuxa64_gf_ompi |
| ○ | gomp_thread_start | gomp_thread_start | libgomp.so.1.0.0 |
| ○ | start_thread | start_thread | libpthread-2.31.so |
| ○ | __clone | __clone | libc-2.31.so |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run mpi_48_ranks_omp_2_threads.
| Source file and lines | s4mall3.F:29-116 |
| Module | engine_linuxa64_gf_ompi |
| nb instructions | 46 |
| loop length | 184 |
| nb stack references | 0 |
| front end | 5.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.00 | 2.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.25 | 0.25 | 0.25 | 0.25 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 |
| cycles | 2.00 | 2.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.25 | 0.25 | 0.25 | 0.25 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 5.38 |
| Overall L1 | 8.50 |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR W8, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W2, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SBFM X6, X8, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BIC X6, X6, X6,ASR #63 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORN X16, XZR, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| CMP W2, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.EQ 8cace8 <s4malla3_+0xc8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GT 8cace4 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X2, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 8cace4 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 8cace4 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X9, X16, W6,UXTW #1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| ADD X10, X9, X6,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ W7, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X13, X9, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| FMOV D31, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD X9, X9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X12, X10, X6,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X14, X6, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X13, X13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| UMADDL X7, W6, W7, X16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| ADD X10, X10, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X14, X14, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X11, X6, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X12, X12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X14, X0, X14,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X6, X6, X11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X11, X11, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X12, X0, X12,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X6, X6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X7, X7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X11, X0, X11,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X6, X0, X6,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ X2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| UBFM X8, X8, #61, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X10, X0, X10,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X13, X0, X13,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X9, X0, X9,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X7, X0, X7,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run mpi_48_ranks_omp_2_threads.
| Source file and lines | s4mall3.F:29-116 |
| Module | engine_linuxa64_gf_ompi |
| nb instructions | 46 |
| loop length | 184 |
| nb stack references | 0 |
| front end | 5.38 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.00 | 2.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.25 | 0.25 | 0.25 | 0.25 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 |
| cycles | 2.00 | 2.00 | 8.50 | 8.50 | 8.50 | 8.50 | 0.25 | 0.25 | 0.25 | 0.25 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 5.38 |
| Overall L1 | 8.50 |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR W8, [X6] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (12.5%) |
| LDR W2, [X7] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| SBFM X6, X8, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BIC X6, X6, X6,ASR #63 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORN X16, XZR, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| CMP W2, #11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.EQ 8cace8 <s4malla3_+0xc8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W2, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.GT 8cace4 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X2, [SP] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR W2, [X2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| CMP W2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 8cace4 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| CMP W8, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| B.LE 8cace4 <s4malla3_+0xc4> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| ADD X9, X16, W6,UXTW #1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| ADD X10, X9, X6,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| MOVZ W7, #9 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X13, X9, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| FMOV D31, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ADD X9, X9, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X12, X10, X6,LSL #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X14, X6, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X13, X13, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| UMADDL X7, W6, W7, X16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | N/A |
| ADD X10, X10, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X14, X14, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X11, X6, X12 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X12, X12, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X14, X0, X14,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X6, X6, X11 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X11, X11, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X12, X0, X12,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X6, X6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X7, X7, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X11, X0, X11,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X6, X0, X6,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| MOVZ X2, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| UBFM X8, X8, #61, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X10, X0, X10,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X13, X0, X13,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X9, X0, X9,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ADD X7, X0, X7,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A | ||||||||||||||||||
| HINT #0 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼s4malla3– | 0.01 | 1.41 |
| ▼Loop 14513 - s4mall3.F:61-116 - engine_linuxa64_gf_ompi– | 0.00 | 0.01 |
| ▼Loop 14515 - s4mall3.F:87-109 - engine_linuxa64_gf_ompi– | 0.01 | 12.75 |
| ○Loop 14516 - s4mall3.F:87-88 - engine_linuxa64_gf_ompi | 0.00 | 0.64 |
| ○Loop 14514 - s4mall3.F:61-83 - engine_linuxa64_gf_ompi | 0.00 | 0.00 |
