| Loop Id: 69 | Module: attention-armclang-native | Source: attention_v2.cpp:164-167 [...] | Coverage: 0.43% |
|---|
| Loop Id: 69 | Module: attention-armclang-native | Source: attention_v2.cpp:164-167 [...] | Coverage: 0.43% |
|---|
0x11c40 MOVI D0, #0 |
0x11c44 FMOV S1, #1.0000000 |
0x11c48 ORR X8, XZR, X17 |
0x11c4c B 11ce8 |
(76) 0x11c60 LDR X9, [SP, #5664] |
(76) 0x11c64 LDR X10, [SP, #680] |
(76) 0x11c68 LDR X11, [SP, #3848] |
(76) 0x11c6c ORR X21, XZR, XZR |
(76) 0x11c70 AND X9, X9, #0x0 |
(76) 0x11c74 AND X12, X10, #0x0 |
(76) 0x11c78 SBFM X10, X10, #0, #0 |
(76) 0x11c7c ORR X9, X12, X9 |
(76) 0x11c80 AND X10, X10, X27 |
(76) 0x11c84 EOR X9, X11, X9,LSR #1 |
(76) 0x11c88 EOR X9, X9, X10 |
(76) 0x11c8c STR X9, [SP, #5664] |
(76) 0x11c90 ORR X9, XZR, X21 |
(76) 0x11c94 ADD X21, X21, #1 |
(76) 0x11c98 ADD X10, SP, #680 |
(76) 0x11c9c SUBS X8, X8, #1 |
(76) 0x11ca0 STR X21, [SP, #5672] |
(76) 0x11ca4 LDR X9, [X10, X9,LSL #3] |
(76) 0x11ca8 UBFM X10, X9, #11, #42 |
(76) 0x11cac EOR X9, X10, X9 |
(76) 0x11cb0 MOVZ W10, #22144 |
(76) 0x11cb4 MOVK W10, #40236 |
(76) 0x11cb8 AND X10, X10, X9,LSL #7 |
(76) 0x11cbc EOR X9, X10, X9 |
(76) 0x11cc0 MOVZ W10, #61382 |
(76) 0x11cc4 AND X10, X10, X9,LSL #15 |
(76) 0x11cc8 EOR X9, X10, X9 |
(76) 0x11ccc EOR X9, X9, X9,LSR #18 |
(76) 0x11cd0 UCVTF S2, X9 |
(76) 0x11cd4 MOVZ W9, #20352 |
(76) 0x11cd8 FMADD S0, S2, S1, S0 |
(76) 0x11cdc FMOV S2, W9 |
(76) 0x11ce0 FMUL S1, S1, S2 |
(76) 0x11ce4 B.EQ 11ee4 |
(76) 0x11ce8 CMP X21, #624 |
(76) 0x11cec B.CC 11c90 |
(76) 0x11cf0 ADD X15, SP, #680 |
(76) 0x11cf4 DUPM Z22.D, #0x80000000 |
(76) 0x11cf8 DUPM Z23.D, #0x7ffffffe |
(76) 0x11cfc DUP Z24.D, #1 |
(76) 0x11d00 ORR X9, XZR, XZR |
(76) 0x11d04 LD1R {V2.2D}, [X15] |
(77) 0x11d08 ADD X10, X15, X9 |
(77) 0x11d0c ADD X9, X9, #64 |
(77) 0x11d10 LDUR Q3, [X10, #8] |
(77) 0x11d14 LDUR Q4, [X10, #24] |
(77) 0x11d18 LDUR Q5, [X10, #40] |
(77) 0x11d1c ADD X12, X10, #3192 |
(77) 0x11d20 ADD X11, X10, #3176 |
(77) 0x11d24 ADD X13, X10, #3208 |
(77) 0x11d28 ADD X14, X10, #3224 |
(77) 0x11d2c EXT V6.16B, V2.16B, V3.16B, #8 |
(77) 0x11d30 LDUR Q2, [X10, #56] |
(77) 0x11d34 EXT V7.16B, V3.16B, V4.16B, #8 |
(77) 0x11d38 AND V19.16B, V4.16B, V23.16B |
(77) 0x11d3c EXT V16.16B, V4.16B, V5.16B, #8 |
(77) 0x11d40 AND V18.16B, V3.16B, V23.16B |
(77) 0x11d44 AND V20.16B, V5.16B, V23.16B |
(77) 0x11d48 AND V3.16B, V3.16B, V24.16B |
(77) 0x11d4c AND V4.16B, V4.16B, V24.16B |
(77) 0x11d50 AND V7.16B, V7.16B, V22.16B |
(77) 0x11d54 AND V6.16B, V6.16B, V22.16B |
(77) 0x11d58 AND V16.16B, V16.16B, V22.16B |
(77) 0x11d5c CMEQ V3.2D, V3.2D, #0 |
(77) 0x11d60 CMEQ V4.2D, V4.2D, #0 |
(77) 0x11d64 ORR V7.16B, V19.16B, V7.16B |
(77) 0x11d68 LDR Q19, [X12] |
(77) 0x11d6c ORR V6.16B, V18.16B, V6.16B |
(77) 0x11d70 LDR Q18, [X11] |
(77) 0x11d74 ORR V16.16B, V20.16B, V16.16B |
(77) 0x11d78 LDR Q20, [X13] |
(77) 0x11d7c EXT V17.16B, V5.16B, V2.16B, #8 |
(77) 0x11d80 AND V21.16B, V2.16B, V23.16B |
(77) 0x11d84 USHR V7.2D, V7.2D, #1 |
(77) 0x11d88 USHR V6.2D, V6.2D, #1 |
(77) 0x11d8c USHR V16.2D, V16.2D, #1 |
(77) 0x11d90 AND V5.16B, V5.16B, V24.16B |
(77) 0x11d94 AND V17.16B, V17.16B, V22.16B |
(77) 0x11d98 CMEQ V5.2D, V5.2D, #0 |
(77) 0x11d9c ORR V17.16B, V21.16B, V17.16B |
(77) 0x11da0 LDR Q21, [X14] |
(77) 0x11da4 EOR V7.16B, V7.16B, V19.16B |
(77) 0x11da8 DUP V19.2D, X27 |
(77) 0x11dac EOR V6.16B, V6.16B, V18.16B |
(77) 0x11db0 AND V18.16B, V2.16B, V24.16B |
(77) 0x11db4 EOR V16.16B, V16.16B, V20.16B |
(77) 0x11db8 USHR V17.2D, V17.2D, #1 |
(77) 0x11dbc BCAX V3.16B, V6.16B, V19.16B, V3.16B |
(77) 0x11dc0 CMEQ V6.2D, V18.2D, #0 |
(77) 0x11dc4 BCAX V4.16B, V7.16B, V19.16B, V4.16B |
(77) 0x11dc8 BCAX V5.16B, V16.16B, V19.16B, V5.16B |
(77) 0x11dcc STP Q3, Q4, [X10] |
(77) 0x11dd0 EOR V17.16B, V17.16B, V21.16B |
(77) 0x11dd4 BCAX V6.16B, V17.16B, V19.16B, V6.16B |
(77) 0x11dd8 STP Q5, Q6, [X10, #32] |
(77) 0x11ddc CMP X9, #1792 |
(77) 0x11de0 B.NE 11d08 |
(76) 0x11de4 LDR X11, [SP, #2480] |
(76) 0x11de8 MOV X10, V2.D[1] |
(76) 0x11dec ORR X9, XZR, XZR |
(76) 0x11df0 AND X10, X10, #0x0 |
(76) 0x11df4 AND X12, X11, #0x0 |
(76) 0x11df8 ORR X10, X12, X10 |
(76) 0x11dfc LDR X12, [SP, #5648] |
(76) 0x11e00 EOR X10, X12, X10,LSR #1 |
(76) 0x11e04 SBFM X12, X11, #0, #0 |
(76) 0x11e08 AND X12, X12, X27 |
(76) 0x11e0c EOR X10, X10, X12 |
(76) 0x11e10 STR X10, [SP, #2472] |
(76) 0x11e14 AND X10, X11, #0x0 |
(76) 0x11e18 LDR X11, [SP, #2488] |
(76) 0x11e1c AND X12, X11, #0x0 |
(76) 0x11e20 ORR X10, X12, X10 |
(76) 0x11e24 LDR X12, [SP, #5656] |
(76) 0x11e28 EOR X10, X12, X10,LSR #1 |
(76) 0x11e2c SBFM X12, X11, #0, #0 |
(76) 0x11e30 AND X12, X12, X27 |
(76) 0x11e34 EOR X10, X10, X12 |
(76) 0x11e38 STR X10, [SP, #2480] |
(76) 0x11e3c AND X10, X11, #0x0 |
(76) 0x11e40 LDR X11, [SP, #2496] |
(76) 0x11e44 AND X12, X11, #0x0 |
(76) 0x11e48 DUP V2.2D, X11 |
(76) 0x11e4c ORR X10, X12, X10 |
(76) 0x11e50 LDR X12, [SP, #5664] |
(76) 0x11e54 EOR X10, X12, X10,LSR #1 |
(76) 0x11e58 SBFM X12, X11, #0, #0 |
(76) 0x11e5c AND X12, X12, X27 |
(76) 0x11e60 EOR X10, X10, X12 |
(76) 0x11e64 STR X10, [SP, #2488] |
(78) 0x11e68 ADD X10, X15, X9 |
(78) 0x11e6c DUP V6.2D, X27 |
(78) 0x11e70 ADD X9, X9, #32 |
(78) 0x11e74 LDR Q3, [X10, #1824] |
(78) 0x11e78 ADD X11, X10, #1816 |
(78) 0x11e7c EXT V2.16B, V2.16B, V3.16B, #8 |
(78) 0x11e80 AND V4.16B, V3.16B, V23.16B |
(78) 0x11e84 AND V2.16B, V2.16B, V22.16B |
(78) 0x11e88 ORR V2.16B, V4.16B, V2.16B |
(78) 0x11e8c LDP Q4, Q5, [X10] |
(78) 0x11e90 USHR V2.2D, V2.2D, #1 |
(78) 0x11e94 EOR V2.16B, V2.16B, V4.16B |
(78) 0x11e98 AND V4.16B, V3.16B, V24.16B |
(78) 0x11e9c CMEQ V4.2D, V4.2D, #0 |
(78) 0x11ea0 BCAX V2.16B, V2.16B, V6.16B, V4.16B |
(78) 0x11ea4 STR Q2, [X11] |
(78) 0x11ea8 LDR Q2, [X10, #1840] |
(78) 0x11eac ADD X10, X10, #1832 |
(78) 0x11eb0 EXT V3.16B, V3.16B, V2.16B, #8 |
(78) 0x11eb4 AND V4.16B, V2.16B, V23.16B |
(78) 0x11eb8 AND V3.16B, V3.16B, V22.16B |
(78) 0x11ebc ORR V3.16B, V4.16B, V3.16B |
(78) 0x11ec0 AND V4.16B, V2.16B, V24.16B |
(78) 0x11ec4 USHR V3.2D, V3.2D, #1 |
(78) 0x11ec8 CMEQ V4.2D, V4.2D, #0 |
(78) 0x11ecc EOR V3.16B, V3.16B, V5.16B |
(78) 0x11ed0 BCAX V3.16B, V3.16B, V6.16B, V4.16B |
(78) 0x11ed4 STR Q3, [X10] |
(78) 0x11ed8 CMP X9, #3168 |
(78) 0x11edc B.NE 11e68 |
(76) 0x11ee0 B 11c60 |
0x11ee4 FDIV S0, S0, S1 |
0x11ee8 FCMP S0, S10 |
0x11eec B.GE 12448 |
0x11ef0 STR S0, [X22, X16,LSL #2] |
0x11ef4 MOVI D0, #0 |
0x11ef8 FMOV S1, #1.0000000 |
0x11efc ORR X8, XZR, X17 |
0x11f00 B 11f8c |
(73) 0x11f04 LDR X9, [SP, #5664] |
(73) 0x11f08 LDR X10, [SP, #680] |
(73) 0x11f0c LDR X11, [SP, #3848] |
(73) 0x11f10 ORR X21, XZR, XZR |
(73) 0x11f14 AND X9, X9, #0x0 |
(73) 0x11f18 AND X12, X10, #0x0 |
(73) 0x11f1c SBFM X10, X10, #0, #0 |
(73) 0x11f20 ORR X9, X12, X9 |
(73) 0x11f24 AND X10, X10, X27 |
(73) 0x11f28 EOR X9, X11, X9,LSR #1 |
(73) 0x11f2c EOR X9, X9, X10 |
(73) 0x11f30 STR X9, [SP, #5664] |
(73) 0x11f34 ORR X9, XZR, X21 |
(73) 0x11f38 ADD X21, X21, #1 |
(73) 0x11f3c ADD X10, SP, #680 |
(73) 0x11f40 SUBS X8, X8, #1 |
(73) 0x11f44 STR X21, [SP, #5672] |
(73) 0x11f48 LDR X9, [X10, X9,LSL #3] |
(73) 0x11f4c UBFM X10, X9, #11, #42 |
(73) 0x11f50 EOR X9, X10, X9 |
(73) 0x11f54 MOVZ W10, #22144 |
(73) 0x11f58 MOVK W10, #40236 |
(73) 0x11f5c AND X10, X10, X9,LSL #7 |
(73) 0x11f60 EOR X9, X10, X9 |
(73) 0x11f64 MOVZ W10, #61382 |
(73) 0x11f68 AND X10, X10, X9,LSL #15 |
(73) 0x11f6c EOR X9, X10, X9 |
(73) 0x11f70 EOR X9, X9, X9,LSR #18 |
(73) 0x11f74 UCVTF S2, X9 |
(73) 0x11f78 MOVZ W9, #20352 |
(73) 0x11f7c FMADD S0, S2, S1, S0 |
(73) 0x11f80 FMOV S2, W9 |
(73) 0x11f84 FMUL S1, S1, S2 |
(73) 0x11f88 B.EQ 12188 |
(73) 0x11f8c CMP X21, #624 |
(73) 0x11f90 B.CC 11f34 |
(73) 0x11f94 ADD X15, SP, #680 |
(73) 0x11f98 DUPM Z22.D, #0x80000000 |
(73) 0x11f9c DUPM Z23.D, #0x7ffffffe |
(73) 0x11fa0 DUP Z24.D, #1 |
(73) 0x11fa4 ORR X9, XZR, XZR |
(73) 0x11fa8 LD1R {V2.2D}, [X15] |
(74) 0x11fac ADD X10, X15, X9 |
(74) 0x11fb0 ADD X9, X9, #64 |
(74) 0x11fb4 LDUR Q3, [X10, #8] |
(74) 0x11fb8 LDUR Q4, [X10, #24] |
(74) 0x11fbc LDUR Q5, [X10, #40] |
(74) 0x11fc0 ADD X12, X10, #3192 |
(74) 0x11fc4 ADD X11, X10, #3176 |
(74) 0x11fc8 ADD X13, X10, #3208 |
(74) 0x11fcc ADD X14, X10, #3224 |
(74) 0x11fd0 EXT V6.16B, V2.16B, V3.16B, #8 |
(74) 0x11fd4 LDUR Q2, [X10, #56] |
(74) 0x11fd8 EXT V7.16B, V3.16B, V4.16B, #8 |
(74) 0x11fdc AND V19.16B, V4.16B, V23.16B |
(74) 0x11fe0 EXT V16.16B, V4.16B, V5.16B, #8 |
(74) 0x11fe4 AND V18.16B, V3.16B, V23.16B |
(74) 0x11fe8 AND V20.16B, V5.16B, V23.16B |
(74) 0x11fec AND V3.16B, V3.16B, V24.16B |
(74) 0x11ff0 AND V4.16B, V4.16B, V24.16B |
(74) 0x11ff4 AND V7.16B, V7.16B, V22.16B |
(74) 0x11ff8 AND V6.16B, V6.16B, V22.16B |
(74) 0x11ffc AND V16.16B, V16.16B, V22.16B |
(74) 0x12000 CMEQ V3.2D, V3.2D, #0 |
(74) 0x12004 CMEQ V4.2D, V4.2D, #0 |
(74) 0x12008 ORR V7.16B, V19.16B, V7.16B |
(74) 0x1200c LDR Q19, [X12] |
(74) 0x12010 ORR V6.16B, V18.16B, V6.16B |
(74) 0x12014 LDR Q18, [X11] |
(74) 0x12018 ORR V16.16B, V20.16B, V16.16B |
(74) 0x1201c LDR Q20, [X13] |
(74) 0x12020 EXT V17.16B, V5.16B, V2.16B, #8 |
(74) 0x12024 AND V21.16B, V2.16B, V23.16B |
(74) 0x12028 USHR V7.2D, V7.2D, #1 |
(74) 0x1202c USHR V6.2D, V6.2D, #1 |
(74) 0x12030 USHR V16.2D, V16.2D, #1 |
(74) 0x12034 AND V5.16B, V5.16B, V24.16B |
(74) 0x12038 AND V17.16B, V17.16B, V22.16B |
(74) 0x1203c CMEQ V5.2D, V5.2D, #0 |
(74) 0x12040 ORR V17.16B, V21.16B, V17.16B |
(74) 0x12044 LDR Q21, [X14] |
(74) 0x12048 EOR V7.16B, V7.16B, V19.16B |
(74) 0x1204c DUP V19.2D, X27 |
(74) 0x12050 EOR V6.16B, V6.16B, V18.16B |
(74) 0x12054 AND V18.16B, V2.16B, V24.16B |
(74) 0x12058 EOR V16.16B, V16.16B, V20.16B |
(74) 0x1205c USHR V17.2D, V17.2D, #1 |
(74) 0x12060 BCAX V3.16B, V6.16B, V19.16B, V3.16B |
(74) 0x12064 CMEQ V6.2D, V18.2D, #0 |
(74) 0x12068 BCAX V4.16B, V7.16B, V19.16B, V4.16B |
(74) 0x1206c BCAX V5.16B, V16.16B, V19.16B, V5.16B |
(74) 0x12070 STP Q3, Q4, [X10] |
(74) 0x12074 EOR V17.16B, V17.16B, V21.16B |
(74) 0x12078 BCAX V6.16B, V17.16B, V19.16B, V6.16B |
(74) 0x1207c STP Q5, Q6, [X10, #32] |
(74) 0x12080 CMP X9, #1792 |
(74) 0x12084 B.NE 11fac |
(73) 0x12088 LDR X11, [SP, #2480] |
(73) 0x1208c MOV X10, V2.D[1] |
(73) 0x12090 ORR X9, XZR, XZR |
(73) 0x12094 AND X10, X10, #0x0 |
(73) 0x12098 AND X12, X11, #0x0 |
(73) 0x1209c ORR X10, X12, X10 |
(73) 0x120a0 LDR X12, [SP, #5648] |
(73) 0x120a4 EOR X10, X12, X10,LSR #1 |
(73) 0x120a8 SBFM X12, X11, #0, #0 |
(73) 0x120ac AND X12, X12, X27 |
(73) 0x120b0 EOR X10, X10, X12 |
(73) 0x120b4 STR X10, [SP, #2472] |
(73) 0x120b8 AND X10, X11, #0x0 |
(73) 0x120bc LDR X11, [SP, #2488] |
(73) 0x120c0 AND X12, X11, #0x0 |
(73) 0x120c4 ORR X10, X12, X10 |
(73) 0x120c8 LDR X12, [SP, #5656] |
(73) 0x120cc EOR X10, X12, X10,LSR #1 |
(73) 0x120d0 SBFM X12, X11, #0, #0 |
(73) 0x120d4 AND X12, X12, X27 |
(73) 0x120d8 EOR X10, X10, X12 |
(73) 0x120dc STR X10, [SP, #2480] |
(73) 0x120e0 AND X10, X11, #0x0 |
(73) 0x120e4 LDR X11, [SP, #2496] |
(73) 0x120e8 AND X12, X11, #0x0 |
(73) 0x120ec DUP V2.2D, X11 |
(73) 0x120f0 ORR X10, X12, X10 |
(73) 0x120f4 LDR X12, [SP, #5664] |
(73) 0x120f8 EOR X10, X12, X10,LSR #1 |
(73) 0x120fc SBFM X12, X11, #0, #0 |
(73) 0x12100 AND X12, X12, X27 |
(73) 0x12104 EOR X10, X10, X12 |
(73) 0x12108 STR X10, [SP, #2488] |
(75) 0x1210c ADD X10, X15, X9 |
(75) 0x12110 DUP V6.2D, X27 |
(75) 0x12114 ADD X9, X9, #32 |
(75) 0x12118 LDR Q3, [X10, #1824] |
(75) 0x1211c ADD X11, X10, #1816 |
(75) 0x12120 EXT V2.16B, V2.16B, V3.16B, #8 |
(75) 0x12124 AND V4.16B, V3.16B, V23.16B |
(75) 0x12128 AND V2.16B, V2.16B, V22.16B |
(75) 0x1212c ORR V2.16B, V4.16B, V2.16B |
(75) 0x12130 LDP Q4, Q5, [X10] |
(75) 0x12134 USHR V2.2D, V2.2D, #1 |
(75) 0x12138 EOR V2.16B, V2.16B, V4.16B |
(75) 0x1213c AND V4.16B, V3.16B, V24.16B |
(75) 0x12140 CMEQ V4.2D, V4.2D, #0 |
(75) 0x12144 BCAX V2.16B, V2.16B, V6.16B, V4.16B |
(75) 0x12148 STR Q2, [X11] |
(75) 0x1214c LDR Q2, [X10, #1840] |
(75) 0x12150 ADD X10, X10, #1832 |
(75) 0x12154 EXT V3.16B, V3.16B, V2.16B, #8 |
(75) 0x12158 AND V4.16B, V2.16B, V23.16B |
(75) 0x1215c AND V3.16B, V3.16B, V22.16B |
(75) 0x12160 ORR V3.16B, V4.16B, V3.16B |
(75) 0x12164 AND V4.16B, V2.16B, V24.16B |
(75) 0x12168 USHR V3.2D, V3.2D, #1 |
(75) 0x1216c CMEQ V4.2D, V4.2D, #0 |
(75) 0x12170 EOR V3.16B, V3.16B, V5.16B |
(75) 0x12174 BCAX V3.16B, V3.16B, V6.16B, V4.16B |
(75) 0x12178 STR Q3, [X10] |
(75) 0x1217c CMP X9, #3168 |
(75) 0x12180 B.NE 1210c |
(73) 0x12184 B 11f04 |
0x12188 FDIV S0, S0, S1 |
0x1218c FCMP S0, S10 |
0x12190 B.GE 12468 |
0x12194 STR S0, [X25, X16,LSL #2] |
0x12198 MOVI D0, #0 |
0x1219c FMOV S1, #1.0000000 |
0x121a0 ORR X8, XZR, X17 |
0x121a4 B 12234 |
(70) 0x121a8 ADD X10, SP, #680 |
(70) 0x121ac LDR X9, [SP, #5664] |
(70) 0x121b0 ORR X21, XZR, XZR |
(70) 0x121b4 LDR X10, [X10] |
(70) 0x121b8 AND X9, X9, #0x0 |
(70) 0x121bc AND X11, X10, #0x0 |
(70) 0x121c0 SBFM X10, X10, #0, #0 |
(70) 0x121c4 ORR X9, X11, X9 |
(70) 0x121c8 LDR X11, [SP, #3848] |
(70) 0x121cc AND X10, X10, X27 |
(70) 0x121d0 EOR X9, X11, X9,LSR #1 |
(70) 0x121d4 EOR X9, X9, X10 |
(70) 0x121d8 STR X9, [SP, #5664] |
(70) 0x121dc ORR X9, XZR, X21 |
(70) 0x121e0 ADD X21, X21, #1 |
(70) 0x121e4 ADD X10, SP, #680 |
(70) 0x121e8 SUBS X8, X8, #1 |
(70) 0x121ec STR X21, [SP, #5672] |
(70) 0x121f0 LDR X9, [X10, X9,LSL #3] |
(70) 0x121f4 UBFM X10, X9, #11, #42 |
(70) 0x121f8 EOR X9, X10, X9 |
(70) 0x121fc MOVZ W10, #22144 |
(70) 0x12200 MOVK W10, #40236 |
(70) 0x12204 AND X10, X10, X9,LSL #7 |
(70) 0x12208 EOR X9, X10, X9 |
(70) 0x1220c MOVZ W10, #61382 |
(70) 0x12210 AND X10, X10, X9,LSL #15 |
(70) 0x12214 EOR X9, X10, X9 |
(70) 0x12218 EOR X9, X9, X9,LSR #18 |
(70) 0x1221c UCVTF S2, X9 |
(70) 0x12220 MOVZ W9, #20352 |
(70) 0x12224 FMADD S0, S2, S1, S0 |
(70) 0x12228 FMOV S2, W9 |
(70) 0x1222c FMUL S1, S1, S2 |
(70) 0x12230 B.EQ 12420 |
(70) 0x12234 CMP X21, #624 |
(70) 0x12238 B.CC 121dc |
(70) 0x1223c ADD X9, SP, #680 |
(70) 0x12240 DUPM Z22.D, #0x80000000 |
(70) 0x12244 DUPM Z23.D, #0x7ffffffe |
(70) 0x12248 DUP Z24.D, #1 |
(70) 0x1224c MOVZ W10, #224 |
(70) 0x12250 LD1R {V2.2D}, [X9] |
(70) 0x12254 HINT #0 |
(70) 0x12258 HINT #0 |
(70) 0x1225c HINT #0 |
(71) 0x12260 LDUR Q3, [X9, #8] |
(71) 0x12264 LDUR Q4, [X9, #24] |
(71) 0x12268 LDUR Q5, [X9, #40] |
(71) 0x1226c ADD X11, X9, #3176 |
(71) 0x12270 SUBS X10, X10, #8 |
(71) 0x12274 EXT V6.16B, V2.16B, V3.16B, #8 |
(71) 0x12278 LDUR Q2, [X9, #56] |
(71) 0x1227c EXT V7.16B, V3.16B, V4.16B, #8 |
(71) 0x12280 EXT V16.16B, V4.16B, V5.16B, #8 |
(71) 0x12284 AND V18.16B, V3.16B, V23.16B |
(71) 0x12288 AND V19.16B, V4.16B, V23.16B |
(71) 0x1228c AND V20.16B, V5.16B, V23.16B |
(71) 0x12290 AND V3.16B, V3.16B, V24.16B |
(71) 0x12294 AND V4.16B, V4.16B, V24.16B |
(71) 0x12298 AND V6.16B, V6.16B, V22.16B |
(71) 0x1229c AND V7.16B, V7.16B, V22.16B |
(71) 0x122a0 AND V16.16B, V16.16B, V22.16B |
(71) 0x122a4 CMEQ V3.2D, V3.2D, #0 |
(71) 0x122a8 CMEQ V4.2D, V4.2D, #0 |
(71) 0x122ac ORR V6.16B, V18.16B, V6.16B |
(71) 0x122b0 ORR V7.16B, V19.16B, V7.16B |
(71) 0x122b4 LDP Q18, Q19, [X11] |
(71) 0x122b8 ORR V16.16B, V20.16B, V16.16B |
(71) 0x122bc EXT V17.16B, V5.16B, V2.16B, #8 |
(71) 0x122c0 AND V21.16B, V2.16B, V23.16B |
(71) 0x122c4 USHR V7.2D, V7.2D, #1 |
(71) 0x122c8 USHR V6.2D, V6.2D, #1 |
(71) 0x122cc USHR V16.2D, V16.2D, #1 |
(71) 0x122d0 AND V5.16B, V5.16B, V24.16B |
(71) 0x122d4 AND V17.16B, V17.16B, V22.16B |
(71) 0x122d8 EOR V7.16B, V7.16B, V19.16B |
(71) 0x122dc DUP V19.2D, X27 |
(71) 0x122e0 CMEQ V5.2D, V5.2D, #0 |
(71) 0x122e4 ORR V17.16B, V21.16B, V17.16B |
(71) 0x122e8 LDP Q20, Q21, [X11, #32] |
(71) 0x122ec EOR V6.16B, V6.16B, V18.16B |
(71) 0x122f0 AND V18.16B, V2.16B, V24.16B |
(71) 0x122f4 BCAX V4.16B, V7.16B, V19.16B, V4.16B |
(71) 0x122f8 USHR V17.2D, V17.2D, #1 |
(71) 0x122fc BCAX V3.16B, V6.16B, V19.16B, V3.16B |
(71) 0x12300 CMEQ V6.2D, V18.2D, #0 |
(71) 0x12304 EOR V17.16B, V17.16B, V21.16B |
(71) 0x12308 EOR V16.16B, V16.16B, V20.16B |
(71) 0x1230c BCAX V6.16B, V17.16B, V19.16B, V6.16B |
(71) 0x12310 STP Q3, Q4, [X9] |
(71) 0x12314 BCAX V5.16B, V16.16B, V19.16B, V5.16B |
(71) 0x12318 STP Q5, Q6, [X9, #32] |
(71) 0x1231c ADD X9, X9, #64 |
(71) 0x12320 B.NE 12260 |
(70) 0x12324 LDR X10, [SP, #2480] |
(70) 0x12328 MOV X9, V2.D[1] |
(70) 0x1232c AND X9, X9, #0x0 |
(70) 0x12330 AND X11, X10, #0x0 |
(70) 0x12334 ORR X9, X11, X9 |
(70) 0x12338 LDR X11, [SP, #5648] |
(70) 0x1233c EOR X9, X11, X9,LSR #1 |
(70) 0x12340 SBFM X11, X10, #0, #0 |
(70) 0x12344 AND X11, X11, X27 |
(70) 0x12348 EOR X9, X9, X11 |
(70) 0x1234c STR X9, [SP, #2472] |
(70) 0x12350 AND X9, X10, #0x0 |
(70) 0x12354 LDR X10, [SP, #2488] |
(70) 0x12358 AND X11, X10, #0x0 |
(70) 0x1235c ORR X9, X11, X9 |
(70) 0x12360 LDR X11, [SP, #5656] |
(70) 0x12364 EOR X9, X11, X9,LSR #1 |
(70) 0x12368 SBFM X11, X10, #0, #0 |
(70) 0x1236c AND X11, X11, X27 |
(70) 0x12370 EOR X9, X9, X11 |
(70) 0x12374 STR X9, [SP, #2480] |
(70) 0x12378 AND X9, X10, #0x0 |
(70) 0x1237c LDR X10, [SP, #2496] |
(70) 0x12380 AND X11, X10, #0x0 |
(70) 0x12384 DUP V2.2D, X10 |
(70) 0x12388 ORR X9, X11, X9 |
(70) 0x1238c LDR X11, [SP, #5664] |
(70) 0x12390 EOR X9, X11, X9,LSR #1 |
(70) 0x12394 SBFM X11, X10, #0, #0 |
(70) 0x12398 MOVZ W10, #396 |
(70) 0x1239c AND X11, X11, X27 |
(70) 0x123a0 EOR X9, X9, X11 |
(70) 0x123a4 STR X9, [SP, #2488] |
(70) 0x123a8 ADD X9, SP, #680 |
(72) 0x123ac LDR Q3, [X9, #1824] |
(72) 0x123b0 DUP V6.2D, X27 |
(72) 0x123b4 ADD X11, X9, #1816 |
(72) 0x123b8 SUBS X10, X10, #4 |
(72) 0x123bc EXT V2.16B, V2.16B, V3.16B, #8 |
(72) 0x123c0 AND V4.16B, V3.16B, V23.16B |
(72) 0x123c4 AND V2.16B, V2.16B, V22.16B |
(72) 0x123c8 ORR V2.16B, V4.16B, V2.16B |
(72) 0x123cc LDP Q4, Q5, [X9] |
(72) 0x123d0 USHR V2.2D, V2.2D, #1 |
(72) 0x123d4 EOR V2.16B, V2.16B, V4.16B |
(72) 0x123d8 AND V4.16B, V3.16B, V24.16B |
(72) 0x123dc CMEQ V4.2D, V4.2D, #0 |
(72) 0x123e0 BCAX V4.16B, V2.16B, V6.16B, V4.16B |
(72) 0x123e4 LDR Q2, [X9, #1840] |
(72) 0x123e8 ADD X9, X9, #32 |
(72) 0x123ec EXT V3.16B, V3.16B, V2.16B, #8 |
(72) 0x123f0 AND V7.16B, V2.16B, V23.16B |
(72) 0x123f4 AND V3.16B, V3.16B, V22.16B |
(72) 0x123f8 ORR V3.16B, V7.16B, V3.16B |
(72) 0x123fc USHR V3.2D, V3.2D, #1 |
(72) 0x12400 EOR V3.16B, V3.16B, V5.16B |
(72) 0x12404 AND V5.16B, V2.16B, V24.16B |
(72) 0x12408 CMEQ V5.2D, V5.2D, #0 |
(72) 0x1240c BCAX V3.16B, V3.16B, V6.16B, V5.16B |
(72) 0x12410 STP Q4, Q3, [X11] |
(72) 0x12414 B.NE 123ac |
(70) 0x12418 B 121a8 |
0x12420 FDIV S0, S0, S1 |
0x12424 FCMP S0, S10 |
0x12428 B.GE 12488 |
0x1242c LDR X8, [SP, #448] |
0x12430 STR S0, [X8, X16,LSL #2] |
0x12434 LDR X8, [SP, #344] |
0x12438 ADD X16, X16, #1 |
0x1243c CMP X16, X8 |
0x12440 B.NE 11c40 |
0x12448 FMOV S0, #1.0000000 |
0x1244c MOVI D1, #0 |
0x12450 STR X16, [SP, #624] |
0x12454 STR X17, [SP, #616] |
0x12458 BL 10140 |
0x1245c LDR X17, [SP, #616] |
0x12460 LDR X16, [SP, #624] |
0x12464 B 11ef0 |
0x12468 FMOV S0, #1.0000000 |
0x1246c MOVI D1, #0 |
0x12470 STR X16, [SP, #624] |
0x12474 STR X17, [SP, #616] |
0x12478 BL 10140 |
0x1247c LDR X17, [SP, #616] |
0x12480 LDR X16, [SP, #624] |
0x12484 B 12194 |
0x12488 FMOV S0, #1.0000000 |
0x1248c MOVI D1, #0 |
0x12490 STR X16, [SP, #624] |
0x12494 STR X17, [SP, #616] |
0x12498 BL 10140 |
0x1249c LDR X17, [SP, #616] |
0x124a0 LDR X16, [SP, #624] |
0x124a4 LDR X8, [SP, #448] |
0x124a8 STR S0, [X8, X16,LSL #2] |
0x124ac LDR X8, [SP, #344] |
0x124b0 ADD X16, X16, #1 |
0x124b4 CMP X16, X8 |
0x124b8 B.NE 11c40 |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/cmath: 2622 - 2622 |
-------------------------------------------------------------------------------- |
2622: { return __builtin_nextafterf(__x, __y); } |
/home/eoseret/llm-attention/attention_v2.cpp: 164 - 167 |
-------------------------------------------------------------------------------- |
164: for (size_t i = 0; i < elemsW; ++i) { |
165: h_WQ[i] = dist(rng); |
166: h_WK[i] = dist(rng); |
167: h_WV[i] = dist(rng); |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/random.tcc: 404 - 3371 |
-------------------------------------------------------------------------------- |
404: for (size_t __k = 0; __k < (__n - __m); ++__k) |
405: { |
406: _UIntType __y = ((_M_x[__k] & __upper_mask) |
407: | (_M_x[__k + 1] & __lower_mask)); |
408: _M_x[__k] = (_M_x[__k + __m] ^ (__y >> 1) |
409: ^ ((__y & 0x01) ? __a : 0)); |
410: } |
411: |
412: for (size_t __k = (__n - __m); __k < (__n - 1); ++__k) |
413: { |
414: _UIntType __y = ((_M_x[__k] & __upper_mask) |
415: | (_M_x[__k + 1] & __lower_mask)); |
416: _M_x[__k] = (_M_x[__k + (__m - __n)] ^ (__y >> 1) |
417: ^ ((__y & 0x01) ? __a : 0)); |
418: } |
419: |
420: _UIntType __y = ((_M_x[__n - 1] & __upper_mask) |
421: | (_M_x[0] & __lower_mask)); |
422: _M_x[__n - 1] = (_M_x[__m - 1] ^ (__y >> 1) |
423: ^ ((__y & 0x01) ? __a : 0)); |
[...] |
458: if (_M_p >= state_size) |
459: _M_gen_rand(); |
460: |
461: // Calculate o(x(i)). |
462: result_type __z = _M_x[_M_p++]; |
463: __z ^= (__z >> __u) & __d; |
464: __z ^= (__z << __s) & __b; |
465: __z ^= (__z << __t) & __c; |
466: __z ^= (__z >> __l); |
[...] |
3365: for (size_t __k = __m; __k != 0; --__k) |
3366: { |
3367: __sum += _RealType(__urng() - __urng.min()) * __tmp; |
3368: __tmp *= __r; |
3369: } |
3370: __ret = __sum / __tmp; |
3371: if (__builtin_expect(__ret >= _RealType(1), 0)) |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | attention-armclang-native |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.98 |
| CQA speedup if FP arith vectorized | 1.45 |
| CQA speedup if fully vectorized | 3.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.09 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | cmath:2622-2622,attention_v2.cpp:164-167,random.tcc:3370-3371 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 7.25 |
| CQA cycles if no scalar integer | 3.67 |
| CQA cycles if FP arith vectorized | 5.00 |
| CQA cycles if fully vectorized | 2.42 |
| Front-end cycles | 7.25 |
| P0 cycles | 6.50 |
| P1 cycles | 6.50 |
| P2 cycles | 1.67 |
| P3 cycles | 1.67 |
| P4 cycles | 1.67 |
| P5 cycles | 1.67 |
| P6 cycles | 1.67 |
| P7 cycles | 1.67 |
| P8 cycles | 5.50 |
| P9 cycles | 5.50 |
| P10 cycles | 5.50 |
| P11 cycles | 5.50 |
| P12 cycles | 6.67 |
| P13 cycles | 6.67 |
| P14 cycles | 6.67 |
| P15 cycles | 3.00 |
| P16 cycles | 3.00 |
| DIV/SQRT cycles | 2.62 - 3.38 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 58.00 |
| Nb uops | 58.00 |
| Nb loads | NA |
| Nb stores | 10.00 |
| Nb stack references | 16.00 |
| FLOP/cycle | 0.41 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 3.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 1.66 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 0.00 |
| Bytes stored | 12.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | NA |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | 0.00 |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 37.50 |
| Vector-efficiency ratio load | 50.00 |
| Vector-efficiency ratio store | 40.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | NA |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | 25.00 |
| Vector-efficiency ratio other | 35.94 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.98 |
| CQA speedup if FP arith vectorized | 1.45 |
| CQA speedup if fully vectorized | 3.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.09 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | cmath:2622-2622,attention_v2.cpp:164-167,random.tcc:3370-3371 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 7.25 |
| CQA cycles if no scalar integer | 3.67 |
| CQA cycles if FP arith vectorized | 5.00 |
| CQA cycles if fully vectorized | 2.42 |
| Front-end cycles | 7.25 |
| P0 cycles | 6.50 |
| P1 cycles | 6.50 |
| P2 cycles | 1.67 |
| P3 cycles | 1.67 |
| P4 cycles | 1.67 |
| P5 cycles | 1.67 |
| P6 cycles | 1.67 |
| P7 cycles | 1.67 |
| P8 cycles | 5.50 |
| P9 cycles | 5.50 |
| P10 cycles | 5.50 |
| P11 cycles | 5.50 |
| P12 cycles | 6.67 |
| P13 cycles | 6.67 |
| P14 cycles | 6.67 |
| P15 cycles | 3.00 |
| P16 cycles | 3.00 |
| DIV/SQRT cycles | 2.62 - 3.38 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 58.00 |
| Nb uops | 58.00 |
| Nb loads | NA |
| Nb stores | 10.00 |
| Nb stack references | 16.00 |
| FLOP/cycle | 0.41 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 3.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 1.66 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 0.00 |
| Bytes stored | 12.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | NA |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | 0.00 |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 37.50 |
| Vector-efficiency ratio load | 50.00 |
| Vector-efficiency ratio store | 40.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | NA |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | 25.00 |
| Vector-efficiency ratio other | 35.94 |
| Path / |
| Function | main |
| Source file and lines | attention_v2.cpp:164-167 |
| Module | attention-armclang-native |
| nb instructions | 58 |
| nb uops | 58 |
| loop length | 232 |
| used w registers | 0 |
| used x registers | 6 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 4 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 16 |
| micro-operation queue | 7.25 cycles |
| front end | 7.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | P16 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 6.50 | 6.50 | 1.67 | 1.67 | 1.67 | 1.67 | 1.67 | 1.67 | 5.50 | 5.50 | 5.50 | 5.50 | 6.67 | 6.67 | 6.67 | 3.00 | 3.00 |
| cycles | 6.50 | 6.50 | 1.67 | 1.67 | 1.67 | 1.67 | 1.67 | 1.67 | 5.50 | 5.50 | 5.50 | 5.50 | 6.67 | 6.67 | 6.67 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | 2.62-3.38 |
| Front-end | 7.25 |
| Dispatch | 6.67 |
| DIV/SQRT | 2.62-3.38 |
| Overall L1 | 7.25 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 0% |
| all | 45% |
| load | 50% |
| store | 40% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 50% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 25% |
| other | 25% |
| all | 37% |
| load | 50% |
| store | 40% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 25% |
| other | 35% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | P16 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOVI D0, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| FMOV S1, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ORR X8, XZR, X17 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| B 11ce8 <main+0xac8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FDIV S0, S0, S1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-10 | 0.87-1.13 | scal (25.0%) |
| FCMP S0, S10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B.GE 12448 <main+0x1228> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STR S0, [X22, X16,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| MOVI D0, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| FMOV S1, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ORR X8, XZR, X17 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| B 11f8c <main+0xd6c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FDIV S0, S0, S1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-10 | 0.87-1.13 | scal (25.0%) |
| FCMP S0, S10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B.GE 12468 <main+0x1248> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STR S0, [X25, X16,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| MOVI D0, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| FMOV S1, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ORR X8, XZR, X17 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| B 12234 <main+0x1014> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FDIV S0, S0, S1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-10 | 0.87-1.13 | scal (25.0%) |
| FCMP S0, S10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B.GE 12488 <main+0x1268> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #448] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR S0, [X8, X16,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| LDR X8, [SP, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X16, X16, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP X16, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (50.0%) |
| B.NE 11c40 <main+0xa20> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FMOV S0, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| STR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| BL 10140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (50.0%) |
| LDR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 11ef0 <main+0xcd0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FMOV S0, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| STR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| BL 10140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (50.0%) |
| LDR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 12194 <main+0xf74> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FMOV S0, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| STR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| BL 10140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (50.0%) |
| LDR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [SP, #448] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR S0, [X8, X16,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| LDR X8, [SP, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X16, X16, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP X16, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.NE 11c40 <main+0xa20> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Function | main |
| Source file and lines | attention_v2.cpp:164-167 |
| Module | attention-armclang-native |
| nb instructions | 58 |
| nb uops | 58 |
| loop length | 232 |
| used w registers | 0 |
| used x registers | 6 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 4 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 16 |
| micro-operation queue | 7.25 cycles |
| front end | 7.25 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | P16 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 6.50 | 6.50 | 1.67 | 1.67 | 1.67 | 1.67 | 1.67 | 1.67 | 5.50 | 5.50 | 5.50 | 5.50 | 6.67 | 6.67 | 6.67 | 3.00 | 3.00 |
| cycles | 6.50 | 6.50 | 1.67 | 1.67 | 1.67 | 1.67 | 1.67 | 1.67 | 5.50 | 5.50 | 5.50 | 5.50 | 6.67 | 6.67 | 6.67 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | 2.62-3.38 |
| Front-end | 7.25 |
| Dispatch | 6.67 |
| DIV/SQRT | 2.62-3.38 |
| Overall L1 | 7.25 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 0% |
| all | 45% |
| load | 50% |
| store | 40% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 50% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 25% |
| other | 25% |
| all | 37% |
| load | 50% |
| store | 40% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 25% |
| other | 35% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | P16 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOVI D0, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| FMOV S1, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ORR X8, XZR, X17 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| B 11ce8 <main+0xac8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FDIV S0, S0, S1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-10 | 0.87-1.13 | scal (25.0%) |
| FCMP S0, S10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B.GE 12448 <main+0x1228> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STR S0, [X22, X16,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| MOVI D0, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| FMOV S1, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ORR X8, XZR, X17 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| B 11f8c <main+0xd6c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FDIV S0, S0, S1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-10 | 0.87-1.13 | scal (25.0%) |
| FCMP S0, S10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B.GE 12468 <main+0x1248> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STR S0, [X25, X16,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| MOVI D0, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| FMOV S1, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| ORR X8, XZR, X17 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| B 12234 <main+0x1014> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FDIV S0, S0, S1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7-10 | 0.87-1.13 | scal (25.0%) |
| FCMP S0, S10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B.GE 12488 <main+0x1268> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #448] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR S0, [X8, X16,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| LDR X8, [SP, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X16, X16, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP X16, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (50.0%) |
| B.NE 11c40 <main+0xa20> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FMOV S0, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| STR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| BL 10140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (50.0%) |
| LDR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 11ef0 <main+0xcd0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FMOV S0, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| STR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| BL 10140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (50.0%) |
| LDR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| B 12194 <main+0xf74> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| FMOV S0, #1.0000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (25.0%) |
| MOVI D1, #0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 2 | 0.25 | scal (50.0%) |
| STR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| BL 10140 <@plt_start@+0x120> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X17, [SP, #616] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (50.0%) |
| LDR X16, [SP, #624] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [SP, #448] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| STR S0, [X8, X16,LSL #2] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 | scal (25.0%) |
| LDR X8, [SP, #344] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X16, X16, #1 | 1 | 0 | 0 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP X16, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| B.NE 11c40 <main+0xa20> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
