| Loop Id: 69 | Module: attention-aocc-znver5-512 | Source: attention_v2.cpp:164-167 [...] | Coverage: 0.15% |
|---|
| Loop Id: 69 | Module: attention-aocc-znver5-512 | Source: attention_v2.cpp:164-167 [...] | Coverage: 0.15% |
|---|
0x54b0 VMOVSS -0x4614(%RIP),%XMM1 |
0x54b8 VXORPS %XMM0,%XMM0,%XMM0 |
0x54bc MOV %R9,%RAX |
0x54bf JMP 5535 |
(76) 0x54d0 MOV %R12,%RCX |
(76) 0x54d3 INC %R12 |
(76) 0x54d6 MOV $0x200b,%EDX |
(76) 0x54db MOV %R12,0x18f8(%RSP) |
(76) 0x54e3 MOV 0x578(%RSP,%RCX,8),%RCX |
(76) 0x54eb BEXTR %RDX,%RCX,%RDX |
(76) 0x54f0 XOR %RCX,%RDX |
(76) 0x54f3 MOV %EDX,%ECX |
(76) 0x54f5 SAL $0x7,%ECX |
(76) 0x54f8 AND $-0x62d3a980,%ECX |
(76) 0x54fe XOR %RDX,%RCX |
(76) 0x5501 MOV %ECX,%EDX |
(76) 0x5503 SAL $0xf,%EDX |
(76) 0x5506 AND $-0x103a0000,%EDX |
(76) 0x550c XOR %RCX,%RDX |
(76) 0x550f MOV %RDX,%RCX |
(76) 0x5512 SHR $0x12,%RCX |
(76) 0x5516 XOR %RDX,%RCX |
(76) 0x5519 DEC %RAX |
(76) 0x551c VCVTUSI2SS %RCX,%XMM5,%XMM2 |
(76) 0x5522 VFMADD231SS %XMM2,%XMM1,%XMM0 |
(76) 0x5527 VMULSS -0x468f(%RIP),%XMM1,%XMM1 |
(76) 0x552f JE 5740 |
(76) 0x5535 CMP $0x270,%R12 |
(76) 0x553c JB 54d0 |
(76) 0x553e VPBROADCASTQ %RSI,%ZMM2 |
(76) 0x5544 XOR %ECX,%ECX |
(76) 0x5546 NOPW %CS:(%RAX,%RAX,1) |
(77) 0x5550 VMOVDQA64 %ZMM2,%ZMM3 |
(77) 0x5556 VMOVDQU64 0x580(%RSP,%RCX,8),%ZMM2 |
(77) 0x555e VPANDQ -0x46a8(%RIP){1to8},%ZMM2,%ZMM4 |
(77) 0x5568 VPTESTMQ -0x4692(%RIP){1to8},%ZMM2,%K1 |
(77) 0x5572 VALIGNQ $0x7,%ZMM3,%ZMM2,%ZMM3 |
(77) 0x5579 VPTERNLOGQ $-0x8,-0x469c(%RIP){1to8},%ZMM3,%ZMM4 |
(77) 0x5584 VPSRLQ $0x1,%ZMM4,%ZMM3 |
(77) 0x558b VPXORQ 0x11e0(%RSP,%RCX,8),%ZMM3,%ZMM3 |
(77) 0x5596 VPXORQ -0x46b0(%RIP){1to8},%ZMM3,%ZMM3{%K1} |
(77) 0x55a0 VMOVDQU64 %ZMM3,0x578(%RSP,%RCX,8) |
(77) 0x55ab ADD $0x8,%RCX |
(77) 0x55af CMP $0xe0,%RCX |
(77) 0x55b6 JNE 5550 |
(76) 0x55b8 MOV 0xc80(%RSP),%RDX |
(76) 0x55c0 VEXTRACTI32X4 $0x3,%ZMM2,%XMM2 |
(76) 0x55c7 MOV 0xc88(%RSP),%RCX |
(76) 0x55cf MOV $-0x66f74f21,%R8D |
(76) 0x55d5 VPEXTRQ $0x1,%XMM2,%RSI |
(76) 0x55db AND $-0x80000000,%RSI |
(76) 0x55e2 MOV %EDX,%EDI |
(76) 0x55e4 AND $0x7ffffffe,%EDI |
(76) 0x55ea OR %RSI,%RDI |
(76) 0x55ed MOV %EDX,%ESI |
(76) 0x55ef AND $0x1,%ESI |
(76) 0x55f2 AND $-0x80000000,%RDX |
(76) 0x55f9 SHR $0x1,%RDI |
(76) 0x55fc XOR 0x18e0(%RSP),%RDI |
(76) 0x5604 NEG %ESI |
(76) 0x5606 AND %R8D,%ESI |
(76) 0x5609 XOR %RDI,%RSI |
(76) 0x560c MOV $-0x66f74f21,%EDI |
(76) 0x5611 MOV %RSI,0xc78(%RSP) |
(76) 0x5619 MOV %ECX,%ESI |
(76) 0x561b AND $0x7ffffffe,%ESI |
(76) 0x5621 OR %RDX,%RSI |
(76) 0x5624 MOV %ECX,%EDX |
(76) 0x5626 AND $0x1,%EDX |
(76) 0x5629 AND $-0x80000000,%RCX |
(76) 0x5630 SHR $0x1,%RSI |
(76) 0x5633 XOR 0x18e8(%RSP),%RSI |
(76) 0x563b NEG %EDX |
(76) 0x563d AND %R8D,%EDX |
(76) 0x5640 XOR %RSI,%RDX |
(76) 0x5643 MOV %RDX,0xc80(%RSP) |
(76) 0x564b MOV 0xc90(%RSP),%RDX |
(76) 0x5653 MOV %EDX,%ESI |
(76) 0x5655 VPBROADCASTQ %RDX,%XMM2 |
(76) 0x565b AND $0x7ffffffe,%EDX |
(76) 0x5661 AND $0x1,%ESI |
(76) 0x5664 OR %RCX,%RDX |
(76) 0x5667 NEG %ESI |
(76) 0x5669 MOV $0xe4,%ECX |
(76) 0x566e SHR $0x1,%RDX |
(76) 0x5671 XOR 0x18f0(%RSP),%RDX |
(76) 0x5679 AND %R8D,%ESI |
(76) 0x567c XOR %RDX,%RSI |
(76) 0x567f MOV %RSI,0xc88(%RSP) |
(76) 0x5687 NOPW (%RAX,%RAX,1) |
(78) 0x5690 VMOVDQU 0x578(%RSP,%RCX,8),%XMM3 |
(78) 0x5699 VPANDQ -0x47e3(%RIP){1to2},%XMM3,%XMM4 |
(78) 0x56a3 VPTESTMQ -0x47cd(%RIP){1to2},%XMM3,%K1 |
(78) 0x56ad VPALIGNR $0x8,%XMM2,%XMM3,%XMM2 |
(78) 0x56b3 VPTERNLOGQ $-0x8,-0x47d6(%RIP){1to2},%XMM2,%XMM4 |
(78) 0x56be VPSRLQ $0x1,%XMM4,%XMM2 |
(78) 0x56c3 VPXOR -0x1a8(%RSP,%RCX,8),%XMM2,%XMM2 |
(78) 0x56cc VPXORQ -0x47e6(%RIP){1to2},%XMM2,%XMM2{%K1} |
(78) 0x56d6 VMOVDQU %XMM2,0x570(%RSP,%RCX,8) |
(78) 0x56df ADD $0x2,%RCX |
(78) 0x56e3 VMOVDQA %XMM3,%XMM2 |
(78) 0x56e7 CMP $0x270,%RCX |
(78) 0x56ee JNE 5690 |
(76) 0x56f0 MOV 0x18f0(%RSP),%RCX |
(76) 0x56f8 MOV 0x578(%RSP),%RSI |
(76) 0x5700 MOV $-0x80000000,%RDX |
(76) 0x5707 XOR %R12D,%R12D |
(76) 0x570a AND %RDX,%RCX |
(76) 0x570d MOV %ESI,%EDX |
(76) 0x570f AND $0x7ffffffe,%EDX |
(76) 0x5715 OR %RCX,%RDX |
(76) 0x5718 MOV %ESI,%ECX |
(76) 0x571a AND $0x1,%ECX |
(76) 0x571d SHR $0x1,%RDX |
(76) 0x5720 XOR 0x11d8(%RSP),%RDX |
(76) 0x5728 NEG %ECX |
(76) 0x572a AND %EDI,%ECX |
(76) 0x572c XOR %RDX,%RCX |
(76) 0x572f MOV %RCX,0x18f0(%RSP) |
(76) 0x5737 JMP 54d0 |
0x5740 VDIVSS %XMM1,%XMM0,%XMM0 |
0x5744 VMOVSS -0x48a8(%RIP),%XMM2 |
0x574c VUCOMISS %XMM2,%XMM0 |
0x5750 JAE 5cc6 |
0x5756 MOV 0x58(%RSP),%RAX |
0x575b VMOVAPS %XMM2,%XMM1 |
0x575f VMOVSS %XMM0,(%RAX,%R10,4) |
0x5765 VXORPS %XMM0,%XMM0,%XMM0 |
0x5769 MOV %R9,%RAX |
0x576c JMP 57d5 |
(73) 0x5770 MOV %R12,%RCX |
(73) 0x5773 INC %R12 |
(73) 0x5776 MOV $0x200b,%EDX |
(73) 0x577b MOV %R12,0x18f8(%RSP) |
(73) 0x5783 MOV 0x578(%RSP,%RCX,8),%RCX |
(73) 0x578b BEXTR %RDX,%RCX,%RDX |
(73) 0x5790 XOR %RCX,%RDX |
(73) 0x5793 MOV %EDX,%ECX |
(73) 0x5795 SAL $0x7,%ECX |
(73) 0x5798 AND $-0x62d3a980,%ECX |
(73) 0x579e XOR %RDX,%RCX |
(73) 0x57a1 MOV %ECX,%EDX |
(73) 0x57a3 SAL $0xf,%EDX |
(73) 0x57a6 AND $-0x103a0000,%EDX |
(73) 0x57ac XOR %RCX,%RDX |
(73) 0x57af MOV %RDX,%RCX |
(73) 0x57b2 SHR $0x12,%RCX |
(73) 0x57b6 XOR %RDX,%RCX |
(73) 0x57b9 DEC %RAX |
(73) 0x57bc VCVTUSI2SS %RCX,%XMM5,%XMM2 |
(73) 0x57c2 VFMADD231SS %XMM2,%XMM1,%XMM0 |
(73) 0x57c7 VMULSS -0x492f(%RIP),%XMM1,%XMM1 |
(73) 0x57cf JE 59e0 |
(73) 0x57d5 CMP $0x270,%R12 |
(73) 0x57dc JB 5770 |
(73) 0x57de VPBROADCASTQ %RSI,%ZMM2 |
(73) 0x57e4 XOR %ECX,%ECX |
(73) 0x57e6 NOPW %CS:(%RAX,%RAX,1) |
(74) 0x57f0 VMOVDQA64 %ZMM2,%ZMM3 |
(74) 0x57f6 VMOVDQU64 0x580(%RSP,%RCX,8),%ZMM2 |
(74) 0x57fe VPANDQ -0x4948(%RIP){1to8},%ZMM2,%ZMM4 |
(74) 0x5808 VPTESTMQ -0x4932(%RIP){1to8},%ZMM2,%K1 |
(74) 0x5812 VALIGNQ $0x7,%ZMM3,%ZMM2,%ZMM3 |
(74) 0x5819 VPTERNLOGQ $-0x8,-0x493c(%RIP){1to8},%ZMM3,%ZMM4 |
(74) 0x5824 VPSRLQ $0x1,%ZMM4,%ZMM3 |
(74) 0x582b VPXORQ 0x11e0(%RSP,%RCX,8),%ZMM3,%ZMM3 |
(74) 0x5836 VPXORQ -0x4950(%RIP){1to8},%ZMM3,%ZMM3{%K1} |
(74) 0x5840 VMOVDQU64 %ZMM3,0x578(%RSP,%RCX,8) |
(74) 0x584b ADD $0x8,%RCX |
(74) 0x584f CMP $0xe0,%RCX |
(74) 0x5856 JNE 57f0 |
(73) 0x5858 MOV 0xc80(%RSP),%RDX |
(73) 0x5860 VEXTRACTI32X4 $0x3,%ZMM2,%XMM2 |
(73) 0x5867 MOV 0xc88(%RSP),%RCX |
(73) 0x586f MOV $-0x66f74f21,%R8D |
(73) 0x5875 VPEXTRQ $0x1,%XMM2,%RSI |
(73) 0x587b AND $-0x80000000,%RSI |
(73) 0x5882 MOV %EDX,%EDI |
(73) 0x5884 AND $0x7ffffffe,%EDI |
(73) 0x588a OR %RSI,%RDI |
(73) 0x588d MOV %EDX,%ESI |
(73) 0x588f AND $0x1,%ESI |
(73) 0x5892 AND $-0x80000000,%RDX |
(73) 0x5899 SHR $0x1,%RDI |
(73) 0x589c XOR 0x18e0(%RSP),%RDI |
(73) 0x58a4 NEG %ESI |
(73) 0x58a6 AND %R8D,%ESI |
(73) 0x58a9 XOR %RDI,%RSI |
(73) 0x58ac MOV $-0x66f74f21,%EDI |
(73) 0x58b1 MOV %RSI,0xc78(%RSP) |
(73) 0x58b9 MOV %ECX,%ESI |
(73) 0x58bb AND $0x7ffffffe,%ESI |
(73) 0x58c1 OR %RDX,%RSI |
(73) 0x58c4 MOV %ECX,%EDX |
(73) 0x58c6 AND $0x1,%EDX |
(73) 0x58c9 AND $-0x80000000,%RCX |
(73) 0x58d0 SHR $0x1,%RSI |
(73) 0x58d3 XOR 0x18e8(%RSP),%RSI |
(73) 0x58db NEG %EDX |
(73) 0x58dd AND %R8D,%EDX |
(73) 0x58e0 XOR %RSI,%RDX |
(73) 0x58e3 MOV %RDX,0xc80(%RSP) |
(73) 0x58eb MOV 0xc90(%RSP),%RDX |
(73) 0x58f3 MOV %EDX,%ESI |
(73) 0x58f5 VPBROADCASTQ %RDX,%XMM2 |
(73) 0x58fb AND $0x7ffffffe,%EDX |
(73) 0x5901 AND $0x1,%ESI |
(73) 0x5904 OR %RCX,%RDX |
(73) 0x5907 NEG %ESI |
(73) 0x5909 MOV $0xe4,%ECX |
(73) 0x590e SHR $0x1,%RDX |
(73) 0x5911 XOR 0x18f0(%RSP),%RDX |
(73) 0x5919 AND %R8D,%ESI |
(73) 0x591c XOR %RDX,%RSI |
(73) 0x591f MOV %RSI,0xc88(%RSP) |
(73) 0x5927 NOPW (%RAX,%RAX,1) |
(75) 0x5930 VMOVDQU 0x578(%RSP,%RCX,8),%XMM3 |
(75) 0x5939 VPANDQ -0x4a83(%RIP){1to2},%XMM3,%XMM4 |
(75) 0x5943 VPTESTMQ -0x4a6d(%RIP){1to2},%XMM3,%K1 |
(75) 0x594d VPALIGNR $0x8,%XMM2,%XMM3,%XMM2 |
(75) 0x5953 VPTERNLOGQ $-0x8,-0x4a76(%RIP){1to2},%XMM2,%XMM4 |
(75) 0x595e VPSRLQ $0x1,%XMM4,%XMM2 |
(75) 0x5963 VPXOR -0x1a8(%RSP,%RCX,8),%XMM2,%XMM2 |
(75) 0x596c VPXORQ -0x4a86(%RIP){1to2},%XMM2,%XMM2{%K1} |
(75) 0x5976 VMOVDQU %XMM2,0x570(%RSP,%RCX,8) |
(75) 0x597f ADD $0x2,%RCX |
(75) 0x5983 VMOVDQA %XMM3,%XMM2 |
(75) 0x5987 CMP $0x270,%RCX |
(75) 0x598e JNE 5930 |
(73) 0x5990 MOV 0x18f0(%RSP),%RCX |
(73) 0x5998 MOV 0x578(%RSP),%RSI |
(73) 0x59a0 MOV $-0x80000000,%RDX |
(73) 0x59a7 XOR %R12D,%R12D |
(73) 0x59aa AND %RDX,%RCX |
(73) 0x59ad MOV %ESI,%EDX |
(73) 0x59af AND $0x7ffffffe,%EDX |
(73) 0x59b5 OR %RCX,%RDX |
(73) 0x59b8 MOV %ESI,%ECX |
(73) 0x59ba AND $0x1,%ECX |
(73) 0x59bd SHR $0x1,%RDX |
(73) 0x59c0 XOR 0x11d8(%RSP),%RDX |
(73) 0x59c8 NEG %ECX |
(73) 0x59ca AND %EDI,%ECX |
(73) 0x59cc XOR %RDX,%RCX |
(73) 0x59cf MOV %RCX,0x18f0(%RSP) |
(73) 0x59d7 JMP 5770 |
0x59e0 VDIVSS %XMM1,%XMM0,%XMM0 |
0x59e4 VMOVSS -0x4b48(%RIP),%XMM2 |
0x59ec VUCOMISS %XMM2,%XMM0 |
0x59f0 JAE 5d0d |
0x59f6 MOV 0x128(%RSP),%RAX |
0x59fe VMOVAPS %XMM2,%XMM1 |
0x5a02 VMOVSS %XMM0,(%RAX,%R10,4) |
0x5a08 VXORPS %XMM0,%XMM0,%XMM0 |
0x5a0c MOV %R9,%RAX |
0x5a0f JMP 5a85 |
(70) 0x5a20 MOV %R12,%RCX |
(70) 0x5a23 INC %R12 |
(70) 0x5a26 MOV $0x200b,%EDX |
(70) 0x5a2b MOV %R12,0x18f8(%RSP) |
(70) 0x5a33 MOV 0x578(%RSP,%RCX,8),%RCX |
(70) 0x5a3b BEXTR %RDX,%RCX,%RDX |
(70) 0x5a40 XOR %RCX,%RDX |
(70) 0x5a43 MOV %EDX,%ECX |
(70) 0x5a45 SAL $0x7,%ECX |
(70) 0x5a48 AND $-0x62d3a980,%ECX |
(70) 0x5a4e XOR %RDX,%RCX |
(70) 0x5a51 MOV %ECX,%EDX |
(70) 0x5a53 SAL $0xf,%EDX |
(70) 0x5a56 AND $-0x103a0000,%EDX |
(70) 0x5a5c XOR %RCX,%RDX |
(70) 0x5a5f MOV %RDX,%RCX |
(70) 0x5a62 SHR $0x12,%RCX |
(70) 0x5a66 XOR %RDX,%RCX |
(70) 0x5a69 DEC %RAX |
(70) 0x5a6c VCVTUSI2SS %RCX,%XMM5,%XMM2 |
(70) 0x5a72 VFMADD231SS %XMM2,%XMM1,%XMM0 |
(70) 0x5a77 VMULSS -0x4bdf(%RIP),%XMM1,%XMM1 |
(70) 0x5a7f JE 5c90 |
(70) 0x5a85 CMP $0x270,%R12 |
(70) 0x5a8c JB 5a20 |
(70) 0x5a8e VPBROADCASTQ %RSI,%ZMM2 |
(70) 0x5a94 XOR %ECX,%ECX |
(70) 0x5a96 NOPW %CS:(%RAX,%RAX,1) |
(71) 0x5aa0 VMOVDQA64 %ZMM2,%ZMM3 |
(71) 0x5aa6 VMOVDQU64 0x580(%RSP,%RCX,8),%ZMM2 |
(71) 0x5aae VPANDQ -0x4bf8(%RIP){1to8},%ZMM2,%ZMM4 |
(71) 0x5ab8 VPTESTMQ -0x4be2(%RIP){1to8},%ZMM2,%K1 |
(71) 0x5ac2 VALIGNQ $0x7,%ZMM3,%ZMM2,%ZMM3 |
(71) 0x5ac9 VPTERNLOGQ $-0x8,-0x4bec(%RIP){1to8},%ZMM3,%ZMM4 |
(71) 0x5ad4 VPSRLQ $0x1,%ZMM4,%ZMM3 |
(71) 0x5adb VPXORQ 0x11e0(%RSP,%RCX,8),%ZMM3,%ZMM3 |
(71) 0x5ae6 VPXORQ -0x4c00(%RIP){1to8},%ZMM3,%ZMM3{%K1} |
(71) 0x5af0 VMOVDQU64 %ZMM3,0x578(%RSP,%RCX,8) |
(71) 0x5afb ADD $0x8,%RCX |
(71) 0x5aff CMP $0xe0,%RCX |
(71) 0x5b06 JNE 5aa0 |
(70) 0x5b08 MOV 0xc80(%RSP),%RDX |
(70) 0x5b10 VEXTRACTI32X4 $0x3,%ZMM2,%XMM2 |
(70) 0x5b17 MOV 0xc88(%RSP),%RCX |
(70) 0x5b1f MOV $-0x66f74f21,%R8D |
(70) 0x5b25 VPEXTRQ $0x1,%XMM2,%RSI |
(70) 0x5b2b AND $-0x80000000,%RSI |
(70) 0x5b32 MOV %EDX,%EDI |
(70) 0x5b34 AND $0x7ffffffe,%EDI |
(70) 0x5b3a OR %RSI,%RDI |
(70) 0x5b3d MOV %EDX,%ESI |
(70) 0x5b3f AND $0x1,%ESI |
(70) 0x5b42 AND $-0x80000000,%RDX |
(70) 0x5b49 SHR $0x1,%RDI |
(70) 0x5b4c XOR 0x18e0(%RSP),%RDI |
(70) 0x5b54 NEG %ESI |
(70) 0x5b56 AND %R8D,%ESI |
(70) 0x5b59 XOR %RDI,%RSI |
(70) 0x5b5c MOV $-0x66f74f21,%EDI |
(70) 0x5b61 MOV %RSI,0xc78(%RSP) |
(70) 0x5b69 MOV %ECX,%ESI |
(70) 0x5b6b AND $0x7ffffffe,%ESI |
(70) 0x5b71 OR %RDX,%RSI |
(70) 0x5b74 MOV %ECX,%EDX |
(70) 0x5b76 AND $0x1,%EDX |
(70) 0x5b79 AND $-0x80000000,%RCX |
(70) 0x5b80 SHR $0x1,%RSI |
(70) 0x5b83 XOR 0x18e8(%RSP),%RSI |
(70) 0x5b8b NEG %EDX |
(70) 0x5b8d AND %R8D,%EDX |
(70) 0x5b90 XOR %RSI,%RDX |
(70) 0x5b93 MOV %RDX,0xc80(%RSP) |
(70) 0x5b9b MOV 0xc90(%RSP),%RDX |
(70) 0x5ba3 MOV %EDX,%ESI |
(70) 0x5ba5 VPBROADCASTQ %RDX,%XMM2 |
(70) 0x5bab AND $0x7ffffffe,%EDX |
(70) 0x5bb1 AND $0x1,%ESI |
(70) 0x5bb4 OR %RCX,%RDX |
(70) 0x5bb7 NEG %ESI |
(70) 0x5bb9 MOV $0xe4,%ECX |
(70) 0x5bbe SHR $0x1,%RDX |
(70) 0x5bc1 XOR 0x18f0(%RSP),%RDX |
(70) 0x5bc9 AND %R8D,%ESI |
(70) 0x5bcc XOR %RDX,%RSI |
(70) 0x5bcf MOV %RSI,0xc88(%RSP) |
(70) 0x5bd7 NOPW (%RAX,%RAX,1) |
(72) 0x5be0 VMOVDQU 0x578(%RSP,%RCX,8),%XMM3 |
(72) 0x5be9 VPANDQ -0x4d33(%RIP){1to2},%XMM3,%XMM4 |
(72) 0x5bf3 VPTESTMQ -0x4d1d(%RIP){1to2},%XMM3,%K1 |
(72) 0x5bfd VPALIGNR $0x8,%XMM2,%XMM3,%XMM2 |
(72) 0x5c03 VPTERNLOGQ $-0x8,-0x4d26(%RIP){1to2},%XMM2,%XMM4 |
(72) 0x5c0e VPSRLQ $0x1,%XMM4,%XMM2 |
(72) 0x5c13 VPXOR -0x1a8(%RSP,%RCX,8),%XMM2,%XMM2 |
(72) 0x5c1c VPXORQ -0x4d36(%RIP){1to2},%XMM2,%XMM2{%K1} |
(72) 0x5c26 VMOVDQU %XMM2,0x570(%RSP,%RCX,8) |
(72) 0x5c2f ADD $0x2,%RCX |
(72) 0x5c33 VMOVDQA %XMM3,%XMM2 |
(72) 0x5c37 CMP $0x270,%RCX |
(72) 0x5c3e JNE 5be0 |
(70) 0x5c40 MOV 0x18f0(%RSP),%RCX |
(70) 0x5c48 MOV 0x578(%RSP),%RSI |
(70) 0x5c50 MOV $-0x80000000,%RDX |
(70) 0x5c57 XOR %R12D,%R12D |
(70) 0x5c5a AND %RDX,%RCX |
(70) 0x5c5d MOV %ESI,%EDX |
(70) 0x5c5f AND $0x7ffffffe,%EDX |
(70) 0x5c65 OR %RCX,%RDX |
(70) 0x5c68 MOV %ESI,%ECX |
(70) 0x5c6a AND $0x1,%ECX |
(70) 0x5c6d SHR $0x1,%RDX |
(70) 0x5c70 XOR 0x11d8(%RSP),%RDX |
(70) 0x5c78 NEG %ECX |
(70) 0x5c7a AND %EDI,%ECX |
(70) 0x5c7c XOR %RDX,%RCX |
(70) 0x5c7f MOV %RCX,0x18f0(%RSP) |
(70) 0x5c87 JMP 5a20 |
0x5c90 VDIVSS %XMM1,%XMM0,%XMM0 |
0x5c94 VUCOMISS -0x4df8(%RIP),%XMM0 |
0x5c9c JAE 5d54 |
0x5ca2 MOV 0x120(%RSP),%RAX |
0x5caa VMOVSS %XMM0,(%RAX,%R10,4) |
0x5cb0 INC %R10 |
0x5cb3 CMP 0x3d0(%RSP),%R10 |
0x5cbb JNE 54b0 |
0x5cc6 VXORPS %XMM1,%XMM1,%XMM1 |
0x5cca VMOVAPS %XMM2,%XMM0 |
0x5cce MOV %R9,0x240(%RSP) |
0x5cd6 MOV %R10,0x1c0(%RSP) |
0x5cde MOV %RSI,0x18(%RSP) |
0x5ce3 VZEROUPPER |
0x5ce6 CALL 8900 <@plt_start@+0xf0> |
0x5ceb VMOVSS -0x4e4f(%RIP),%XMM2 |
0x5cf3 MOV 0x18(%RSP),%RSI |
0x5cf8 MOV 0x1c0(%RSP),%R10 |
0x5d00 MOV 0x240(%RSP),%R9 |
0x5d08 JMP 5756 |
0x5d0d VXORPS %XMM1,%XMM1,%XMM1 |
0x5d11 VMOVAPS %XMM2,%XMM0 |
0x5d15 MOV %R9,0x240(%RSP) |
0x5d1d MOV %R10,0x1c0(%RSP) |
0x5d25 MOV %RSI,0x18(%RSP) |
0x5d2a VZEROUPPER |
0x5d2d CALL 8900 <@plt_start@+0xf0> |
0x5d32 VMOVSS -0x4e96(%RIP),%XMM2 |
0x5d3a MOV 0x18(%RSP),%RSI |
0x5d3f MOV 0x1c0(%RSP),%R10 |
0x5d47 MOV 0x240(%RSP),%R9 |
0x5d4f JMP 59f6 |
0x5d54 VMOVSS -0x4eb8(%RIP),%XMM0 |
0x5d5c VXORPS %XMM1,%XMM1,%XMM1 |
0x5d60 MOV %R9,0x240(%RSP) |
0x5d68 MOV %R10,0x1c0(%RSP) |
0x5d70 MOV %RSI,0x18(%RSP) |
0x5d75 VZEROUPPER |
0x5d78 CALL 8900 <@plt_start@+0xf0> |
0x5d7d MOV 0x18(%RSP),%RSI |
0x5d82 MOV 0x1c0(%RSP),%R10 |
0x5d8a MOV 0x240(%RSP),%R9 |
0x5d92 JMP 5ca2 |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/cmath: 1661 - 1661 |
-------------------------------------------------------------------------------- |
1661: { return __builtin_nextafterf(__x, __y); } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/random.tcc: 401 - 3370 |
-------------------------------------------------------------------------------- |
401: for (size_t __k = 0; __k < (__n - __m); ++__k) |
402: { |
403: _UIntType __y = ((_M_x[__k] & __upper_mask) |
404: | (_M_x[__k + 1] & __lower_mask)); |
405: _M_x[__k] = (_M_x[__k + __m] ^ (__y >> 1) |
406: ^ ((__y & 0x01) ? __a : 0)); |
[...] |
412: | (_M_x[__k + 1] & __lower_mask)); |
413: _M_x[__k] = (_M_x[__k + (__m - __n)] ^ (__y >> 1) |
414: ^ ((__y & 0x01) ? __a : 0)); |
415: } |
416: |
417: _UIntType __y = ((_M_x[__n - 1] & __upper_mask) |
418: | (_M_x[0] & __lower_mask)); |
419: _M_x[__n - 1] = (_M_x[__m - 1] ^ (__y >> 1) |
420: ^ ((__y & 0x01) ? __a : 0)); |
[...] |
455: if (_M_p >= state_size) |
456: _M_gen_rand(); |
457: |
458: // Calculate o(x(i)). |
459: result_type __z = _M_x[_M_p++]; |
460: __z ^= (__z >> __u) & __d; |
461: __z ^= (__z << __s) & __b; |
462: __z ^= (__z << __t) & __c; |
463: __z ^= (__z >> __l); |
[...] |
3364: for (size_t __k = __m; __k != 0; --__k) |
3365: { |
3366: __sum += _RealType(__urng() - __urng.min()) * __tmp; |
3367: __tmp *= __r; |
3368: } |
3369: __ret = __sum / __tmp; |
3370: if (__builtin_expect(__ret >= _RealType(1), 0)) |
/home/eoseret/llm-attention/attention_v2.cpp: 164 - 167 |
-------------------------------------------------------------------------------- |
164: for (size_t i = 0; i < elemsW; ++i) { |
165: h_WQ[i] = dist(rng); |
166: h_WK[i] = dist(rng); |
167: h_WV[i] = dist(rng); |
| Coverage (%) | Name | Source Location | Module |
|---|
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.22 |
| CQA speedup if FP arith vectorized | 1.43 |
| CQA speedup if fully vectorized | 11.78 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.14 |
| Bottlenecks | micro-operation queue |
| Function | main |
| Source | cmath:1661-1661,random.tcc:3369-3370,attention_v2.cpp:164-167 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 9.13 |
| CQA cycles if no scalar integer | 7.50 |
| CQA cycles if FP arith vectorized | 6.36 |
| CQA cycles if fully vectorized | 0.77 |
| Front-end cycles | 9.13 |
| P0 cycles | 0.67 |
| P1 cycles | 0.67 |
| P2 cycles | 0.67 |
| P3 cycles | 3.33 |
| P4 cycles | 3.33 |
| P5 cycles | 3.33 |
| P6 cycles | 8.00 |
| P7 cycles | 8.00 |
| P8 cycles | 8.00 |
| P9 cycles | 8.00 |
| P10 cycles | 1.50 |
| P11 cycles | 1.50 |
| P12 cycles | 1.50 |
| P13 cycles | 1.50 |
| P14 cycles | 3.00 |
| P15 cycles | 3.00 |
| DIV/SQRT cycles | 7.50 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 67.00 |
| Nb uops | 73.00 |
| Nb loads | 20.00 |
| Nb stores | 12.00 |
| Nb stack references | 7.00 |
| FLOP/cycle | 0.33 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 3.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 23.67 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 132.00 |
| Bytes stored | 84.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 30.23 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | NA |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | 0.00 |
| Vectorization ratio other | 81.25 |
| Vector-efficiency ratio all | 14.10 |
| Vector-efficiency ratio load | 9.13 |
| Vector-efficiency ratio store | 10.94 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | NA |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | 6.25 |
| Vector-efficiency ratio other | 21.48 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.22 |
| CQA speedup if FP arith vectorized | 1.43 |
| CQA speedup if fully vectorized | 11.78 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.14 |
| Bottlenecks | micro-operation queue |
| Function | main |
| Source | cmath:1661-1661,random.tcc:3369-3370,attention_v2.cpp:164-167 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 9.13 |
| CQA cycles if no scalar integer | 7.50 |
| CQA cycles if FP arith vectorized | 6.36 |
| CQA cycles if fully vectorized | 0.77 |
| Front-end cycles | 9.13 |
| P0 cycles | 0.67 |
| P1 cycles | 0.67 |
| P2 cycles | 0.67 |
| P3 cycles | 3.33 |
| P4 cycles | 3.33 |
| P5 cycles | 3.33 |
| P6 cycles | 8.00 |
| P7 cycles | 8.00 |
| P8 cycles | 8.00 |
| P9 cycles | 8.00 |
| P10 cycles | 1.50 |
| P11 cycles | 1.50 |
| P12 cycles | 1.50 |
| P13 cycles | 1.50 |
| P14 cycles | 3.00 |
| P15 cycles | 3.00 |
| DIV/SQRT cycles | 7.50 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 67.00 |
| Nb uops | 73.00 |
| Nb loads | 20.00 |
| Nb stores | 12.00 |
| Nb stack references | 7.00 |
| FLOP/cycle | 0.33 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 3.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 23.67 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 132.00 |
| Bytes stored | 84.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 30.23 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | NA |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | 0.00 |
| Vectorization ratio other | 81.25 |
| Vector-efficiency ratio all | 14.10 |
| Vector-efficiency ratio load | 9.13 |
| Vector-efficiency ratio store | 10.94 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | NA |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | 6.25 |
| Vector-efficiency ratio other | 21.48 |
| Path / |
| Function | main |
| Source file and lines | attention_v2.cpp:164-167 |
| Module | attention-aocc-znver5-512 |
| nb instructions | 67 |
| nb uops | 73 |
| loop length | 370 |
| used x86 registers | 5 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 0 |
| used zmm registers | 0 |
| nb stack references | 7 |
| micro-operation queue | 9.13 cycles |
| front end | 9.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 0.67 | 0.67 | 0.67 | 3.33 | 3.33 | 3.33 | 8.00 | 8.00 | 8.00 | 8.00 | 1.50 | 1.50 | 1.50 | 1.50 | 3.00 | 3.00 |
| cycles | 0.67 | 0.67 | 0.67 | 3.33 | 3.33 | 3.33 | 8.00 | 8.00 | 8.00 | 8.00 | 1.50 | 1.50 | 1.50 | 1.50 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | 7.50 |
| Front-end | 9.13 |
| Dispatch | 8.00 |
| DIV/SQRT | 7.50 |
| Overall L1 | 9.13 |
| all | 16% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 100% |
| all | 40% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 76% |
| all | 30% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 81% |
| all | 14% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 25% |
| all | 13% |
| load | 6% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 6% |
| other | 20% |
| all | 14% |
| load | 9% |
| store | 10% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 6% |
| other | 21% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VMOVSS -0x4614(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| JMP 5535 <main+0xbd5> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VDIVSS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 10 | 2.50 | scal (6.3%) |
| VMOVSS -0x48a8(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VUCOMISS %XMM2,%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 7 | 0.50 | scal (6.3%) |
| JAE 5cc6 <main+0x1366> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VMOVAPS %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM0,(%RAX,%R10,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| JMP 57d5 <main+0xe75> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VDIVSS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 10 | 2.50 | scal (6.3%) |
| VMOVSS -0x4b48(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VUCOMISS %XMM2,%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 7 | 0.50 | scal (6.3%) |
| JAE 5d0d <main+0x13ad> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x128(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VMOVAPS %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM0,(%RAX,%R10,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| JMP 5a85 <main+0x1125> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VDIVSS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 10 | 2.50 | scal (6.3%) |
| VUCOMISS -0x4df8(%RIP),%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 7 | 0.50 | scal (6.3%) |
| JAE 5d54 <main+0x13f4> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x120(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VMOVSS %XMM0,(%RAX,%R10,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| INC %R10 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP 0x3d0(%RSP),%R10 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| JNE 54b0 <main+0xb50> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 8900 <@plt_start@+0xf0> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVSS -0x4e4f(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| MOV 0x18(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1c0(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x240(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 5756 <main+0xdf6> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 8900 <@plt_start@+0xf0> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVSS -0x4e96(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| MOV 0x18(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1c0(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x240(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 59f6 <main+0x1096> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VMOVSS -0x4eb8(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 8900 <@plt_start@+0xf0> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| MOV 0x18(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1c0(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x240(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 5ca2 <main+0x1342> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| Function | main |
| Source file and lines | attention_v2.cpp:164-167 |
| Module | attention-aocc-znver5-512 |
| nb instructions | 67 |
| nb uops | 73 |
| loop length | 370 |
| used x86 registers | 5 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 0 |
| used zmm registers | 0 |
| nb stack references | 7 |
| micro-operation queue | 9.13 cycles |
| front end | 9.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 0.67 | 0.67 | 0.67 | 3.33 | 3.33 | 3.33 | 8.00 | 8.00 | 8.00 | 8.00 | 1.50 | 1.50 | 1.50 | 1.50 | 3.00 | 3.00 |
| cycles | 0.67 | 0.67 | 0.67 | 3.33 | 3.33 | 3.33 | 8.00 | 8.00 | 8.00 | 8.00 | 1.50 | 1.50 | 1.50 | 1.50 | 3.00 | 3.00 |
| Cycles executing div or sqrt instructions | 7.50 |
| Front-end | 9.13 |
| Dispatch | 8.00 |
| DIV/SQRT | 7.50 |
| Overall L1 | 9.13 |
| all | 16% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 100% |
| all | 40% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 76% |
| all | 30% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 81% |
| all | 14% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 25% |
| all | 13% |
| load | 6% |
| store | 6% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 6% |
| other | 20% |
| all | 14% |
| load | 9% |
| store | 10% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 6% |
| other | 21% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VMOVSS -0x4614(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| JMP 5535 <main+0xbd5> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VDIVSS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 10 | 2.50 | scal (6.3%) |
| VMOVSS -0x48a8(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VUCOMISS %XMM2,%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 7 | 0.50 | scal (6.3%) |
| JAE 5cc6 <main+0x1366> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VMOVAPS %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM0,(%RAX,%R10,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| JMP 57d5 <main+0xe75> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VDIVSS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 10 | 2.50 | scal (6.3%) |
| VMOVSS -0x4b48(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VUCOMISS %XMM2,%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 7 | 0.50 | scal (6.3%) |
| JAE 5d0d <main+0x13ad> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x128(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VMOVAPS %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM0,(%RAX,%R10,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| JMP 5a85 <main+0x1125> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VDIVSS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 10 | 2.50 | scal (6.3%) |
| VUCOMISS -0x4df8(%RIP),%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 7 | 0.50 | scal (6.3%) |
| JAE 5d54 <main+0x13f4> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x120(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VMOVSS %XMM0,(%RAX,%R10,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| INC %R10 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP 0x3d0(%RSP),%R10 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| JNE 54b0 <main+0xb50> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 8900 <@plt_start@+0xf0> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVSS -0x4e4f(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| MOV 0x18(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1c0(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x240(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 5756 <main+0xdf6> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 8900 <@plt_start@+0xf0> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVSS -0x4e96(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| MOV 0x18(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1c0(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x240(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 59f6 <main+0x1096> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VMOVSS -0x4eb8(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R9,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R10,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 8900 <@plt_start@+0xf0> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| MOV 0x18(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1c0(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x240(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 5ca2 <main+0x1342> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
