| Loop Id: 57 | Module: attention-clang-gnr256 | Source: attention_v2.cpp:43-61 | Coverage: 0.13% |
|---|
| Loop Id: 57 | Module: attention-clang-gnr256 | Source: attention_v2.cpp:43-61 | Coverage: 0.13% |
|---|
0x56b1 MOV 0x130(%RSP),%RAX |
0x56b9 VMOVSS %XMM1,(%RAX,%RCX,4) |
0x56be INC %RSI |
0x56c1 MOV 0x128(%RSP),%RAX |
0x56c9 MOV 0x2c0(%RSP),%RDX |
0x56d1 ADD %RAX,%RDX |
0x56d4 ADD %RAX,%R8 |
0x56d7 MOV 0x230(%RSP),%RDI |
0x56df ADD %RAX,%RDI |
0x56e2 MOV %RBX,%R9 |
0x56e5 CMP %R13,%RBX |
0x56e8 JE 6294 |
0x56ee MOV %RSI,%R10 |
0x56f1 AND $-0x4,%R10 |
0x56f5 MOV %RSI,%RBX |
0x56f8 AND $-0x20,%RBX |
0x56fc CMP $0x4,%RSI |
0x5700 JAE 5711 |
0x5702 XOR %EAX,%EAX |
0x5704 VMOVSS 0x18f8(%RIP),%XMM1 |
0x570c JMP 57d7 |
0x5711 CMP $0x20,%RSI |
0x5715 JAE 5723 |
0x5717 XOR %EAX,%EAX |
0x5719 VMOVSS 0x18e3(%RIP),%XMM1 |
0x5721 JMP 579c |
0x5723 MOV %RSI,%RAX |
0x5726 MOV $0x7fffffffffffffe0,%RCX |
0x5730 AND %RCX,%RAX |
0x5733 XOR %ECX,%ECX |
0x5735 VBROADCASTSS 0x18c6(%RIP),%YMM3 |
0x573e VMOVAPS %YMM3,%YMM0 |
0x5742 VMOVAPS %YMM3,%YMM1 |
0x5746 VMOVAPS %YMM3,%YMM2 |
(46) 0x574a VMAXPS -0x60(%RDX,%RCX,4),%YMM0,%YMM0 |
(46) 0x5750 VMAXPS -0x40(%RDX,%RCX,4),%YMM1,%YMM1 |
(46) 0x5756 VMAXPS -0x20(%RDX,%RCX,4),%YMM2,%YMM2 |
(46) 0x575c VMAXPS (%RDX,%RCX,4),%YMM3,%YMM3 |
(46) 0x5761 ADD $0x20,%RCX |
(46) 0x5765 CMP %RCX,%RBX |
(46) 0x5768 JNE 574a |
0x576a VMAXPS %YMM1,%YMM0,%YMM0 |
0x576e VMAXPS %YMM3,%YMM2,%YMM1 |
0x5772 VMAXPS %YMM1,%YMM0,%YMM0 |
0x5776 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x577c VMAXPS %XMM1,%XMM0,%XMM0 |
0x5780 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x5785 VMAXPS %XMM1,%XMM0,%XMM0 |
0x5789 VMOVSHDUP %XMM0,%XMM1 |
0x578d VMAXSS %XMM1,%XMM0,%XMM1 |
0x5791 CMP %RAX,%RSI |
0x5794 JE 57e5 |
0x5796 TEST $0x1c,%SIL |
0x579a JE 57d7 |
0x579c MOV %RAX,%RCX |
0x579f MOV $0x7fffffffffffffe0,%RAX |
0x57a9 ADD $0x1c,%RAX |
0x57ad AND %RSI,%RAX |
0x57b0 VBROADCASTSS %XMM1,%XMM0 |
(61) 0x57b5 VMAXPS (%R8,%RCX,4),%XMM0,%XMM0 |
(61) 0x57bb ADD $0x4,%RCX |
(61) 0x57bf CMP %RCX,%R10 |
(61) 0x57c2 JNE 57b5 |
0x57c4 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x57c9 VMAXPS %XMM1,%XMM0,%XMM0 |
0x57cd VMOVSHDUP %XMM0,%XMM1 |
0x57d1 VMAXSS %XMM1,%XMM0,%XMM1 |
0x57d5 JMP 57e0 |
(60) 0x57d7 VMAXSS (%R8,%RAX,4),%XMM1,%XMM1 |
(60) 0x57dd INC %RAX |
(60) 0x57e0 CMP %RAX,%RSI |
(60) 0x57e3 JNE 57d7 |
0x57e5 CMP $0x3,%RSI |
0x57e9 MOV %RSI,0x120(%RSP) |
0x57f1 MOV %R8,0x20(%RSP) |
0x57f6 VMOVAPS %XMM1,0x2a0(%RSP) |
0x57ff MOV %RDX,0x2c0(%RSP) |
0x5807 MOV %RDI,0x230(%RSP) |
0x580f MOV %R9,0x2b8(%RSP) |
0x5817 JA 5824 |
0x5819 VXORPS %XMM2,%XMM2,%XMM2 |
0x581d XOR %EBX,%EBX |
0x581f JMP 5f9e |
0x5824 CMP $0x20,%RSI |
0x5828 JAE 5835 |
0x582a VXORPS %XMM2,%XMM2,%XMM2 |
0x582e XOR %EBX,%EBX |
0x5830 JMP 5e7e |
0x5835 MOV %R10,0x310(%RSP) |
0x583d MOV $0x7fffffffffffffe0,%RAX |
0x5847 AND %RAX,%RSI |
0x584a MOV %RSI,0x228(%RSP) |
0x5852 VBROADCASTSS %XMM1,%YMM0 |
0x5857 VMOVAPS %YMM0,0x420(%RSP) |
0x5860 VXORPS %XMM0,%XMM0,%XMM0 |
0x5864 VMOVAPS %YMM0,0x3c0(%RSP) |
0x586d XOR %R13D,%R13D |
0x5870 VMOVAPS %YMM0,0x3a0(%RSP) |
0x5879 VMOVAPS %YMM0,0x3e0(%RSP) |
0x5882 VXORPS %XMM1,%XMM1,%XMM1 |
(47) 0x5886 VMOVAPS %YMM1,0x440(%RSP) |
(47) 0x588f VMOVUPS -0x60(%RDX,%R13,4),%YMM0 |
(47) 0x5896 VMOVUPS -0x40(%RDX,%R13,4),%YMM1 |
(47) 0x589d VMOVUPS -0x20(%RDX,%R13,4),%YMM2 |
(47) 0x58a4 VMOVUPS (%RDX,%R13,4),%YMM3 |
(47) 0x58aa VMOVAPS 0x420(%RSP),%YMM4 |
(47) 0x58b3 VSUBPS %YMM4,%YMM0,%YMM5 |
(47) 0x58b7 VMOVAPS %YMM5,0x100(%RSP) |
(47) 0x58c0 VSUBPS %YMM4,%YMM1,%YMM0 |
(47) 0x58c4 VMOVAPS %YMM0,0xa0(%RSP) |
(47) 0x58cd VSUBPS %YMM4,%YMM2,%YMM0 |
(47) 0x58d1 VMOVAPS %YMM0,0x160(%RSP) |
(47) 0x58da VSUBPS %YMM4,%YMM3,%YMM0 |
(47) 0x58de VMOVAPS %YMM0,0x60(%RSP) |
(47) 0x58e4 VEXTRACTF128 $0x1,%YMM5,%XMM0 |
(47) 0x58ea VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x58f3 VZEROUPPER |
(47) 0x58f6 CALL 1160 <expf@plt> |
(47) 0x58fb VMOVAPS %XMM0,0x1b0(%RSP) |
(47) 0x5904 VMOVSHDUP 0xe0(%RSP),%XMM0 |
(47) 0x590d CALL 1160 <expf@plt> |
(47) 0x5912 VMOVAPS 0x1b0(%RSP),%XMM1 |
(47) 0x591b VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x5921 VMOVAPS %XMM0,0x1b0(%RSP) |
(47) 0x592a VPERMILPD $0x1,0xe0(%RSP),%XMM0 |
(47) 0x5935 CALL 1160 <expf@plt> |
(47) 0x593a VMOVAPS 0x1b0(%RSP),%XMM1 |
(47) 0x5943 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x5949 VMOVAPS %XMM0,0x1b0(%RSP) |
(47) 0x5952 VPSHUFD $-0x1,0xe0(%RSP),%XMM0 |
(47) 0x595c CALL 1160 <expf@plt> |
(47) 0x5961 VMOVAPS 0x1b0(%RSP),%XMM1 |
(47) 0x596a VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x5970 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x5979 VMOVAPS 0x100(%RSP),%YMM0 |
(47) 0x5982 VZEROUPPER |
(47) 0x5985 CALL 1160 <expf@plt> |
(47) 0x598a VMOVAPS %XMM0,0x1b0(%RSP) |
(47) 0x5993 VMOVSHDUP 0x100(%RSP),%XMM0 |
(47) 0x599c CALL 1160 <expf@plt> |
(47) 0x59a1 VMOVAPS 0x1b0(%RSP),%XMM1 |
(47) 0x59aa VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x59b0 VMOVAPS %XMM0,0x1b0(%RSP) |
(47) 0x59b9 VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(47) 0x59c4 CALL 1160 <expf@plt> |
(47) 0x59c9 VMOVAPS 0x1b0(%RSP),%XMM1 |
(47) 0x59d2 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x59d8 VMOVAPS %XMM0,0x1b0(%RSP) |
(47) 0x59e1 VPSHUFD $-0x1,0x100(%RSP),%XMM0 |
(47) 0x59eb CALL 1160 <expf@plt> |
(47) 0x59f0 VMOVAPS 0x1b0(%RSP),%XMM1 |
(47) 0x59f9 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x59ff VINSERTF128 $0x1,0xe0(%RSP),%YMM0,%YMM0 |
(47) 0x5a0a VMOVAPS 0x3c0(%RSP),%YMM1 |
(47) 0x5a13 VADDPS %YMM1,%YMM0,%YMM1 |
(47) 0x5a17 VMOVAPS %YMM1,0x3c0(%RSP) |
(47) 0x5a20 VMOVAPS 0xa0(%RSP),%YMM0 |
(47) 0x5a29 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x5a2f VMOVAPS %XMM0,0x100(%RSP) |
(47) 0x5a38 VZEROUPPER |
(47) 0x5a3b CALL 1160 <expf@plt> |
(47) 0x5a40 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x5a49 VMOVSHDUP 0x100(%RSP),%XMM0 |
(47) 0x5a52 CALL 1160 <expf@plt> |
(47) 0x5a57 VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x5a60 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x5a66 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x5a6f VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(47) 0x5a7a CALL 1160 <expf@plt> |
(47) 0x5a7f VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x5a88 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x5a8e VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x5a97 VPSHUFD $-0x1,0x100(%RSP),%XMM0 |
(47) 0x5aa1 CALL 1160 <expf@plt> |
(47) 0x5aa6 VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x5aaf VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x5ab5 VMOVAPS %XMM0,0x100(%RSP) |
(47) 0x5abe VMOVAPS 0xa0(%RSP),%YMM0 |
(47) 0x5ac7 VZEROUPPER |
(47) 0x5aca CALL 1160 <expf@plt> |
(47) 0x5acf VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x5ad8 VMOVSHDUP 0xa0(%RSP),%XMM0 |
(47) 0x5ae1 CALL 1160 <expf@plt> |
(47) 0x5ae6 VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x5aef VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x5af5 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x5afe VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(47) 0x5b09 CALL 1160 <expf@plt> |
(47) 0x5b0e VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x5b17 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x5b1d VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x5b26 VPSHUFD $-0x1,0xa0(%RSP),%XMM0 |
(47) 0x5b30 CALL 1160 <expf@plt> |
(47) 0x5b35 VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x5b3e VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x5b44 VINSERTF128 $0x1,0x100(%RSP),%YMM0,%YMM0 |
(47) 0x5b4f VMOVAPS 0x3a0(%RSP),%YMM1 |
(47) 0x5b58 VADDPS %YMM1,%YMM0,%YMM1 |
(47) 0x5b5c VMOVAPS %YMM1,0x3a0(%RSP) |
(47) 0x5b65 VMOVAPS 0x160(%RSP),%YMM0 |
(47) 0x5b6e VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x5b74 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x5b7d VZEROUPPER |
(47) 0x5b80 CALL 1160 <expf@plt> |
(47) 0x5b85 VMOVAPS %XMM0,0x100(%RSP) |
(47) 0x5b8e VMOVSHDUP 0xa0(%RSP),%XMM0 |
(47) 0x5b97 CALL 1160 <expf@plt> |
(47) 0x5b9c VMOVAPS 0x100(%RSP),%XMM1 |
(47) 0x5ba5 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x5bab VMOVAPS %XMM0,0x100(%RSP) |
(47) 0x5bb4 VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(47) 0x5bbf CALL 1160 <expf@plt> |
(47) 0x5bc4 VMOVAPS 0x100(%RSP),%XMM1 |
(47) 0x5bcd VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x5bd3 VMOVAPS %XMM0,0x100(%RSP) |
(47) 0x5bdc VPSHUFD $-0x1,0xa0(%RSP),%XMM0 |
(47) 0x5be6 CALL 1160 <expf@plt> |
(47) 0x5beb VMOVAPS 0x100(%RSP),%XMM1 |
(47) 0x5bf4 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x5bfa VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x5c03 VMOVAPS 0x160(%RSP),%YMM0 |
(47) 0x5c0c VZEROUPPER |
(47) 0x5c0f CALL 1160 <expf@plt> |
(47) 0x5c14 VMOVAPS %XMM0,0x100(%RSP) |
(47) 0x5c1d VMOVSHDUP 0x160(%RSP),%XMM0 |
(47) 0x5c26 CALL 1160 <expf@plt> |
(47) 0x5c2b VMOVAPS 0x100(%RSP),%XMM1 |
(47) 0x5c34 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x5c3a VMOVAPS %XMM0,0x100(%RSP) |
(47) 0x5c43 VPERMILPD $0x1,0x160(%RSP),%XMM0 |
(47) 0x5c4e CALL 1160 <expf@plt> |
(47) 0x5c53 VMOVAPS 0x100(%RSP),%XMM1 |
(47) 0x5c5c VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x5c62 VMOVAPS %XMM0,0x100(%RSP) |
(47) 0x5c6b VPSHUFD $-0x1,0x160(%RSP),%XMM0 |
(47) 0x5c75 CALL 1160 <expf@plt> |
(47) 0x5c7a VMOVAPS 0x100(%RSP),%XMM1 |
(47) 0x5c83 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x5c89 VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 |
(47) 0x5c94 VMOVAPS 0x3e0(%RSP),%YMM1 |
(47) 0x5c9d VADDPS %YMM1,%YMM0,%YMM1 |
(47) 0x5ca1 VMOVAPS %YMM1,0x3e0(%RSP) |
(47) 0x5caa VMOVAPS 0x60(%RSP),%YMM0 |
(47) 0x5cb0 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x5cb6 VMOVAPS %XMM0,0x160(%RSP) |
(47) 0x5cbf VZEROUPPER |
(47) 0x5cc2 CALL 1160 <expf@plt> |
(47) 0x5cc7 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x5cd0 VMOVSHDUP 0x160(%RSP),%XMM0 |
(47) 0x5cd9 CALL 1160 <expf@plt> |
(47) 0x5cde VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x5ce7 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x5ced VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x5cf6 VPERMILPD $0x1,0x160(%RSP),%XMM0 |
(47) 0x5d01 CALL 1160 <expf@plt> |
(47) 0x5d06 VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x5d0f VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x5d15 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x5d1e VPSHUFD $-0x1,0x160(%RSP),%XMM0 |
(47) 0x5d28 CALL 1160 <expf@plt> |
(47) 0x5d2d VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x5d36 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x5d3c VMOVAPS %XMM0,0x160(%RSP) |
(47) 0x5d45 VMOVAPS 0x60(%RSP),%YMM0 |
(47) 0x5d4b VZEROUPPER |
(47) 0x5d4e CALL 1160 <expf@plt> |
(47) 0x5d53 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x5d5c VMOVSHDUP 0x60(%RSP),%XMM0 |
(47) 0x5d62 CALL 1160 <expf@plt> |
(47) 0x5d67 VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x5d70 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x5d76 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x5d7f VPERMILPD $0x1,0x60(%RSP),%XMM0 |
(47) 0x5d87 CALL 1160 <expf@plt> |
(47) 0x5d8c VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x5d95 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x5d9b VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x5da4 VPSHUFD $-0x1,0x60(%RSP),%XMM0 |
(47) 0x5dab CALL 1160 <expf@plt> |
(47) 0x5db0 VMOVAPS 0x440(%RSP),%YMM1 |
(47) 0x5db9 MOV 0x2c0(%RSP),%RDX |
(47) 0x5dc1 VMOVAPS 0xa0(%RSP),%XMM2 |
(47) 0x5dca VINSERTPS $0x30,%XMM0,%XMM2,%XMM0 |
(47) 0x5dd0 VINSERTF128 $0x1,0x160(%RSP),%YMM0,%YMM0 |
(47) 0x5ddb VADDPS %YMM1,%YMM0,%YMM1 |
(47) 0x5ddf ADD $0x20,%R13 |
(47) 0x5de3 CMP %R13,%RBX |
(47) 0x5de6 JNE 5886 |
0x5dec VMOVAPS 0x3a0(%RSP),%YMM0 |
0x5df5 VADDPS 0x3c0(%RSP),%YMM0,%YMM0 |
0x5dfe VADDPS 0x3e0(%RSP),%YMM0,%YMM0 |
0x5e07 VADDPS %YMM0,%YMM1,%YMM0 |
0x5e0b VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x5e11 VADDPS %XMM1,%XMM0,%XMM0 |
0x5e15 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x5e1a VADDPS %XMM1,%XMM0,%XMM0 |
0x5e1e VMOVSHDUP %XMM0,%XMM1 |
0x5e22 VADDSS %XMM1,%XMM0,%XMM2 |
0x5e26 MOV 0x120(%RSP),%RSI |
0x5e2e MOV 0x228(%RSP),%RBX |
0x5e36 CMP %RBX,%RSI |
0x5e39 JNE 5e56 |
0x5e3b MOV 0x88(%RSP),%R13 |
0x5e43 MOV 0x20(%RSP),%R8 |
0x5e48 VMOVAPS 0x2a0(%RSP),%XMM1 |
0x5e51 JMP 5fe4 |
0x5e56 TEST $0x1c,%SIL |
0x5e5a MOV 0x88(%RSP),%R13 |
0x5e62 MOV 0x20(%RSP),%R8 |
0x5e67 VMOVAPS 0x2a0(%RSP),%XMM1 |
0x5e70 MOV 0x310(%RSP),%R10 |
0x5e78 JE 5f9e |
0x5e7e MOV %RBX,%R13 |
0x5e81 VXORPS %XMM0,%XMM0,%XMM0 |
0x5e85 VMOVSS %XMM2,%XMM0,%XMM2 |
0x5e89 MOV $0x7fffffffffffffe0,%RAX |
0x5e93 ADD $0x1c,%RAX |
0x5e97 AND %RSI,%RAX |
0x5e9a MOV %RAX,0x228(%RSP) |
0x5ea2 VBROADCASTSS %XMM1,%XMM0 |
0x5ea7 VMOVAPS %XMM0,0x100(%RSP) |
(59) 0x5eb0 VMOVAPS %XMM2,0x160(%RSP) |
(59) 0x5eb9 VMOVUPS (%R8,%R13,4),%XMM0 |
(59) 0x5ebf VSUBPS 0x100(%RSP),%XMM0,%XMM0 |
(59) 0x5ec8 VMOVAPS %XMM0,0x60(%RSP) |
(59) 0x5ece MOV %R10,%RBX |
(59) 0x5ed1 VZEROUPPER |
(59) 0x5ed4 CALL 1160 <expf@plt> |
(59) 0x5ed9 VMOVAPS %XMM0,0xa0(%RSP) |
(59) 0x5ee2 VMOVSHDUP 0x60(%RSP),%XMM0 |
(59) 0x5ee8 CALL 1160 <expf@plt> |
(59) 0x5eed VMOVAPS 0xa0(%RSP),%XMM1 |
(59) 0x5ef6 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(59) 0x5efc VMOVAPS %XMM0,0xa0(%RSP) |
(59) 0x5f05 VPERMILPD $0x1,0x60(%RSP),%XMM0 |
(59) 0x5f0d CALL 1160 <expf@plt> |
(59) 0x5f12 VMOVAPS 0xa0(%RSP),%XMM1 |
(59) 0x5f1b VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(59) 0x5f21 VMOVAPS %XMM0,0xa0(%RSP) |
(59) 0x5f2a VPSHUFD $-0x1,0x60(%RSP),%XMM0 |
(59) 0x5f31 CALL 1160 <expf@plt> |
(59) 0x5f36 VMOVAPS 0x160(%RSP),%XMM2 |
(59) 0x5f3f MOV %RBX,%R10 |
(59) 0x5f42 MOV 0x20(%RSP),%R8 |
(59) 0x5f47 VMOVAPS 0xa0(%RSP),%XMM1 |
(59) 0x5f50 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(59) 0x5f56 VADDPS %XMM2,%XMM0,%XMM2 |
(59) 0x5f5a ADD $0x4,%R13 |
(59) 0x5f5e CMP %R13,%RBX |
(59) 0x5f61 JNE 5eb0 |
0x5f67 VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 |
0x5f6c VADDPS %XMM0,%XMM2,%XMM0 |
0x5f70 VMOVSHDUP %XMM0,%XMM1 |
0x5f74 VADDSS %XMM1,%XMM0,%XMM2 |
0x5f78 MOV 0x120(%RSP),%RSI |
0x5f80 MOV 0x228(%RSP),%RBX |
0x5f88 CMP %RBX,%RSI |
0x5f8b MOV 0x88(%RSP),%R13 |
0x5f93 VMOVAPS 0x2a0(%RSP),%XMM1 |
0x5f9c JE 5fe4 |
(48) 0x5f9e VMOVAPS %XMM2,0x160(%RSP) |
(48) 0x5fa7 VMOVSS (%R8,%RBX,4),%XMM0 |
(48) 0x5fad VSUBSS %XMM1,%XMM0,%XMM0 |
(48) 0x5fb1 VZEROUPPER |
(48) 0x5fb4 CALL 1160 <expf@plt> |
(48) 0x5fb9 VMOVAPS 0x160(%RSP),%XMM2 |
(48) 0x5fc2 VMOVAPS 0x2a0(%RSP),%XMM1 |
(48) 0x5fcb MOV 0x20(%RSP),%R8 |
(48) 0x5fd0 MOV 0x120(%RSP),%RSI |
(48) 0x5fd8 VADDSS %XMM2,%XMM0,%XMM2 |
(48) 0x5fdc INC %RBX |
(48) 0x5fdf CMP %RBX,%RSI |
(48) 0x5fe2 JNE 5f9e |
0x5fe4 CMP $0x8,%RSI |
0x5fe8 VMOVAPS %XMM2,0x160(%RSP) |
0x5ff1 JAE 6006 |
0x5ff3 XOR %EBX,%EBX |
0x5ff5 MOV 0x230(%RSP),%RAX |
0x5ffd VMOVAPS %XMM1,%XMM2 |
0x6001 JMP 61c9 |
0x6006 MOV %RSI,%RAX |
0x6009 AND $-0x8,%RAX |
0x600d MOV %RAX,0xe0(%RSP) |
0x6015 MOV $0x7fffffffffffffe0,%RAX |
0x601f LEA 0x18(%RAX),%RBX |
0x6023 AND %RSI,%RBX |
0x6026 VBROADCASTSS %XMM2,%YMM0 |
0x602b VMOVAPS %YMM0,0x3c0(%RSP) |
0x6034 VBROADCASTSS %XMM1,%YMM0 |
0x6039 VMOVAPS %YMM0,0x3a0(%RSP) |
0x6042 XOR %R13D,%R13D |
(49) 0x6045 VMOVUPS (%R8,%R13,4),%YMM0 |
(49) 0x604b VSUBPS 0x3a0(%RSP),%YMM0,%YMM0 |
(49) 0x6054 VMOVAPS %YMM0,0x60(%RSP) |
(49) 0x605a VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(49) 0x6060 VMOVAPS %XMM0,0xa0(%RSP) |
(49) 0x6069 VZEROUPPER |
(49) 0x606c CALL 1160 <expf@plt> |
(49) 0x6071 VMOVAPS %XMM0,0x100(%RSP) |
(49) 0x607a VMOVSHDUP 0xa0(%RSP),%XMM0 |
(49) 0x6083 CALL 1160 <expf@plt> |
(49) 0x6088 VMOVAPS 0x100(%RSP),%XMM1 |
(49) 0x6091 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(49) 0x6097 VMOVAPS %XMM0,0x100(%RSP) |
(49) 0x60a0 VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(49) 0x60ab CALL 1160 <expf@plt> |
(49) 0x60b0 VMOVAPS 0x100(%RSP),%XMM1 |
(49) 0x60b9 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(49) 0x60bf VMOVAPS %XMM0,0x100(%RSP) |
(49) 0x60c8 VPSHUFD $-0x1,0xa0(%RSP),%XMM0 |
(49) 0x60d2 CALL 1160 <expf@plt> |
(49) 0x60d7 VMOVAPS 0x100(%RSP),%XMM1 |
(49) 0x60e0 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(49) 0x60e6 VMOVAPS %XMM0,0xa0(%RSP) |
(49) 0x60ef VMOVAPS 0x60(%RSP),%YMM0 |
(49) 0x60f5 VZEROUPPER |
(49) 0x60f8 CALL 1160 <expf@plt> |
(49) 0x60fd VMOVAPS %XMM0,0x100(%RSP) |
(49) 0x6106 VMOVSHDUP 0x60(%RSP),%XMM0 |
(49) 0x610c CALL 1160 <expf@plt> |
(49) 0x6111 VMOVAPS 0x100(%RSP),%XMM1 |
(49) 0x611a VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(49) 0x6120 VMOVAPS %XMM0,0x100(%RSP) |
(49) 0x6129 VPERMILPD $0x1,0x60(%RSP),%XMM0 |
(49) 0x6131 CALL 1160 <expf@plt> |
(49) 0x6136 VMOVAPS 0x100(%RSP),%XMM1 |
(49) 0x613f VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(49) 0x6145 VMOVAPS %XMM0,0x100(%RSP) |
(49) 0x614e VPSHUFD $-0x1,0x60(%RSP),%XMM0 |
(49) 0x6155 CALL 1160 <expf@plt> |
(49) 0x615a MOV 0x230(%RSP),%RAX |
(49) 0x6162 MOV 0x20(%RSP),%R8 |
(49) 0x6167 VMOVAPS 0x100(%RSP),%XMM1 |
(49) 0x6170 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(49) 0x6176 VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 |
(49) 0x6181 VDIVPS 0x3c0(%RSP),%YMM0,%YMM0 |
(49) 0x618a VMOVUPS %YMM0,(%RAX,%R13,4) |
(49) 0x6190 ADD $0x8,%R13 |
(49) 0x6194 CMP %R13,0xe0(%RSP) |
(49) 0x619c JNE 6045 |
0x61a2 MOV 0x120(%RSP),%RSI |
0x61aa CMP %RBX,%RSI |
0x61ad MOV 0x88(%RSP),%R13 |
0x61b5 VMOVAPS 0x2a0(%RSP),%XMM2 |
0x61be VMOVAPS 0x160(%RSP),%XMM1 |
0x61c7 JE 621b |
(58) 0x61c9 MOV %RBX,0x60(%RSP) |
(58) 0x61ce VMOVSS (%R8,%RBX,4),%XMM0 |
(58) 0x61d4 VSUBSS %XMM2,%XMM0,%XMM0 |
(58) 0x61d8 MOV %RAX,%RBX |
(58) 0x61db VZEROUPPER |
(58) 0x61de CALL 1160 <expf@plt> |
(58) 0x61e3 VMOVAPS 0x2a0(%RSP),%XMM2 |
(58) 0x61ec MOV %RBX,%RAX |
(58) 0x61ef MOV 0x60(%RSP),%RBX |
(58) 0x61f4 VMOVAPS 0x160(%RSP),%XMM1 |
(58) 0x61fd MOV 0x20(%RSP),%R8 |
(58) 0x6202 MOV 0x120(%RSP),%RSI |
(58) 0x620a VDIVSS %XMM1,%XMM0,%XMM0 |
(58) 0x620e VMOVSS %XMM0,(%RAX,%RBX,4) |
(58) 0x6213 INC %RBX |
(58) 0x6216 CMP %RBX,%RSI |
(58) 0x6219 JNE 61c9 |
0x621b MOV 0x2b8(%RSP),%RCX |
0x6223 LEA 0x1(%RCX),%RBX |
0x6227 CMP %R13,%RBX |
0x622a JAE 56b1 |
0x6230 MOV 0x358(%RSP),%RDX |
0x6238 SUB %ECX,%EDX |
0x623a SAL $0x2,%RDX |
0x623e MOV $0x3fffffffc,%RAX |
0x6248 AND %RAX,%RDX |
0x624b ADD $0x4,%RDX |
0x624f MOV 0x350(%RSP),%RDI |
0x6257 IMUL %RCX,%RDI |
0x625b ADD $0x4,%RDI |
0x625f AND %RAX,%RDI |
0x6262 ADD 0x48(%RSP),%RDI |
0x6267 XOR %ESI,%ESI |
0x6269 VZEROUPPER |
0x626c CALL 1090 <memset@plt> |
0x6271 VMOVAPS 0x160(%RSP),%XMM1 |
0x627a MOV 0x2b8(%RSP),%RCX |
0x6282 MOV 0x20(%RSP),%R8 |
0x6287 MOV 0x120(%RSP),%RSI |
0x628f JMP 56b1 |
/home/eoseret/llm-attention/attention_v2.cpp: 43 - 61 |
-------------------------------------------------------------------------------- |
43: for (int row = 0; row < N; ++row) { |
44: const float *S_row = &S[row * N]; |
45: |
46: float max_val = -FLT_MAX; |
47: for (int idx = 0; idx <= row; ++idx) // vectorised |
48: if (S_row[idx] > max_val) max_val = S_row[idx]; |
49: |
50: float sum = 0.0f; |
51: #pragma clang loop vectorize(enable) |
52: for (int idx = 0; idx <= row; ++idx) // vectorised |
53: sum += expf(S_row[idx] - max_val); |
54: |
55: for (int idx = 0; idx <= row; ++idx) //vectorised |
56: P[row * N + idx] = expf(S_row[idx] - max_val) / sum; |
57: |
58: for (int idx = row + 1; idx < N; ++idx) |
59: P[row * N + idx] = 0.0f; |
60: |
61: D[row] = sum; |
| Coverage (%) | Name | Source Location | Module |
|---|
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.13 |
| CQA speedup if FP arith vectorized | 1.84 |
| CQA speedup if fully vectorized | 7.41 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.82 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention_v2.cpp:43-44,attention_v2.cpp:47-47,attention_v2.cpp:52-52,attention_v2.cpp:55-55,attention_v2.cpp:58-61 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 29.50 |
| CQA cycles if no scalar integer | 13.83 |
| CQA cycles if FP arith vectorized | 15.99 |
| CQA cycles if fully vectorized | 3.98 |
| Front-end cycles | 29.50 |
| P0 cycles | 16.20 |
| P1 cycles | 16.20 |
| P2 cycles | 12.00 |
| P3 cycles | 12.00 |
| P4 cycles | 10.00 |
| P5 cycles | 16.20 |
| P6 cycles | 16.20 |
| P7 cycles | 10.00 |
| P8 cycles | 10.00 |
| P9 cycles | 10.00 |
| P10 cycles | 16.20 |
| P11 cycles | 12.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 175.00 |
| Nb uops | 177.00 |
| Nb loads | 36.00 |
| Nb stores | 19.00 |
| Nb stack references | 21.00 |
| FLOP/cycle | 1.29 |
| Nb FLOP add-sub | 38.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 24.14 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 396.00 |
| Bytes stored | 316.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 40.34 |
| Vectorization ratio load | 32.14 |
| Vectorization ratio store | 47.37 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 46.15 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 41.94 |
| Vector-efficiency ratio all | 19.38 |
| Vector-efficiency ratio load | 18.53 |
| Vector-efficiency ratio store | 25.99 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 22.60 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 17.84 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.13 |
| CQA speedup if FP arith vectorized | 1.84 |
| CQA speedup if fully vectorized | 7.41 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.82 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention_v2.cpp:43-44,attention_v2.cpp:47-47,attention_v2.cpp:52-52,attention_v2.cpp:55-55,attention_v2.cpp:58-61 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 29.50 |
| CQA cycles if no scalar integer | 13.83 |
| CQA cycles if FP arith vectorized | 15.99 |
| CQA cycles if fully vectorized | 3.98 |
| Front-end cycles | 29.50 |
| P0 cycles | 16.20 |
| P1 cycles | 16.20 |
| P2 cycles | 12.00 |
| P3 cycles | 12.00 |
| P4 cycles | 10.00 |
| P5 cycles | 16.20 |
| P6 cycles | 16.20 |
| P7 cycles | 10.00 |
| P8 cycles | 10.00 |
| P9 cycles | 10.00 |
| P10 cycles | 16.20 |
| P11 cycles | 12.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 175.00 |
| Nb uops | 177.00 |
| Nb loads | 36.00 |
| Nb stores | 19.00 |
| Nb stack references | 21.00 |
| FLOP/cycle | 1.29 |
| Nb FLOP add-sub | 38.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 24.14 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 396.00 |
| Bytes stored | 316.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 40.34 |
| Vectorization ratio load | 32.14 |
| Vectorization ratio store | 47.37 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 46.15 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 41.94 |
| Vector-efficiency ratio all | 19.38 |
| Vector-efficiency ratio load | 18.53 |
| Vector-efficiency ratio store | 25.99 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 22.60 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 17.84 |
| Path / |
| Function | main |
| Source file and lines | attention_v2.cpp:43-61 |
| Module | attention-clang-gnr256 |
| nb instructions | 175 |
| nb uops | 177 |
| loop length | 916 |
| used x86 registers | 11 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 4 |
| used zmm registers | 0 |
| nb stack references | 21 |
| micro-operation queue | 29.50 cycles |
| front end | 29.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 16.20 | 16.20 | 12.00 | 12.00 | 10.00 | 16.20 | 16.20 | 10.00 | 10.00 | 10.00 | 16.20 | 12.00 |
| cycles | 16.20 | 16.20 | 12.00 | 12.00 | 10.00 | 16.20 | 16.20 | 10.00 | 10.00 | 10.00 | 16.20 | 12.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 29.50 |
| Dispatch | 16.20 |
| Overall L1 | 29.50 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 77% |
| load | 75% |
| store | 90% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 75% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 73% |
| all | 40% |
| load | 32% |
| store | 47% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 46% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 41% |
| all | 11% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 11% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 11% |
| all | 26% |
| load | 26% |
| store | 38% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 29% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| all | 19% |
| load | 18% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 22% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 17% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x130(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVSS %XMM1,(%RAX,%RCX,4) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| INC %RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV 0x128(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| MOV 0x2c0(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| ADD %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| ADD %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV 0x230(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | N/A |
| MOV %RBX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| CMP %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JE 6294 <main+0x3584> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RSI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x20,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| CMP $0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 5711 <main+0x2a01> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VMOVSS 0x18f8(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (6.3%) |
| JMP 57d7 <main+0x2ac7> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x20,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 5723 <main+0x2a13> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VMOVSS 0x18e3(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (6.3%) |
| JMP 579c <main+0x2a8c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 | N/A |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV $0x7fffffffffffffe0,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VBROADCASTSS 0x18c6(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 | scal (6.3%) |
| VMOVAPS %YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM3,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VMAXPS %YMM3,%YMM2,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JE 57e5 <main+0x2ad5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| TEST $0x1c,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| JE 57d7 <main+0x2ac7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| ADD $0x1c,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| JMP 57e0 <main+0x2ad0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 | N/A |
| CMP $0x3,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV %RSI,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM1,0x2a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| MOV %RDX,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %RDI,0x230(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %R9,0x2b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| JA 5824 <main+0x2b14> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 5f9e <main+0x328e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x20,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 5835 <main+0x2b25> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 5e7e <main+0x316e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %R10,0x310(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RSI,0x228(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x420(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %YMM0,0x3c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VMOVAPS %YMM0,0x3a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x3e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS 0x3a0(%RSP),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (50.0%) |
| VADDPS 0x3c0(%RSP),%YMM0,%YMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 | vect (50.0%) |
| VADDPS 0x3e0(%RSP),%YMM0,%YMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 | vect (50.0%) |
| VADDPS %YMM0,%YMM1,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
| MOV 0x120(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x228(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JNE 5e56 <main+0x3146> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x88(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VMOVAPS 0x2a0(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JMP 5fe4 <main+0x32d4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| TEST $0x1c,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| MOV 0x88(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VMOVAPS 0x2a0(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| MOV 0x310(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JE 5f9e <main+0x328e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RBX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM2,%XMM0,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| ADD $0x1c,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0x228(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
| MOV 0x120(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x228(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV 0x88(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VMOVAPS 0x2a0(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 5fe4 <main+0x32d4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x8,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS %XMM2,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| JAE 6006 <main+0x32f6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV 0x230(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVAPS %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (25.0%) |
| JMP 61c9 <main+0x34b9> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND $-0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| LEA 0x18(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| AND %RSI,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VBROADCASTSS %XMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x3c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x3a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV 0x120(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV 0x88(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VMOVAPS 0x2a0(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| VMOVAPS 0x160(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 621b <main+0x350b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x2b8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| LEA 0x1(%RCX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| CMP %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 56b1 <main+0x29a1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x358(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| SUB %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| SAL $0x2,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
| MOV $0x3fffffffc,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| ADD $0x4,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV 0x350(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
| ADD $0x4,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| ADD 0x48(%RSP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 1090 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x160(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| MOV 0x2b8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x120(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JMP 56b1 <main+0x29a1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| Function | main |
| Source file and lines | attention_v2.cpp:43-61 |
| Module | attention-clang-gnr256 |
| nb instructions | 175 |
| nb uops | 177 |
| loop length | 916 |
| used x86 registers | 11 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 4 |
| used zmm registers | 0 |
| nb stack references | 21 |
| micro-operation queue | 29.50 cycles |
| front end | 29.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 16.20 | 16.20 | 12.00 | 12.00 | 10.00 | 16.20 | 16.20 | 10.00 | 10.00 | 10.00 | 16.20 | 12.00 |
| cycles | 16.20 | 16.20 | 12.00 | 12.00 | 10.00 | 16.20 | 16.20 | 10.00 | 10.00 | 10.00 | 16.20 | 12.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 29.50 |
| Dispatch | 16.20 |
| Overall L1 | 29.50 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 77% |
| load | 75% |
| store | 90% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 75% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 73% |
| all | 40% |
| load | 32% |
| store | 47% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 46% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 41% |
| all | 11% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 11% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 11% |
| all | 26% |
| load | 26% |
| store | 38% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 29% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| all | 19% |
| load | 18% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 22% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 17% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x130(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVSS %XMM1,(%RAX,%RCX,4) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| INC %RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV 0x128(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| MOV 0x2c0(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| ADD %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| ADD %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV 0x230(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | N/A |
| MOV %RBX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| CMP %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JE 6294 <main+0x3584> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x4,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RSI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x20,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| CMP $0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 5711 <main+0x2a01> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VMOVSS 0x18f8(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (6.3%) |
| JMP 57d7 <main+0x2ac7> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x20,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 5723 <main+0x2a13> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VMOVSS 0x18e3(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (6.3%) |
| JMP 579c <main+0x2a8c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 | N/A |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV $0x7fffffffffffffe0,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VBROADCASTSS 0x18c6(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 | scal (6.3%) |
| VMOVAPS %YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM3,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VMAXPS %YMM3,%YMM2,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JE 57e5 <main+0x2ad5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| TEST $0x1c,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| JE 57d7 <main+0x2ac7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| ADD $0x1c,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| JMP 57e0 <main+0x2ad0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 | N/A |
| CMP $0x3,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV %RSI,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM1,0x2a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| MOV %RDX,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %RDI,0x230(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %R9,0x2b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| JA 5824 <main+0x2b14> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 5f9e <main+0x328e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x20,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 5835 <main+0x2b25> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 5e7e <main+0x316e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %R10,0x310(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RSI,0x228(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x420(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %YMM0,0x3c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VMOVAPS %YMM0,0x3a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x3e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS 0x3a0(%RSP),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (50.0%) |
| VADDPS 0x3c0(%RSP),%YMM0,%YMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 | vect (50.0%) |
| VADDPS 0x3e0(%RSP),%YMM0,%YMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 | vect (50.0%) |
| VADDPS %YMM0,%YMM1,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
| MOV 0x120(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x228(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JNE 5e56 <main+0x3146> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x88(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VMOVAPS 0x2a0(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JMP 5fe4 <main+0x32d4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| TEST $0x1c,%SIL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| MOV 0x88(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VMOVAPS 0x2a0(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| MOV 0x310(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JE 5f9e <main+0x328e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RBX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM2,%XMM0,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| ADD $0x1c,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0x228(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
| MOV 0x120(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x228(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV 0x88(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VMOVAPS 0x2a0(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 5fe4 <main+0x32d4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x8,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS %XMM2,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| JAE 6006 <main+0x32f6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV 0x230(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVAPS %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (25.0%) |
| JMP 61c9 <main+0x34b9> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND $-0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| LEA 0x18(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| AND %RSI,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VBROADCASTSS %XMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x3c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x3a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV 0x120(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV 0x88(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VMOVAPS 0x2a0(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| VMOVAPS 0x160(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 621b <main+0x350b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x2b8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| LEA 0x1(%RCX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| CMP %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 56b1 <main+0x29a1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x358(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| SUB %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| SAL $0x2,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
| MOV $0x3fffffffc,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| ADD $0x4,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV 0x350(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
| ADD $0x4,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| ADD 0x48(%RSP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 1090 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x160(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| MOV 0x2b8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x120(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JMP 56b1 <main+0x29a1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
