| Loop Id: 41 | Module: attention-avx512 | Source: attention.cpp:43-284 [...] | Coverage: 0.39% |
|---|
| Loop Id: 41 | Module: attention-avx512 | Source: attention.cpp:43-284 [...] | Coverage: 0.39% |
|---|
0x5700 MOV 0x1e8(%RSP),%RAX |
0x5708 VMOVSS %XMM2,(%RAX,%RCX,4) |
0x570d MOV 0x1f0(%RSP),%RDX |
0x5715 INC %RDX |
0x5718 LEA (,%R15,4),%RAX |
0x5720 MOV 0x280(%RSP),%RDI |
0x5728 ADD %RAX,%RDI |
0x572b ADD %RAX,%RSI |
0x572e MOV 0x1f8(%RSP),%RCX |
0x5736 ADD %RAX,%RCX |
0x5739 MOV %RCX,%RAX |
0x573c MOV %RBX,%R8 |
0x573f CMP %R15,%RBX |
0x5742 MOV 0xd8(%RSP),%RBX |
0x574a MOV 0xd0(%RSP),%RCX |
0x5752 MOV 0x270(%RSP),%R12 |
0x575a JE 56c0 |
0x5760 MOV %RAX,0x1f8(%RSP) |
0x5768 MOV %RDX,%R9 |
0x576b AND $-0x8,%R9 |
0x576f MOV %RDX,%R10 |
0x5772 AND $-0x40,%R10 |
0x5776 CMP $0x8,%RDX |
0x577a MOV %RDX,%RBX |
0x577d JAE 5790 |
0x577f XOR %EAX,%EAX |
0x5781 VMOVSS 0x287b(%RIP),%XMM1 |
0x5789 JMP 58a0 |
0x5790 CMP $0x40,%RBX |
0x5794 JAE 57b0 |
0x5796 XOR %EAX,%EAX |
0x5798 VMOVSS 0x2864(%RIP),%XMM1 |
0x57a0 JMP 584a |
0x57b0 MOV %RBX,%RAX |
0x57b3 MOV $0x7fffffffffffffc0,%RCX |
0x57bd AND %RCX,%RAX |
0x57c0 XOR %ECX,%ECX |
0x57c2 VBROADCASTSS 0x2838(%RIP),%ZMM3 |
0x57cc VMOVAPS %ZMM3,%ZMM0 |
0x57d2 VMOVAPS %ZMM3,%ZMM1 |
0x57d8 VMOVAPS %ZMM3,%ZMM2 |
0x57de XCHG %AX,%AX |
(36) 0x57e0 VMAXPS -0xc0(%RDI,%RCX,4),%ZMM0,%ZMM0 |
(36) 0x57e8 VMAXPS -0x80(%RDI,%RCX,4),%ZMM1,%ZMM1 |
(36) 0x57f0 VMAXPS -0x40(%RDI,%RCX,4),%ZMM2,%ZMM2 |
(36) 0x57f8 VMAXPS (%RDI,%RCX,4),%ZMM3,%ZMM3 |
(36) 0x57ff ADD $0x40,%RCX |
(36) 0x5803 CMP %RCX,%R10 |
(36) 0x5806 JNE 57e0 |
0x5808 VMAXPS %ZMM1,%ZMM0,%ZMM0 |
0x580e VMAXPS %ZMM3,%ZMM2,%ZMM1 |
0x5814 VMAXPS %ZMM1,%ZMM0,%ZMM0 |
0x581a VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 |
0x5821 VMAXPS %YMM1,%YMM0,%YMM0 |
0x5825 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x582b VMAXPS %XMM1,%XMM0,%XMM0 |
0x582f VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x5834 VMAXPS %XMM1,%XMM0,%XMM0 |
0x5838 VMOVSHDUP %XMM0,%XMM1 |
0x583c VMAXSS %XMM1,%XMM0,%XMM1 |
0x5840 CMP %RAX,%RBX |
0x5843 JE 58ad |
0x5845 TEST $0x38,%BL |
0x5848 JE 58a0 |
0x584a MOV %RAX,%RCX |
0x584d MOV $0x7fffffffffffffc0,%RAX |
0x5857 ADD $0x38,%RAX |
0x585b AND %RBX,%RAX |
0x585e VBROADCASTSS %XMM1,%YMM0 |
0x5863 NOPW %CS:(%RAX,%RAX,1) |
(46) 0x5870 VMAXPS (%RSI,%RCX,4),%YMM0,%YMM0 |
(46) 0x5875 ADD $0x8,%RCX |
(46) 0x5879 CMP %RCX,%R9 |
(46) 0x587c JNE 5870 |
0x587e VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x5884 VMAXPS %XMM1,%XMM0,%XMM0 |
0x5888 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x588d VMAXPS %XMM1,%XMM0,%XMM0 |
0x5891 VMOVSHDUP %XMM0,%XMM1 |
0x5895 VMAXSS %XMM1,%XMM0,%XMM1 |
0x5899 JMP 58a8 |
(45) 0x58a0 VMAXSS (%RSI,%RAX,4),%XMM1,%XMM1 |
(45) 0x58a5 INC %RAX |
(45) 0x58a8 CMP %RAX,%RBX |
(45) 0x58ab JNE 58a0 |
0x58ad CMP $0x8,%RBX |
0x58b1 MOV %RBX,0x1f0(%RSP) |
0x58b9 MOV %RSI,0x38(%RSP) |
0x58be VMOVAPS %XMM1,0x1c0(%RSP) |
0x58c7 MOV %RDI,0x280(%RSP) |
0x58cf MOV %R8,0x278(%RSP) |
0x58d7 JAE 58f0 |
0x58d9 VXORPS %XMM2,%XMM2,%XMM2 |
0x58dd XOR %R15D,%R15D |
0x58e0 JMP 6630 |
0x58f0 CMP $0x40,%RBX |
0x58f4 JAE 5910 |
0x58f6 VXORPS %XMM2,%XMM2,%XMM2 |
0x58fa XOR %R15D,%R15D |
0x58fd JMP 6442 |
0x5910 MOV %R9,0x298(%RSP) |
0x5918 MOV %RBX,%R15 |
0x591b MOV $0x7fffffffffffffc0,%RAX |
0x5925 AND %RAX,%R15 |
0x5928 VBROADCASTSS %XMM1,%ZMM0 |
0x592e VMOVAPS %ZMM0,0x380(%RSP) |
0x5936 VXORPS %XMM0,%XMM0,%XMM0 |
0x593a VMOVAPS %ZMM0,0x2c0(%RSP) |
0x5942 XOR %EAX,%EAX |
0x5944 VMOVAPS %ZMM0,0x300(%RSP) |
0x594c VMOVAPS %ZMM0,0x340(%RSP) |
0x5954 VXORPS %XMM1,%XMM1,%XMM1 |
0x5958 MOV %R10,0x2b0(%RSP) |
(37) 0x5960 VMOVAPS %ZMM1,0x3c0(%RSP) |
(37) 0x5968 MOV %RAX,0x2b8(%RSP) |
(37) 0x5970 VMOVUPS -0xc0(%RDI,%RAX,4),%ZMM0 |
(37) 0x5978 VMOVUPS -0x80(%RDI,%RAX,4),%ZMM1 |
(37) 0x5980 VMOVUPS -0x40(%RDI,%RAX,4),%ZMM2 |
(37) 0x5988 VMOVUPS (%RDI,%RAX,4),%ZMM3 |
(37) 0x598f VMOVAPS 0x380(%RSP),%ZMM4 |
(37) 0x5997 VSUBPS %ZMM4,%ZMM0,%ZMM5 |
(37) 0x599d VMOVAPS %ZMM5,0x80(%RSP) |
(37) 0x59a5 VSUBPS %ZMM4,%ZMM1,%ZMM0 |
(37) 0x59ab VMOVAPS %ZMM0,0x100(%RSP) |
(37) 0x59b3 VSUBPS %ZMM4,%ZMM2,%ZMM0 |
(37) 0x59b9 VMOVAPS %ZMM0,0x180(%RSP) |
(37) 0x59c1 VSUBPS %ZMM4,%ZMM3,%ZMM0 |
(37) 0x59c7 VMOVAPS %ZMM0,0x200(%RSP) |
(37) 0x59cf VEXTRACTF32X4 $0x3,%ZMM5,%XMM0 |
(37) 0x59d6 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x59dc VZEROUPPER |
(37) 0x59df CALL 1110 <expf@plt> |
(37) 0x59e4 VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x59ed VMOVSHDUP 0x40(%RSP),%XMM0 |
(37) 0x59f3 CALL 1110 <expf@plt> |
(37) 0x59f8 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5a01 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5a07 VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5a10 VPERMILPD $0x1,0x40(%RSP),%XMM0 |
(37) 0x5a18 CALL 1110 <expf@plt> |
(37) 0x5a1d VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5a26 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5a2c VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5a35 VPERMILPS $-0x1,0x40(%RSP),%XMM0 |
(37) 0x5a3d CALL 1110 <expf@plt> |
(37) 0x5a42 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5a4b VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5a51 VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5a5a VMOVAPS 0x80(%RSP),%ZMM0 |
(37) 0x5a62 VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(37) 0x5a69 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5a6f VZEROUPPER |
(37) 0x5a72 CALL 1110 <expf@plt> |
(37) 0x5a77 VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5a80 VMOVSHDUP 0x40(%RSP),%XMM0 |
(37) 0x5a86 CALL 1110 <expf@plt> |
(37) 0x5a8b VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5a94 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5a9a VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5aa3 VPERMILPD $0x1,0x40(%RSP),%XMM0 |
(37) 0x5aab CALL 1110 <expf@plt> |
(37) 0x5ab0 VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5ab9 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5abf VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5ac8 VPERMILPS $-0x1,0x40(%RSP),%XMM0 |
(37) 0x5ad0 CALL 1110 <expf@plt> |
(37) 0x5ad5 VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5ade VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5ae4 VINSERTF128 $0x1,0x140(%RSP),%YMM0,%YMM0 |
(37) 0x5aef VMOVAPS %YMM0,0x140(%RSP) |
(37) 0x5af8 VMOVAPS 0x80(%RSP),%ZMM0 |
(37) 0x5b00 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(37) 0x5b06 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5b0c VZEROUPPER |
(37) 0x5b0f CALL 1110 <expf@plt> |
(37) 0x5b14 VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5b1d VMOVSHDUP 0x40(%RSP),%XMM0 |
(37) 0x5b23 CALL 1110 <expf@plt> |
(37) 0x5b28 VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5b31 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5b37 VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5b40 VPERMILPD $0x1,0x40(%RSP),%XMM0 |
(37) 0x5b48 CALL 1110 <expf@plt> |
(37) 0x5b4d VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5b56 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5b5c VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5b65 VPERMILPS $-0x1,0x40(%RSP),%XMM0 |
(37) 0x5b6d CALL 1110 <expf@plt> |
(37) 0x5b72 VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5b7b VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5b81 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5b87 VMOVAPS 0x80(%RSP),%ZMM0 |
(37) 0x5b8f VZEROUPPER |
(37) 0x5b92 CALL 1110 <expf@plt> |
(37) 0x5b97 VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5ba0 VMOVSHDUP 0x80(%RSP),%XMM0 |
(37) 0x5ba9 CALL 1110 <expf@plt> |
(37) 0x5bae VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5bb7 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5bbd VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5bc6 VPERMILPD $0x1,0x80(%RSP),%XMM0 |
(37) 0x5bd1 CALL 1110 <expf@plt> |
(37) 0x5bd6 VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5bdf VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5be5 VMOVAPS %XMM0,0xc0(%RSP) |
(37) 0x5bee VPERMILPS $-0x1,0x80(%RSP),%XMM0 |
(37) 0x5bf9 CALL 1110 <expf@plt> |
(37) 0x5bfe VMOVAPS 0xc0(%RSP),%XMM1 |
(37) 0x5c07 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5c0d VINSERTF128 $0x1,0x40(%RSP),%YMM0,%YMM0 |
(37) 0x5c15 VINSERTF64X4 $0x1,0x140(%RSP),%ZMM0,%ZMM0 |
(37) 0x5c1e VMOVAPS 0x2c0(%RSP),%ZMM1 |
(37) 0x5c26 VADDPS %ZMM1,%ZMM0,%ZMM1 |
(37) 0x5c2c VMOVAPS %ZMM1,0x2c0(%RSP) |
(37) 0x5c34 VMOVAPS 0x100(%RSP),%ZMM0 |
(37) 0x5c3c VEXTRACTF32X4 $0x3,%ZMM0,%XMM0 |
(37) 0x5c43 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x5c4c VZEROUPPER |
(37) 0x5c4f CALL 1110 <expf@plt> |
(37) 0x5c54 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5c5a VMOVSHDUP 0x80(%RSP),%XMM0 |
(37) 0x5c63 CALL 1110 <expf@plt> |
(37) 0x5c68 VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x5c6e VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5c74 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5c7a VPERMILPD $0x1,0x80(%RSP),%XMM0 |
(37) 0x5c85 CALL 1110 <expf@plt> |
(37) 0x5c8a VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x5c90 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5c96 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5c9c VPERMILPS $-0x1,0x80(%RSP),%XMM0 |
(37) 0x5ca7 CALL 1110 <expf@plt> |
(37) 0x5cac VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x5cb2 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5cb8 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5cbe VMOVAPS 0x100(%RSP),%ZMM0 |
(37) 0x5cc6 VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(37) 0x5ccd VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x5cd6 VZEROUPPER |
(37) 0x5cd9 CALL 1110 <expf@plt> |
(37) 0x5cde VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5ce7 VMOVSHDUP 0x80(%RSP),%XMM0 |
(37) 0x5cf0 CALL 1110 <expf@plt> |
(37) 0x5cf5 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5cfe VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5d04 VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5d0d VPERMILPD $0x1,0x80(%RSP),%XMM0 |
(37) 0x5d18 CALL 1110 <expf@plt> |
(37) 0x5d1d VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5d26 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5d2c VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5d35 VPERMILPS $-0x1,0x80(%RSP),%XMM0 |
(37) 0x5d40 CALL 1110 <expf@plt> |
(37) 0x5d45 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5d4e VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5d54 VINSERTF128 $0x1,0x40(%RSP),%YMM0,%YMM0 |
(37) 0x5d5c VMOVAPS %YMM0,0x40(%RSP) |
(37) 0x5d62 VMOVAPS 0x100(%RSP),%ZMM0 |
(37) 0x5d6a VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(37) 0x5d70 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x5d79 VZEROUPPER |
(37) 0x5d7c CALL 1110 <expf@plt> |
(37) 0x5d81 VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5d8a VMOVSHDUP 0x80(%RSP),%XMM0 |
(37) 0x5d93 CALL 1110 <expf@plt> |
(37) 0x5d98 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5da1 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5da7 VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5db0 VPERMILPD $0x1,0x80(%RSP),%XMM0 |
(37) 0x5dbb CALL 1110 <expf@plt> |
(37) 0x5dc0 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5dc9 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5dcf VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5dd8 VPERMILPS $-0x1,0x80(%RSP),%XMM0 |
(37) 0x5de3 CALL 1110 <expf@plt> |
(37) 0x5de8 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5df1 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5df7 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x5e00 VMOVAPS 0x100(%RSP),%ZMM0 |
(37) 0x5e08 VZEROUPPER |
(37) 0x5e0b CALL 1110 <expf@plt> |
(37) 0x5e10 VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5e19 VMOVSHDUP 0x100(%RSP),%XMM0 |
(37) 0x5e22 CALL 1110 <expf@plt> |
(37) 0x5e27 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5e30 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5e36 VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5e3f VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(37) 0x5e4a CALL 1110 <expf@plt> |
(37) 0x5e4f VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5e58 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5e5e VMOVAPS %XMM0,0x140(%RSP) |
(37) 0x5e67 VPERMILPS $-0x1,0x100(%RSP),%XMM0 |
(37) 0x5e72 CALL 1110 <expf@plt> |
(37) 0x5e77 VMOVAPS 0x140(%RSP),%XMM1 |
(37) 0x5e80 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5e86 VINSERTF128 $0x1,0x80(%RSP),%YMM0,%YMM0 |
(37) 0x5e91 VINSERTF64X4 $0x1,0x40(%RSP),%ZMM0,%ZMM0 |
(37) 0x5e9a VMOVAPS 0x300(%RSP),%ZMM1 |
(37) 0x5ea2 VADDPS %ZMM1,%ZMM0,%ZMM1 |
(37) 0x5ea8 VMOVAPS %ZMM1,0x300(%RSP) |
(37) 0x5eb0 VMOVAPS 0x180(%RSP),%ZMM0 |
(37) 0x5eb8 VEXTRACTF32X4 $0x3,%ZMM0,%XMM0 |
(37) 0x5ebf VMOVAPS %XMM0,0x100(%RSP) |
(37) 0x5ec8 VZEROUPPER |
(37) 0x5ecb CALL 1110 <expf@plt> |
(37) 0x5ed0 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x5ed9 VMOVSHDUP 0x100(%RSP),%XMM0 |
(37) 0x5ee2 CALL 1110 <expf@plt> |
(37) 0x5ee7 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x5ef0 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5ef6 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x5eff VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(37) 0x5f0a CALL 1110 <expf@plt> |
(37) 0x5f0f VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x5f18 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5f1e VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x5f27 VPERMILPS $-0x1,0x100(%RSP),%XMM0 |
(37) 0x5f32 CALL 1110 <expf@plt> |
(37) 0x5f37 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x5f40 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5f46 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x5f4f VMOVAPS 0x180(%RSP),%ZMM0 |
(37) 0x5f57 VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(37) 0x5f5e VMOVAPS %XMM0,0x100(%RSP) |
(37) 0x5f67 VZEROUPPER |
(37) 0x5f6a CALL 1110 <expf@plt> |
(37) 0x5f6f VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5f75 VMOVSHDUP 0x100(%RSP),%XMM0 |
(37) 0x5f7e CALL 1110 <expf@plt> |
(37) 0x5f83 VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x5f89 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x5f8f VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5f95 VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(37) 0x5fa0 CALL 1110 <expf@plt> |
(37) 0x5fa5 VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x5fab VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x5fb1 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x5fb7 VPERMILPS $-0x1,0x100(%RSP),%XMM0 |
(37) 0x5fc2 CALL 1110 <expf@plt> |
(37) 0x5fc7 VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x5fcd VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x5fd3 VINSERTF128 $0x1,0x80(%RSP),%YMM0,%YMM0 |
(37) 0x5fde VMOVAPS %YMM0,0x80(%RSP) |
(37) 0x5fe7 VMOVAPS 0x180(%RSP),%ZMM0 |
(37) 0x5fef VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(37) 0x5ff5 VMOVAPS %XMM0,0x100(%RSP) |
(37) 0x5ffe VZEROUPPER |
(37) 0x6001 CALL 1110 <expf@plt> |
(37) 0x6006 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x600c VMOVSHDUP 0x100(%RSP),%XMM0 |
(37) 0x6015 CALL 1110 <expf@plt> |
(37) 0x601a VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x6020 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x6026 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x602c VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(37) 0x6037 CALL 1110 <expf@plt> |
(37) 0x603c VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x6042 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x6048 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x604e VPERMILPS $-0x1,0x100(%RSP),%XMM0 |
(37) 0x6059 CALL 1110 <expf@plt> |
(37) 0x605e VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x6064 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x606a VMOVAPS %XMM0,0x100(%RSP) |
(37) 0x6073 VMOVAPS 0x180(%RSP),%ZMM0 |
(37) 0x607b VZEROUPPER |
(37) 0x607e CALL 1110 <expf@plt> |
(37) 0x6083 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x6089 VMOVSHDUP 0x180(%RSP),%XMM0 |
(37) 0x6092 CALL 1110 <expf@plt> |
(37) 0x6097 VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x609d VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x60a3 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x60a9 VPERMILPD $0x1,0x180(%RSP),%XMM0 |
(37) 0x60b4 CALL 1110 <expf@plt> |
(37) 0x60b9 VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x60bf VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x60c5 VMOVAPS %XMM0,0x40(%RSP) |
(37) 0x60cb VPERMILPS $-0x1,0x180(%RSP),%XMM0 |
(37) 0x60d6 CALL 1110 <expf@plt> |
(37) 0x60db VMOVAPS 0x40(%RSP),%XMM1 |
(37) 0x60e1 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x60e7 VINSERTF128 $0x1,0x100(%RSP),%YMM0,%YMM0 |
(37) 0x60f2 VINSERTF64X4 $0x1,0x80(%RSP),%ZMM0,%ZMM0 |
(37) 0x60fb VMOVAPS 0x340(%RSP),%ZMM1 |
(37) 0x6103 VADDPS %ZMM1,%ZMM0,%ZMM1 |
(37) 0x6109 VMOVAPS %ZMM1,0x340(%RSP) |
(37) 0x6111 VMOVAPS 0x200(%RSP),%ZMM0 |
(37) 0x6119 VEXTRACTF32X4 $0x3,%ZMM0,%XMM0 |
(37) 0x6120 VMOVAPS %XMM0,0x180(%RSP) |
(37) 0x6129 VZEROUPPER |
(37) 0x612c CALL 1110 <expf@plt> |
(37) 0x6131 VMOVAPS %XMM0,0x100(%RSP) |
(37) 0x613a VMOVSHDUP 0x180(%RSP),%XMM0 |
(37) 0x6143 CALL 1110 <expf@plt> |
(37) 0x6148 VMOVAPS 0x100(%RSP),%XMM1 |
(37) 0x6151 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x6157 VMOVAPS %XMM0,0x100(%RSP) |
(37) 0x6160 VPERMILPD $0x1,0x180(%RSP),%XMM0 |
(37) 0x616b CALL 1110 <expf@plt> |
(37) 0x6170 VMOVAPS 0x100(%RSP),%XMM1 |
(37) 0x6179 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x617f VMOVAPS %XMM0,0x100(%RSP) |
(37) 0x6188 VPERMILPS $-0x1,0x180(%RSP),%XMM0 |
(37) 0x6193 CALL 1110 <expf@plt> |
(37) 0x6198 VMOVAPS 0x100(%RSP),%XMM1 |
(37) 0x61a1 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x61a7 VMOVAPS %XMM0,0x100(%RSP) |
(37) 0x61b0 VMOVAPS 0x200(%RSP),%ZMM0 |
(37) 0x61b8 VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(37) 0x61bf VMOVAPS %XMM0,0x180(%RSP) |
(37) 0x61c8 VZEROUPPER |
(37) 0x61cb CALL 1110 <expf@plt> |
(37) 0x61d0 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x61d9 VMOVSHDUP 0x180(%RSP),%XMM0 |
(37) 0x61e2 CALL 1110 <expf@plt> |
(37) 0x61e7 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x61f0 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x61f6 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x61ff VPERMILPD $0x1,0x180(%RSP),%XMM0 |
(37) 0x620a CALL 1110 <expf@plt> |
(37) 0x620f VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x6218 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x621e VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x6227 VPERMILPS $-0x1,0x180(%RSP),%XMM0 |
(37) 0x6232 CALL 1110 <expf@plt> |
(37) 0x6237 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x6240 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x6246 VINSERTF128 $0x1,0x100(%RSP),%YMM0,%YMM0 |
(37) 0x6251 VMOVAPS %YMM0,0x100(%RSP) |
(37) 0x625a VMOVAPS 0x200(%RSP),%ZMM0 |
(37) 0x6262 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(37) 0x6268 VMOVAPS %XMM0,0x180(%RSP) |
(37) 0x6271 VZEROUPPER |
(37) 0x6274 CALL 1110 <expf@plt> |
(37) 0x6279 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x6282 VMOVSHDUP 0x180(%RSP),%XMM0 |
(37) 0x628b CALL 1110 <expf@plt> |
(37) 0x6290 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x6299 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x629f VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x62a8 VPERMILPD $0x1,0x180(%RSP),%XMM0 |
(37) 0x62b3 CALL 1110 <expf@plt> |
(37) 0x62b8 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x62c1 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x62c7 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x62d0 VPERMILPS $-0x1,0x180(%RSP),%XMM0 |
(37) 0x62db CALL 1110 <expf@plt> |
(37) 0x62e0 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x62e9 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x62ef VMOVAPS %XMM0,0x180(%RSP) |
(37) 0x62f8 VMOVAPS 0x200(%RSP),%ZMM0 |
(37) 0x6300 VZEROUPPER |
(37) 0x6303 CALL 1110 <expf@plt> |
(37) 0x6308 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x6311 VMOVSHDUP 0x200(%RSP),%XMM0 |
(37) 0x631a CALL 1110 <expf@plt> |
(37) 0x631f VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x6328 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x632e VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x6337 VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(37) 0x6342 CALL 1110 <expf@plt> |
(37) 0x6347 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x6350 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x6356 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x635f VPERMILPS $-0x1,0x200(%RSP),%XMM0 |
(37) 0x636a CALL 1110 <expf@plt> |
(37) 0x636f VMOVAPS 0x3c0(%RSP),%ZMM1 |
(37) 0x6377 MOV 0x2b8(%RSP),%RAX |
(37) 0x637f MOV 0x2b0(%RSP),%R10 |
(37) 0x6387 MOV 0x280(%RSP),%RDI |
(37) 0x638f VMOVAPS 0x80(%RSP),%XMM2 |
(37) 0x6398 VINSERTPS $0x30,%XMM0,%XMM2,%XMM0 |
(37) 0x639e VINSERTF128 $0x1,0x180(%RSP),%YMM0,%YMM0 |
(37) 0x63a9 VINSERTF64X4 $0x1,0x100(%RSP),%ZMM0,%ZMM0 |
(37) 0x63b2 VADDPS %ZMM1,%ZMM0,%ZMM1 |
(37) 0x63b8 ADD $0x40,%RAX |
(37) 0x63bc CMP %RAX,%R10 |
(37) 0x63bf JNE 5960 |
0x63c5 VMOVAPS 0x300(%RSP),%ZMM0 |
0x63cd VADDPS 0x2c0(%RSP),%ZMM0,%ZMM0 |
0x63d5 VADDPS 0x340(%RSP),%ZMM0,%ZMM0 |
0x63dd VADDPS %ZMM0,%ZMM1,%ZMM0 |
0x63e3 VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 |
0x63ea VADDPS %ZMM1,%ZMM0,%ZMM0 |
0x63f0 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x63f6 VADDPS %XMM1,%XMM0,%XMM0 |
0x63fa VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x63ff VADDPS %XMM1,%XMM0,%XMM0 |
0x6403 VMOVSHDUP %XMM0,%XMM1 |
0x6407 VADDSS %XMM1,%XMM0,%XMM2 |
0x640b CMP %R15,%RBX |
0x640e JNE 6423 |
0x6410 MOV 0x38(%RSP),%RSI |
0x6415 VMOVAPS 0x1c0(%RSP),%XMM1 |
0x641e JMP 666e |
0x6423 TEST $0x38,%BL |
0x6426 MOV 0x38(%RSP),%RSI |
0x642b VMOVAPS 0x1c0(%RSP),%XMM1 |
0x6434 MOV 0x298(%RSP),%R9 |
0x643c JE 6630 |
0x6442 MOV %RBX,%RCX |
0x6445 MOV %R15,%RBX |
0x6448 VXORPS %XMM0,%XMM0,%XMM0 |
0x644c VBLENDPS $0x1,%XMM2,%XMM0,%XMM2 |
0x6452 MOV $0x7fffffffffffffc0,%RAX |
0x645c ADD $0x38,%RAX |
0x6460 AND %RCX,%RAX |
0x6463 MOV %RAX,0x2c0(%RSP) |
0x646b VBROADCASTSS %XMM1,%YMM0 |
0x6470 VMOVAPS %YMM0,0x40(%RSP) |
0x6476 NOPW %CS:(%RAX,%RAX,1) |
(44) 0x6480 VMOVAPS %YMM2,0x100(%RSP) |
(44) 0x6489 VMOVUPS (%RSI,%RBX,4),%YMM0 |
(44) 0x648e VSUBPS 0x40(%RSP),%YMM0,%YMM0 |
(44) 0x6494 VMOVAPS %YMM0,0x200(%RSP) |
(44) 0x649d VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(44) 0x64a3 VMOVAPS %XMM0,0x180(%RSP) |
(44) 0x64ac MOV %R9,%R15 |
(44) 0x64af VZEROUPPER |
(44) 0x64b2 CALL 1110 <expf@plt> |
(44) 0x64b7 VMOVAPS %XMM0,0x80(%RSP) |
(44) 0x64c0 VMOVSHDUP 0x180(%RSP),%XMM0 |
(44) 0x64c9 CALL 1110 <expf@plt> |
(44) 0x64ce VMOVAPS 0x80(%RSP),%XMM1 |
(44) 0x64d7 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(44) 0x64dd VMOVAPS %XMM0,0x80(%RSP) |
(44) 0x64e6 VPERMILPD $0x1,0x180(%RSP),%XMM0 |
(44) 0x64f1 CALL 1110 <expf@plt> |
(44) 0x64f6 VMOVAPS 0x80(%RSP),%XMM1 |
(44) 0x64ff VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(44) 0x6505 VMOVAPS %XMM0,0x80(%RSP) |
(44) 0x650e VPERMILPS $-0x1,0x180(%RSP),%XMM0 |
(44) 0x6519 CALL 1110 <expf@plt> |
(44) 0x651e VMOVAPS 0x80(%RSP),%XMM1 |
(44) 0x6527 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(44) 0x652d VMOVAPS %XMM0,0x180(%RSP) |
(44) 0x6536 VMOVAPS 0x200(%RSP),%YMM0 |
(44) 0x653f VZEROUPPER |
(44) 0x6542 CALL 1110 <expf@plt> |
(44) 0x6547 VMOVAPS %XMM0,0x80(%RSP) |
(44) 0x6550 VMOVSHDUP 0x200(%RSP),%XMM0 |
(44) 0x6559 CALL 1110 <expf@plt> |
(44) 0x655e VMOVAPS 0x80(%RSP),%XMM1 |
(44) 0x6567 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(44) 0x656d VMOVAPS %XMM0,0x80(%RSP) |
(44) 0x6576 VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(44) 0x6581 CALL 1110 <expf@plt> |
(44) 0x6586 VMOVAPS 0x80(%RSP),%XMM1 |
(44) 0x658f VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(44) 0x6595 VMOVAPS %XMM0,0x80(%RSP) |
(44) 0x659e VPERMILPS $-0x1,0x200(%RSP),%XMM0 |
(44) 0x65a9 CALL 1110 <expf@plt> |
(44) 0x65ae VMOVAPS 0x100(%RSP),%YMM2 |
(44) 0x65b7 MOV %R15,%R9 |
(44) 0x65ba MOV 0x38(%RSP),%RSI |
(44) 0x65bf VMOVAPS 0x80(%RSP),%XMM1 |
(44) 0x65c8 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(44) 0x65ce VINSERTF128 $0x1,0x180(%RSP),%YMM0,%YMM0 |
(44) 0x65d9 VADDPS %YMM2,%YMM0,%YMM2 |
(44) 0x65dd ADD $0x8,%RBX |
(44) 0x65e1 CMP %RBX,%R15 |
(44) 0x65e4 JNE 6480 |
0x65ea VEXTRACTF128 $0x1,%YMM2,%XMM0 |
0x65f0 VADDPS %XMM0,%XMM2,%XMM0 |
0x65f4 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x65f9 VADDPS %XMM1,%XMM0,%XMM0 |
0x65fd VMOVSHDUP %XMM0,%XMM1 |
0x6601 VADDSS %XMM1,%XMM0,%XMM2 |
0x6605 MOV 0x1f0(%RSP),%RBX |
0x660d MOV 0x2c0(%RSP),%R15 |
0x6615 CMP %R15,%RBX |
0x6618 VMOVAPS 0x1c0(%RSP),%XMM1 |
0x6621 JE 666e |
0x6623 NOPW %CS:(%RAX,%RAX,1) |
(38) 0x6630 VMOVAPS %XMM2,0x200(%RSP) |
(38) 0x6639 VMOVSS (%RSI,%R15,4),%XMM0 |
(38) 0x663f VSUBSS %XMM1,%XMM0,%XMM0 |
(38) 0x6643 VZEROUPPER |
(38) 0x6646 CALL 1110 <expf@plt> |
(38) 0x664b VMOVAPS 0x200(%RSP),%XMM2 |
(38) 0x6654 VMOVAPS 0x1c0(%RSP),%XMM1 |
(38) 0x665d MOV 0x38(%RSP),%RSI |
(38) 0x6662 VADDSS %XMM2,%XMM0,%XMM2 |
(38) 0x6666 INC %R15 |
(38) 0x6669 CMP %R15,%RBX |
(38) 0x666c JNE 6630 |
0x666e CMP $0x4,%RBX |
0x6672 VMOVAPS %XMM2,0x200(%RSP) |
0x667b JAE 6690 |
0x667d XOR %R15D,%R15D |
0x6680 MOV 0x1f8(%RSP),%RDX |
0x6688 JMP 6ac0 |
0x6690 CMP $0x10,%RBX |
0x6694 JAE 66b0 |
0x6696 XOR %R15D,%R15D |
0x6699 MOV 0x1f8(%RSP),%RDX |
0x66a1 JMP 699f |
0x66b0 MOV %RBX,%R12 |
0x66b3 AND $-0x10,%R12 |
0x66b7 MOV $0x7fffffffffffffc0,%RAX |
0x66c1 LEA 0x30(%RAX),%R15 |
0x66c5 AND %RBX,%R15 |
0x66c8 VBROADCASTSS %XMM2,%ZMM0 |
0x66ce VMOVAPS %ZMM0,0x2c0(%RSP) |
0x66d6 VBROADCASTSS %XMM1,%ZMM0 |
0x66dc VMOVAPS %ZMM0,0x300(%RSP) |
0x66e4 XOR %EBX,%EBX |
0x66e6 NOPW %CS:(%RAX,%RAX,1) |
(39) 0x66f0 VMOVUPS (%RSI,%RBX,4),%ZMM0 |
(39) 0x66f7 VSUBPS 0x300(%RSP),%ZMM0,%ZMM0 |
(39) 0x66ff VMOVAPS %ZMM0,0x180(%RSP) |
(39) 0x6707 VEXTRACTF32X4 $0x3,%ZMM0,%XMM0 |
(39) 0x670e VMOVAPS %XMM0,0x100(%RSP) |
(39) 0x6717 VZEROUPPER |
(39) 0x671a CALL 1110 <expf@plt> |
(39) 0x671f VMOVAPS %XMM0,0x80(%RSP) |
(39) 0x6728 VMOVSHDUP 0x100(%RSP),%XMM0 |
(39) 0x6731 CALL 1110 <expf@plt> |
(39) 0x6736 VMOVAPS 0x80(%RSP),%XMM1 |
(39) 0x673f VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(39) 0x6745 VMOVAPS %XMM0,0x80(%RSP) |
(39) 0x674e VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(39) 0x6759 CALL 1110 <expf@plt> |
(39) 0x675e VMOVAPS 0x80(%RSP),%XMM1 |
(39) 0x6767 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(39) 0x676d VMOVAPS %XMM0,0x80(%RSP) |
(39) 0x6776 VPERMILPS $-0x1,0x100(%RSP),%XMM0 |
(39) 0x6781 CALL 1110 <expf@plt> |
(39) 0x6786 VMOVAPS 0x80(%RSP),%XMM1 |
(39) 0x678f VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(39) 0x6795 VMOVAPS %XMM0,0x80(%RSP) |
(39) 0x679e VMOVAPS 0x180(%RSP),%ZMM0 |
(39) 0x67a6 VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(39) 0x67ad VMOVAPS %XMM0,0x100(%RSP) |
(39) 0x67b6 VZEROUPPER |
(39) 0x67b9 CALL 1110 <expf@plt> |
(39) 0x67be VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x67c4 VMOVSHDUP 0x100(%RSP),%XMM0 |
(39) 0x67cd CALL 1110 <expf@plt> |
(39) 0x67d2 VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x67d8 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(39) 0x67de VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x67e4 VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(39) 0x67ef CALL 1110 <expf@plt> |
(39) 0x67f4 VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x67fa VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(39) 0x6800 VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x6806 VPERMILPS $-0x1,0x100(%RSP),%XMM0 |
(39) 0x6811 CALL 1110 <expf@plt> |
(39) 0x6816 VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x681c VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(39) 0x6822 VINSERTF128 $0x1,0x80(%RSP),%YMM0,%YMM0 |
(39) 0x682d VMOVAPS %YMM0,0x80(%RSP) |
(39) 0x6836 VMOVAPS 0x180(%RSP),%ZMM0 |
(39) 0x683e VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(39) 0x6844 VMOVAPS %XMM0,0x100(%RSP) |
(39) 0x684d VZEROUPPER |
(39) 0x6850 CALL 1110 <expf@plt> |
(39) 0x6855 VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x685b VMOVSHDUP 0x100(%RSP),%XMM0 |
(39) 0x6864 CALL 1110 <expf@plt> |
(39) 0x6869 VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x686f VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(39) 0x6875 VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x687b VPERMILPD $0x1,0x100(%RSP),%XMM0 |
(39) 0x6886 CALL 1110 <expf@plt> |
(39) 0x688b VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x6891 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(39) 0x6897 VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x689d VPERMILPS $-0x1,0x100(%RSP),%XMM0 |
(39) 0x68a8 CALL 1110 <expf@plt> |
(39) 0x68ad VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x68b3 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(39) 0x68b9 VMOVAPS %XMM0,0x100(%RSP) |
(39) 0x68c2 VMOVAPS 0x180(%RSP),%ZMM0 |
(39) 0x68ca VZEROUPPER |
(39) 0x68cd CALL 1110 <expf@plt> |
(39) 0x68d2 VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x68d8 VMOVSHDUP 0x180(%RSP),%XMM0 |
(39) 0x68e1 CALL 1110 <expf@plt> |
(39) 0x68e6 VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x68ec VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(39) 0x68f2 VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x68f8 VPERMILPD $0x1,0x180(%RSP),%XMM0 |
(39) 0x6903 CALL 1110 <expf@plt> |
(39) 0x6908 VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x690e VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(39) 0x6914 VMOVAPS %XMM0,0x40(%RSP) |
(39) 0x691a VPERMILPS $-0x1,0x180(%RSP),%XMM0 |
(39) 0x6925 CALL 1110 <expf@plt> |
(39) 0x692a MOV 0x1f8(%RSP),%RDX |
(39) 0x6932 MOV 0x38(%RSP),%RSI |
(39) 0x6937 VMOVAPS 0x40(%RSP),%XMM1 |
(39) 0x693d VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(39) 0x6943 VINSERTF128 $0x1,0x100(%RSP),%YMM0,%YMM0 |
(39) 0x694e VINSERTF64X4 $0x1,0x80(%RSP),%ZMM0,%ZMM0 |
(39) 0x6957 VDIVPS 0x2c0(%RSP),%ZMM0,%ZMM0 |
(39) 0x695f VMOVUPS %ZMM0,(%RDX,%RBX,4) |
(39) 0x6966 ADD $0x10,%RBX |
(39) 0x696a CMP %RBX,%R12 |
(39) 0x696d JNE 66f0 |
0x6973 MOV 0x1f0(%RSP),%RBX |
0x697b CMP %R15,%RBX |
0x697e VMOVAPS 0x1c0(%RSP),%XMM1 |
0x6987 VMOVAPS 0x200(%RSP),%XMM2 |
0x6990 JE 6b01 |
0x6996 TEST $0xc,%BL |
0x6999 JE 6ac0 |
0x699f MOV %RBX,%RCX |
0x69a2 MOV %R15,%RBX |
0x69a5 MOV %RCX,%RAX |
0x69a8 AND $-0x4,%RAX |
0x69ac MOV %RAX,0x80(%RSP) |
0x69b4 MOV $0x7fffffffffffffc0,%RAX |
0x69be LEA 0x3c(%RAX),%R15 |
0x69c2 AND %RCX,%R15 |
0x69c5 VBROADCASTSS %XMM2,%XMM0 |
0x69ca VMOVAPS %XMM0,0x40(%RSP) |
0x69d0 VBROADCASTSS %XMM1,%XMM0 |
0x69d5 VMOVAPS %XMM0,0x2c0(%RSP) |
0x69de XCHG %AX,%AX |
(43) 0x69e0 VMOVUPS (%RSI,%RBX,4),%XMM0 |
(43) 0x69e5 VSUBPS 0x2c0(%RSP),%XMM0,%XMM0 |
(43) 0x69ee VMOVAPS %XMM0,0x180(%RSP) |
(43) 0x69f7 MOV %RDX,%R12 |
(43) 0x69fa VZEROUPPER |
(43) 0x69fd CALL 1110 <expf@plt> |
(43) 0x6a02 VMOVAPS %XMM0,0x100(%RSP) |
(43) 0x6a0b VMOVSHDUP 0x180(%RSP),%XMM0 |
(43) 0x6a14 CALL 1110 <expf@plt> |
(43) 0x6a19 VMOVAPS 0x100(%RSP),%XMM1 |
(43) 0x6a22 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(43) 0x6a28 VMOVAPS %XMM0,0x100(%RSP) |
(43) 0x6a31 VPERMILPD $0x1,0x180(%RSP),%XMM0 |
(43) 0x6a3c CALL 1110 <expf@plt> |
(43) 0x6a41 VMOVAPS 0x100(%RSP),%XMM1 |
(43) 0x6a4a VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(43) 0x6a50 VMOVAPS %XMM0,0x100(%RSP) |
(43) 0x6a59 VPERMILPS $-0x1,0x180(%RSP),%XMM0 |
(43) 0x6a64 CALL 1110 <expf@plt> |
(43) 0x6a69 MOV %R12,%RDX |
(43) 0x6a6c MOV 0x38(%RSP),%RSI |
(43) 0x6a71 VMOVAPS 0x100(%RSP),%XMM1 |
(43) 0x6a7a VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(43) 0x6a80 VDIVPS 0x40(%RSP),%XMM0,%XMM0 |
(43) 0x6a86 VMOVUPS %XMM0,(%R12,%RBX,4) |
(43) 0x6a8c ADD $0x4,%RBX |
(43) 0x6a90 CMP %RBX,0x80(%RSP) |
(43) 0x6a98 JNE 69e0 |
0x6a9e MOV 0x1f0(%RSP),%RBX |
0x6aa6 CMP %R15,%RBX |
0x6aa9 VMOVAPS 0x1c0(%RSP),%XMM1 |
0x6ab2 VMOVAPS 0x200(%RSP),%XMM2 |
0x6abb JE 6b01 |
0x6abd NOPL (%RAX) |
(42) 0x6ac0 VMOVSS (%RSI,%R15,4),%XMM0 |
(42) 0x6ac6 VSUBSS %XMM1,%XMM0,%XMM0 |
(42) 0x6aca MOV %RDX,%R12 |
(42) 0x6acd VZEROUPPER |
(42) 0x6ad0 CALL 1110 <expf@plt> |
(42) 0x6ad5 MOV %R12,%RDX |
(42) 0x6ad8 VMOVAPS 0x200(%RSP),%XMM2 |
(42) 0x6ae1 VMOVAPS 0x1c0(%RSP),%XMM1 |
(42) 0x6aea MOV 0x38(%RSP),%RSI |
(42) 0x6aef VDIVSS %XMM2,%XMM0,%XMM0 |
(42) 0x6af3 VMOVSS %XMM0,(%R12,%R15,4) |
(42) 0x6af9 INC %R15 |
(42) 0x6afc CMP %R15,%RBX |
(42) 0x6aff JNE 6ac0 |
0x6b01 MOV 0x278(%RSP),%RCX |
0x6b09 LEA 0x1(%RCX),%RBX |
0x6b0d MOV 0x70(%RSP),%R15 |
0x6b12 CMP %R15,%RBX |
0x6b15 JAE 5700 |
0x6b1b MOV 0x2a8(%RSP),%RDX |
0x6b23 SUB %ECX,%EDX |
0x6b25 SAL $0x2,%RDX |
0x6b29 MOV $0x3fffffffc,%RAX |
0x6b33 AND %RAX,%RDX |
0x6b36 ADD $0x4,%RDX |
0x6b3a MOV 0x2a0(%RSP),%RDI |
0x6b42 IMUL %RCX,%RDI |
0x6b46 ADD $0x4,%RDI |
0x6b4a AND %RAX,%RDI |
0x6b4d ADD 0xd0(%RSP),%RDI |
0x6b55 XOR %ESI,%ESI |
0x6b57 VZEROUPPER |
0x6b5a CALL 1070 <memset@plt> |
0x6b5f VMOVAPS 0x200(%RSP),%XMM2 |
0x6b68 MOV 0x278(%RSP),%RCX |
0x6b70 MOV 0x38(%RSP),%RSI |
0x6b75 JMP 5700 |
/home/eoseret/Applications/llm-attention/attention.cpp: 43 - 284 |
-------------------------------------------------------------------------------- |
43: for (int row = 0; row < N; ++row) { |
44: const float *S_row = &S[row * N]; |
45: |
46: float max_val = -FLT_MAX; |
47: for (int idx = 0; idx <= row; ++idx) // vectorised |
48: if (S_row[idx] > max_val) max_val = S_row[idx]; |
49: |
50: float sum = 0.0f; |
51: #pragma clang loop vectorize(enable) |
52: for (int idx = 0; idx <= row; ++idx) // vectorised |
53: sum += expf(S_row[idx] - max_val); |
54: |
55: for (int idx = 0; idx <= row; ++idx) //vectorised |
56: P[row * N + idx] = expf(S_row[idx] - max_val) / sum; |
57: |
58: for (int idx = row + 1; idx < N; ++idx) |
59: P[row * N + idx] = 0.0f; |
60: |
61: D[row] = sum; |
[...] |
284: for (size_t r = 0; r < rept; r++) |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __libc_init_first | | libc.so.6 |
| ○ | __libc_start_main | | libc.so.6 |
| ○ | _start | | attention-avx512 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.16 |
| CQA speedup if FP arith vectorized | 1.02 |
| CQA speedup if fully vectorized | 1.56 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.83 |
| Bottlenecks | micro-operation queue |
| Function | main |
| Source | attention.cpp:43-44,attention.cpp:47-47,attention.cpp:52-52,attention.cpp:55-55,attention.cpp:58-61,attention.cpp:284-284 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 54.50 |
| CQA cycles if no scalar integer | 25.25 |
| CQA cycles if FP arith vectorized | 53.26 |
| CQA cycles if fully vectorized | 34.86 |
| Front-end cycles | 54.50 |
| P0 cycles | 29.67 |
| P1 cycles | 29.83 |
| P2 cycles | 19.83 |
| P3 cycles | 19.50 |
| P4 cycles | 22.00 |
| P5 cycles | 29.75 |
| P6 cycles | 29.75 |
| P7 cycles | 19.67 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 214.00 |
| Nb uops | 218.00 |
| Nb loads | 37.00 |
| Nb stores | 21.00 |
| Nb stack references | 22.00 |
| FLOP/cycle | 1.50 |
| Nb FLOP add-sub | 82.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 19.67 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 516.00 |
| Bytes stored | 556.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 46.88 |
| Vectorization ratio load | 40.74 |
| Vectorization ratio store | 52.38 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 57.14 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 46.38 |
| Vector-efficiency ratio all | 27.83 |
| Vector-efficiency ratio load | 25.23 |
| Vector-efficiency ratio store | 41.37 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 39.73 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 24.09 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.16 |
| CQA speedup if FP arith vectorized | 1.02 |
| CQA speedup if fully vectorized | 1.56 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.83 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention.cpp:43-44,attention.cpp:47-47,attention.cpp:52-52,attention.cpp:55-55,attention.cpp:58-61,attention.cpp:284-284 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 54.50 |
| CQA cycles if no scalar integer | 25.25 |
| CQA cycles if FP arith vectorized | 53.26 |
| CQA cycles if fully vectorized | 34.86 |
| Front-end cycles | 54.50 |
| P0 cycles | 29.67 |
| P1 cycles | 29.83 |
| P2 cycles | 19.83 |
| P3 cycles | 19.50 |
| P4 cycles | 22.00 |
| P5 cycles | 29.75 |
| P6 cycles | 29.75 |
| P7 cycles | 19.67 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 214.00 |
| Nb uops | 218.00 |
| Nb loads | 37.00 |
| Nb stores | 21.00 |
| Nb stack references | 22.00 |
| FLOP/cycle | 1.50 |
| Nb FLOP add-sub | 82.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 19.67 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 516.00 |
| Bytes stored | 556.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 46.88 |
| Vectorization ratio load | 40.74 |
| Vectorization ratio store | 52.38 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 57.14 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 46.38 |
| Vector-efficiency ratio all | 27.83 |
| Vector-efficiency ratio load | 25.23 |
| Vector-efficiency ratio store | 41.37 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 39.73 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 24.09 |
| Path / |
| Function | main |
| Source file and lines | attention.cpp:43-284 |
| Module | attention-avx512 |
| nb instructions | 214 |
| nb uops | 218 |
| loop length | 1136 |
| used x86 registers | 12 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 3 |
| used zmm registers | 4 |
| nb stack references | 22 |
| micro-operation queue | 54.50 cycles |
| front end | 54.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
|---|---|---|---|---|---|---|---|---|
| uops | 29.67 | 29.83 | 19.83 | 19.50 | 22.00 | 29.75 | 29.75 | 19.67 |
| cycles | 29.67 | 29.83 | 19.83 | 19.50 | 22.00 | 29.75 | 29.75 | 19.67 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 54.50 |
| Dispatch | 29.83 |
| Overall L1 | 54.50 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 79% |
| load | 78% |
| store | 91% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 80% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 75% |
| all | 46% |
| load | 40% |
| store | 52% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 57% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 46% |
| all | 12% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 10% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 12% |
| all | 39% |
| load | 37% |
| store | 63% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 51% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 32% |
| all | 27% |
| load | 25% |
| store | 41% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 39% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 24% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x1e8(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| VMOVSS %XMM2,(%RAX,%RCX,4) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOV 0x1f0(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| LEA (,%R15,4),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| MOV 0x280(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| ADD %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| ADD %RAX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x1f8(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| ADD %RAX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0xd8(%RSP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOV 0xd0(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOV 0x270(%RSP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JE 56c0 <main+0x1f10> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RAX,0x1f8(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV %RDX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $-0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $-0x40,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP $0x8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| JAE 5790 <main+0x1fe0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VMOVSS 0x287b(%RIP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| JMP 58a0 <main+0x20f0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| CMP $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 57b0 <main+0x2000> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VMOVSS 0x2864(%RIP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| JMP 584a <main+0x209a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV $0x7fffffffffffffc0,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VBROADCASTSS 0x2838(%RIP),%ZMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 | scal (6.3%) |
| VMOVAPS %ZMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (100.0%) |
| VMOVAPS %ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (100.0%) |
| VMOVAPS %ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (100.0%) |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VMAXPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VMAXPS %ZMM3,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VMAXPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| CMP %RAX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JE 58ad <main+0x20fd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| TEST $0x38,%BL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| JE 58a0 <main+0x20f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| ADD $0x38,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RBX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| JMP 58a8 <main+0x20f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| CMP $0x8,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RBX,0x1f0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV %RSI,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| VMOVAPS %XMM1,0x1c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (25.0%) |
| MOV %RDI,0x280(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV %R8,0x278(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| JAE 58f0 <main+0x2140> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| JMP 6630 <main+0x2e80> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| CMP $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 5910 <main+0x2160> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| JMP 6442 <main+0x2c92> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| MOV %R9,0x298(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV %RBX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RAX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| VBROADCASTSS %XMM1,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x380(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| VMOVAPS %ZMM0,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VMOVAPS %ZMM0,0x300(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| VMOVAPS %ZMM0,0x340(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| MOV %R10,0x2b0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| VMOVAPS 0x300(%RSP),%ZMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 | vect (100.0%) |
| VADDPS 0x2c0(%RSP),%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VADDPS 0x340(%RSP),%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VADDPS %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (50.0%) |
| VADDPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JNE 6423 <main+0x2c73> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV 0x38(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| JMP 666e <main+0x2ebe> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| TEST $0x38,%BL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV 0x38(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| MOV 0x298(%RSP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JE 6630 <main+0x2e80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| VBLENDPS $0x1,%XMM2,%XMM0,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| ADD $0x38,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %RAX,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (50.0%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VEXTRACTF128 $0x1,%YMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| MOV 0x1f0(%RSP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOV 0x2c0(%RSP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| JE 666e <main+0x2ebe> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| CMP $0x4,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VMOVAPS %XMM2,0x200(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (25.0%) |
| JAE 6690 <main+0x2ee0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1f8(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 6ac0 <main+0x3310> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| CMP $0x10,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 66b0 <main+0x2f00> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1f8(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 699f <main+0x31ef> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| MOV %RBX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $-0x10,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x30(%RAX),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| AND %RBX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| VBROADCASTSS %XMM2,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| VBROADCASTSS %XMM1,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x300(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1f0(%RSP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| VMOVAPS 0x200(%RSP),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| JE 6b01 <main+0x3351> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| TEST $0xc,%BL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| JE 6ac0 <main+0x3310> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| AND $-0x4,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x3c(%RAX),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| AND %RCX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| VBROADCASTSS %XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (25.0%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (25.0%) |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1f0(%RSP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| VMOVAPS 0x200(%RSP),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| JE 6b01 <main+0x3351> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x278(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| LEA 0x1(%RCX),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| MOV 0x70(%RSP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 5700 <main+0x1f50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV 0x2a8(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| SUB %ECX,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SAL $0x2,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x3fffffffc,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RAX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD $0x4,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x2a0(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
| ADD $0x4,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| ADD 0xd0(%RSP),%RDI | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 1070 <memset@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| VMOVAPS 0x200(%RSP),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| MOV 0x278(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOV 0x38(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 5700 <main+0x1f50> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| Function | main |
| Source file and lines | attention.cpp:43-284 |
| Module | attention-avx512 |
| nb instructions | 214 |
| nb uops | 218 |
| loop length | 1136 |
| used x86 registers | 12 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 3 |
| used zmm registers | 4 |
| nb stack references | 22 |
| micro-operation queue | 54.50 cycles |
| front end | 54.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
|---|---|---|---|---|---|---|---|---|
| uops | 29.67 | 29.83 | 19.83 | 19.50 | 22.00 | 29.75 | 29.75 | 19.67 |
| cycles | 29.67 | 29.83 | 19.83 | 19.50 | 22.00 | 29.75 | 29.75 | 19.67 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 54.50 |
| Dispatch | 29.83 |
| Overall L1 | 54.50 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 79% |
| load | 78% |
| store | 91% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 80% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 75% |
| all | 46% |
| load | 40% |
| store | 52% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 57% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 46% |
| all | 12% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 10% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 12% |
| all | 39% |
| load | 37% |
| store | 63% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 51% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 32% |
| all | 27% |
| load | 25% |
| store | 41% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 39% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 24% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x1e8(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| VMOVSS %XMM2,(%RAX,%RCX,4) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (6.3%) |
| MOV 0x1f0(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| LEA (,%R15,4),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| MOV 0x280(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| ADD %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| ADD %RAX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x1f8(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| ADD %RAX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0xd8(%RSP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOV 0xd0(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOV 0x270(%RSP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JE 56c0 <main+0x1f10> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RAX,0x1f8(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV %RDX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $-0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $-0x40,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| CMP $0x8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| JAE 5790 <main+0x1fe0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VMOVSS 0x287b(%RIP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| JMP 58a0 <main+0x20f0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| CMP $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 57b0 <main+0x2000> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VMOVSS 0x2864(%RIP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (6.3%) |
| JMP 584a <main+0x209a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV $0x7fffffffffffffc0,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VBROADCASTSS 0x2838(%RIP),%ZMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 | scal (6.3%) |
| VMOVAPS %ZMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (100.0%) |
| VMOVAPS %ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (100.0%) |
| VMOVAPS %ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (100.0%) |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VMAXPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VMAXPS %ZMM3,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VMAXPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| CMP %RAX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JE 58ad <main+0x20fd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| TEST $0x38,%BL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| JE 58a0 <main+0x20f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| ADD $0x38,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RBX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| JMP 58a8 <main+0x20f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| CMP $0x8,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RBX,0x1f0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV %RSI,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| VMOVAPS %XMM1,0x1c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (25.0%) |
| MOV %RDI,0x280(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV %R8,0x278(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| JAE 58f0 <main+0x2140> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| JMP 6630 <main+0x2e80> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| CMP $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 5910 <main+0x2160> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| JMP 6442 <main+0x2c92> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| MOV %R9,0x298(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV %RBX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RAX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| VBROADCASTSS %XMM1,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x380(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| VMOVAPS %ZMM0,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VMOVAPS %ZMM0,0x300(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| VMOVAPS %ZMM0,0x340(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| MOV %R10,0x2b0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| VMOVAPS 0x300(%RSP),%ZMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 | vect (100.0%) |
| VADDPS 0x2c0(%RSP),%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VADDPS 0x340(%RSP),%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VADDPS %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (50.0%) |
| VADDPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JNE 6423 <main+0x2c73> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV 0x38(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| JMP 666e <main+0x2ebe> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| TEST $0x38,%BL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV 0x38(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| MOV 0x298(%RSP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JE 6630 <main+0x2e80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| VBLENDPS $0x1,%XMM2,%XMM0,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| ADD $0x38,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %RAX,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (50.0%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VEXTRACTF128 $0x1,%YMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| MOV 0x1f0(%RSP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOV 0x2c0(%RSP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| JE 666e <main+0x2ebe> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| CMP $0x4,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VMOVAPS %XMM2,0x200(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (25.0%) |
| JAE 6690 <main+0x2ee0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1f8(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 6ac0 <main+0x3310> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| CMP $0x10,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 66b0 <main+0x2f00> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1f8(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 699f <main+0x31ef> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
| MOV %RBX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $-0x10,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x30(%RAX),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| AND %RBX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| VBROADCASTSS %XMM2,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| VBROADCASTSS %XMM1,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x300(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (100.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1f0(%RSP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| VMOVAPS 0x200(%RSP),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| JE 6b01 <main+0x3351> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| TEST $0xc,%BL | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| JE 6ac0 <main+0x3310> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| AND $-0x4,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| LEA 0x3c(%RAX),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| AND %RCX,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| VBROADCASTSS %XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (25.0%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x2c0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | vect (25.0%) |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1f0(%RSP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| VMOVAPS 0x200(%RSP),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| JE 6b01 <main+0x3351> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x278(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| LEA 0x1(%RCX),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| MOV 0x70(%RSP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 5700 <main+0x1f50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 | N/A |
| MOV 0x2a8(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| SUB %ECX,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SAL $0x2,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x3fffffffc,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RAX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD $0x4,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x2a0(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
| ADD $0x4,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| AND %RAX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | N/A |
| ADD 0xd0(%RSP),%RDI | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
| VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 1070 <memset@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| VMOVAPS 0x200(%RSP),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | vect (25.0%) |
| MOV 0x278(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | N/A |
| MOV 0x38(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| JMP 5700 <main+0x1f50> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 | N/A |
