| Loop Id: 43 | Module: attention-aocc-znver5-256 | Source: attention_v2.cpp:43-61 | Coverage: 0.15% |
|---|
| Loop Id: 43 | Module: attention-aocc-znver5-256 | Source: attention_v2.cpp:43-61 | Coverage: 0.15% |
|---|
0x5840 MOV 0x170(%RSP),%RAX |
0x5848 MOV 0x260(%RSP),%RDX |
0x5850 MOV 0x270(%RSP),%RSI |
0x5858 MOV 0x1b8(%RSP),%RDI |
0x5860 MOV %R12,%R9 |
0x5863 VMOVSS %XMM2,(%RAX,%RCX,4) |
0x5868 MOV 0x178(%RSP),%RCX |
0x5870 ADD %RDX,%RSI |
0x5873 ADD %RDX,%R8 |
0x5876 ADD %RDX,%RDI |
0x5879 INC %RCX |
0x587c CMP 0x60(%RSP),%R12 |
0x5881 MOV 0x118(%RSP),%R12 |
0x5889 JE 65f0 |
0x588f MOV %RCX,%R10 |
0x5892 MOV %RCX,%R11 |
0x5895 AND $-0x4,%R10 |
0x5899 AND $-0x20,%R11 |
0x589d MOV %RCX,%R12 |
0x58a0 CMP $0x4,%RCX |
0x58a4 JAE 58c0 |
0x58a6 VMOVSS -0x49fe(%RIP),%XMM1 |
0x58ae XOR %EAX,%EAX |
0x58b0 JMP 59b0 |
0x58c0 CMP $0x20,%R12 |
0x58c4 JAE 58e0 |
0x58c6 VMOVSS -0x4a1e(%RIP),%XMM1 |
0x58ce XOR %EAX,%EAX |
0x58d0 JMP 5962 |
0x58e0 VBROADCASTSS -0x4a39(%RIP),%YMM0 |
0x58e9 MOV $0x7ffffffffffffffc,%RAX |
0x58f3 XOR %ECX,%ECX |
0x58f5 ADD $-0x1c,%RAX |
0x58f9 AND %R12,%RAX |
0x58fc VMOVAPS %YMM0,%YMM1 |
0x5900 VMOVAPS %YMM0,%YMM2 |
0x5904 VMOVAPS %YMM0,%YMM3 |
0x5908 NOPL (%RAX,%RAX,1) |
(34) 0x5910 VMAXPS -0x60(%RSI,%RCX,4),%YMM0,%YMM0 |
(34) 0x5916 VMAXPS -0x40(%RSI,%RCX,4),%YMM1,%YMM1 |
(34) 0x591c VMAXPS -0x20(%RSI,%RCX,4),%YMM2,%YMM2 |
(34) 0x5922 VMAXPS (%RSI,%RCX,4),%YMM3,%YMM3 |
(34) 0x5927 ADD $0x20,%RCX |
(34) 0x592b CMP %RCX,%R11 |
(34) 0x592e JNE 5910 |
0x5930 VMAXPS %YMM1,%YMM0,%YMM0 |
0x5934 VMAXPS %YMM3,%YMM2,%YMM1 |
0x5938 VMAXPS %YMM1,%YMM0,%YMM0 |
0x593c VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x5942 VMAXPS %XMM1,%XMM0,%XMM0 |
0x5946 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x594b VMAXPS %XMM1,%XMM0,%XMM0 |
0x594f VMOVSHDUP %XMM0,%XMM1 |
0x5953 VMAXSS %XMM1,%XMM0,%XMM1 |
0x5957 CMP %RAX,%R12 |
0x595a JE 59be |
0x595c TEST $0x1c,%R12B |
0x5960 JE 59b0 |
0x5962 MOV $0x7ffffffffffffffc,%RDX |
0x596c VBROADCASTSS %XMM1,%XMM0 |
0x5971 MOV %RAX,%RCX |
0x5974 MOV %R12,%RAX |
0x5977 AND %RDX,%RAX |
0x597a NOPW (%RAX,%RAX,1) |
(48) 0x5980 VMAXPS (%R8,%RCX,4),%XMM0,%XMM0 |
(48) 0x5986 ADD $0x4,%RCX |
(48) 0x598a CMP %RCX,%R10 |
(48) 0x598d JNE 5980 |
0x598f VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x5994 VMAXPS %XMM1,%XMM0,%XMM0 |
0x5998 VMOVSHDUP %XMM0,%XMM1 |
0x599c VMAXSS %XMM1,%XMM0,%XMM1 |
0x59a0 JMP 59b9 |
(47) 0x59b0 VMAXSS (%R8,%RAX,4),%XMM1,%XMM1 |
(47) 0x59b6 INC %RAX |
(47) 0x59b9 CMP %RAX,%R12 |
(47) 0x59bc JNE 59b0 |
0x59be MOV %RSI,0x270(%RSP) |
0x59c6 MOV %R12,0x178(%RSP) |
0x59ce MOV %RDI,0x1b8(%RSP) |
0x59d6 MOV %R8,0x20(%RSP) |
0x59db MOV %R9,0x268(%RSP) |
0x59e3 VMOVAPS %XMM1,0x1c0(%RSP) |
0x59ec MOV %R10,0x280(%RSP) |
0x59f4 CMP $0x4,%R12 |
0x59f8 JAE 5a10 |
0x59fa VXORPS %XMM2,%XMM2,%XMM2 |
0x59fe XOR %ECX,%ECX |
0x5a00 JMP 61a0 |
0x5a10 CMP $0x20,%R12 |
0x5a14 JAE 5a30 |
0x5a16 VXORPS %XMM2,%XMM2,%XMM2 |
0x5a1a XOR %ECX,%ECX |
0x5a1c JMP 605c |
0x5a30 MOV $0x7ffffffffffffffc,%RAX |
0x5a3a VBROADCASTSS %XMM1,%YMM0 |
0x5a3f VXORPS %XMM1,%XMM1,%XMM1 |
0x5a43 MOV %R11,0x2d0(%RSP) |
0x5a4b ADD $-0x1c,%RAX |
0x5a4f AND %R12,%RAX |
0x5a52 VMOVAPS %YMM0,0x340(%RSP) |
0x5a5b VXORPS %XMM0,%XMM0,%XMM0 |
0x5a5f XOR %R12D,%R12D |
0x5a62 MOV %RAX,0x150(%RSP) |
0x5a6a VMOVAPS %YMM0,0x300(%RSP) |
0x5a73 VMOVAPS %YMM0,0x2e0(%RSP) |
0x5a7c VMOVAPS %YMM0,0x320(%RSP) |
0x5a85 NOPW %CS:(%RAX,%RAX,1) |
(35) 0x5a90 VMOVAPS %YMM1,0x360(%RSP) |
(35) 0x5a99 VMOVUPS -0x60(%RSI,%R12,4),%YMM0 |
(35) 0x5aa0 VMOVAPS 0x340(%RSP),%YMM4 |
(35) 0x5aa9 VMOVUPS -0x40(%RSI,%R12,4),%YMM1 |
(35) 0x5ab0 VMOVUPS -0x20(%RSI,%R12,4),%YMM2 |
(35) 0x5ab7 VMOVUPS (%RSI,%R12,4),%YMM3 |
(35) 0x5abd VSUBPS %YMM4,%YMM0,%YMM5 |
(35) 0x5ac1 VSUBPS %YMM4,%YMM1,%YMM0 |
(35) 0x5ac5 VMOVAPS %YMM0,0xa0(%RSP) |
(35) 0x5ace VSUBPS %YMM4,%YMM2,%YMM0 |
(35) 0x5ad2 VMOVAPS %YMM5,0x80(%RSP) |
(35) 0x5adb VMOVAPS %YMM0,0x120(%RSP) |
(35) 0x5ae4 VSUBPS %YMM4,%YMM3,%YMM0 |
(35) 0x5ae8 VMOVAPS %YMM0,0xe0(%RSP) |
(35) 0x5af1 VEXTRACTF128 $0x1,%YMM5,%XMM0 |
(35) 0x5af7 VMOVAPS %XMM0,0x40(%RSP) |
(35) 0x5afd VZEROUPPER |
(35) 0x5b00 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5b05 VMOVAPS %XMM0,0x180(%RSP) |
(35) 0x5b0e VMOVSHDUP 0x40(%RSP),%XMM0 |
(35) 0x5b14 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5b19 VMOVAPS 0x180(%RSP),%XMM1 |
(35) 0x5b22 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(35) 0x5b28 VMOVAPS %XMM0,0x180(%RSP) |
(35) 0x5b31 VPERMILPD $0x1,0x40(%RSP),%XMM0 |
(35) 0x5b39 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5b3e VMOVAPS 0x180(%RSP),%XMM1 |
(35) 0x5b47 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(35) 0x5b4d VMOVAPS %XMM0,0x180(%RSP) |
(35) 0x5b56 VPERMILPS $-0x1,0x40(%RSP),%XMM0 |
(35) 0x5b5e CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5b63 VMOVAPS 0x180(%RSP),%XMM1 |
(35) 0x5b6c VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(35) 0x5b72 VMOVAPS %XMM0,0x40(%RSP) |
(35) 0x5b78 VMOVAPS 0x80(%RSP),%YMM0 |
(35) 0x5b81 VZEROUPPER |
(35) 0x5b84 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5b89 VMOVAPS %XMM0,0x180(%RSP) |
(35) 0x5b92 VMOVSHDUP 0x80(%RSP),%XMM0 |
(35) 0x5b9b CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5ba0 VMOVAPS 0x180(%RSP),%XMM1 |
(35) 0x5ba9 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(35) 0x5baf VMOVAPS %XMM0,0x180(%RSP) |
(35) 0x5bb8 VPERMILPD $0x1,0x80(%RSP),%XMM0 |
(35) 0x5bc3 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5bc8 VMOVAPS 0x180(%RSP),%XMM1 |
(35) 0x5bd1 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(35) 0x5bd7 VMOVAPS %XMM0,0x180(%RSP) |
(35) 0x5be0 VPERMILPS $-0x1,0x80(%RSP),%XMM0 |
(35) 0x5beb CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5bf0 VMOVAPS 0x180(%RSP),%XMM1 |
(35) 0x5bf9 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(35) 0x5bff VINSERTF128 $0x1,0x40(%RSP),%YMM0,%YMM0 |
(35) 0x5c07 VMOVAPS 0x300(%RSP),%YMM1 |
(35) 0x5c10 VADDPS %YMM1,%YMM0,%YMM1 |
(35) 0x5c14 VMOVAPS 0xa0(%RSP),%YMM0 |
(35) 0x5c1d VMOVAPS %YMM1,0x300(%RSP) |
(35) 0x5c26 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(35) 0x5c2c VMOVAPS %XMM0,0x80(%RSP) |
(35) 0x5c35 VZEROUPPER |
(35) 0x5c38 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5c3d VMOVAPS %XMM0,0x40(%RSP) |
(35) 0x5c43 VMOVSHDUP 0x80(%RSP),%XMM0 |
(35) 0x5c4c CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5c51 VMOVAPS 0x40(%RSP),%XMM1 |
(35) 0x5c57 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(35) 0x5c5d VMOVAPS %XMM0,0x40(%RSP) |
(35) 0x5c63 VPERMILPD $0x1,0x80(%RSP),%XMM0 |
(35) 0x5c6e CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5c73 VMOVAPS 0x40(%RSP),%XMM1 |
(35) 0x5c79 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(35) 0x5c7f VMOVAPS %XMM0,0x40(%RSP) |
(35) 0x5c85 VPERMILPS $-0x1,0x80(%RSP),%XMM0 |
(35) 0x5c90 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5c95 VMOVAPS 0x40(%RSP),%XMM1 |
(35) 0x5c9b VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(35) 0x5ca1 VMOVAPS %XMM0,0x80(%RSP) |
(35) 0x5caa VMOVAPS 0xa0(%RSP),%YMM0 |
(35) 0x5cb3 VZEROUPPER |
(35) 0x5cb6 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5cbb VMOVAPS %XMM0,0x40(%RSP) |
(35) 0x5cc1 VMOVSHDUP 0xa0(%RSP),%XMM0 |
(35) 0x5cca CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5ccf VMOVAPS 0x40(%RSP),%XMM1 |
(35) 0x5cd5 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(35) 0x5cdb VMOVAPS %XMM0,0x40(%RSP) |
(35) 0x5ce1 VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(35) 0x5cec CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5cf1 VMOVAPS 0x40(%RSP),%XMM1 |
(35) 0x5cf7 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(35) 0x5cfd VMOVAPS %XMM0,0x40(%RSP) |
(35) 0x5d03 VPERMILPS $-0x1,0xa0(%RSP),%XMM0 |
(35) 0x5d0e CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5d13 VMOVAPS 0x40(%RSP),%XMM1 |
(35) 0x5d19 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(35) 0x5d1f VINSERTF128 $0x1,0x80(%RSP),%YMM0,%YMM0 |
(35) 0x5d2a VMOVAPS 0x2e0(%RSP),%YMM1 |
(35) 0x5d33 VADDPS %YMM1,%YMM0,%YMM1 |
(35) 0x5d37 VMOVAPS 0x120(%RSP),%YMM0 |
(35) 0x5d40 VMOVAPS %YMM1,0x2e0(%RSP) |
(35) 0x5d49 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(35) 0x5d4f VMOVAPS %XMM0,0xa0(%RSP) |
(35) 0x5d58 VZEROUPPER |
(35) 0x5d5b CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5d60 VMOVAPS %XMM0,0x80(%RSP) |
(35) 0x5d69 VMOVSHDUP 0xa0(%RSP),%XMM0 |
(35) 0x5d72 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5d77 VMOVAPS 0x80(%RSP),%XMM1 |
(35) 0x5d80 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(35) 0x5d86 VMOVAPS %XMM0,0x80(%RSP) |
(35) 0x5d8f VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(35) 0x5d9a CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5d9f VMOVAPS 0x80(%RSP),%XMM1 |
(35) 0x5da8 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(35) 0x5dae VMOVAPS %XMM0,0x80(%RSP) |
(35) 0x5db7 VPERMILPS $-0x1,0xa0(%RSP),%XMM0 |
(35) 0x5dc2 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5dc7 VMOVAPS 0x80(%RSP),%XMM1 |
(35) 0x5dd0 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(35) 0x5dd6 VMOVAPS %XMM0,0xa0(%RSP) |
(35) 0x5ddf VMOVAPS 0x120(%RSP),%YMM0 |
(35) 0x5de8 VZEROUPPER |
(35) 0x5deb CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5df0 VMOVAPS %XMM0,0x80(%RSP) |
(35) 0x5df9 VMOVSHDUP 0x120(%RSP),%XMM0 |
(35) 0x5e02 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5e07 VMOVAPS 0x80(%RSP),%XMM1 |
(35) 0x5e10 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(35) 0x5e16 VMOVAPS %XMM0,0x80(%RSP) |
(35) 0x5e1f VPERMILPD $0x1,0x120(%RSP),%XMM0 |
(35) 0x5e2a CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5e2f VMOVAPS 0x80(%RSP),%XMM1 |
(35) 0x5e38 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(35) 0x5e3e VMOVAPS %XMM0,0x80(%RSP) |
(35) 0x5e47 VPERMILPS $-0x1,0x120(%RSP),%XMM0 |
(35) 0x5e52 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5e57 VMOVAPS 0x80(%RSP),%XMM1 |
(35) 0x5e60 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(35) 0x5e66 VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 |
(35) 0x5e71 VMOVAPS 0x320(%RSP),%YMM1 |
(35) 0x5e7a VADDPS %YMM1,%YMM0,%YMM1 |
(35) 0x5e7e VMOVAPS 0xe0(%RSP),%YMM0 |
(35) 0x5e87 VMOVAPS %YMM1,0x320(%RSP) |
(35) 0x5e90 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(35) 0x5e96 VMOVAPS %XMM0,0x120(%RSP) |
(35) 0x5e9f VZEROUPPER |
(35) 0x5ea2 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5ea7 VMOVAPS %XMM0,0xa0(%RSP) |
(35) 0x5eb0 VMOVSHDUP 0x120(%RSP),%XMM0 |
(35) 0x5eb9 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5ebe VMOVAPS 0xa0(%RSP),%XMM1 |
(35) 0x5ec7 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(35) 0x5ecd VMOVAPS %XMM0,0xa0(%RSP) |
(35) 0x5ed6 VPERMILPD $0x1,0x120(%RSP),%XMM0 |
(35) 0x5ee1 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5ee6 VMOVAPS 0xa0(%RSP),%XMM1 |
(35) 0x5eef VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(35) 0x5ef5 VMOVAPS %XMM0,0xa0(%RSP) |
(35) 0x5efe VPERMILPS $-0x1,0x120(%RSP),%XMM0 |
(35) 0x5f09 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5f0e VMOVAPS 0xa0(%RSP),%XMM1 |
(35) 0x5f17 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(35) 0x5f1d VMOVAPS %XMM0,0x120(%RSP) |
(35) 0x5f26 VMOVAPS 0xe0(%RSP),%YMM0 |
(35) 0x5f2f VZEROUPPER |
(35) 0x5f32 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5f37 VMOVAPS %XMM0,0xa0(%RSP) |
(35) 0x5f40 VMOVSHDUP 0xe0(%RSP),%XMM0 |
(35) 0x5f49 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5f4e VMOVAPS 0xa0(%RSP),%XMM1 |
(35) 0x5f57 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(35) 0x5f5d VMOVAPS %XMM0,0xa0(%RSP) |
(35) 0x5f66 VPERMILPD $0x1,0xe0(%RSP),%XMM0 |
(35) 0x5f71 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5f76 VMOVAPS 0xa0(%RSP),%XMM1 |
(35) 0x5f7f VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(35) 0x5f85 VMOVAPS %XMM0,0xa0(%RSP) |
(35) 0x5f8e VPERMILPS $-0x1,0xe0(%RSP),%XMM0 |
(35) 0x5f99 CALL 6fd0 <@plt_start@+0x20> |
(35) 0x5f9e VMOVAPS 0xa0(%RSP),%XMM2 |
(35) 0x5fa7 VMOVAPS 0x360(%RSP),%YMM1 |
(35) 0x5fb0 MOV 0x2d0(%RSP),%R11 |
(35) 0x5fb8 MOV 0x270(%RSP),%RSI |
(35) 0x5fc0 ADD $0x20,%R12 |
(35) 0x5fc4 VINSERTPS $0x30,%XMM0,%XMM2,%XMM0 |
(35) 0x5fca VINSERTF128 $0x1,0x120(%RSP),%YMM0,%YMM0 |
(35) 0x5fd5 VADDPS %YMM1,%YMM0,%YMM1 |
(35) 0x5fd9 CMP %R12,%R11 |
(35) 0x5fdc JNE 5a90 |
0x5fe2 VMOVAPS 0x2e0(%RSP),%YMM0 |
0x5feb MOV 0x178(%RSP),%R12 |
0x5ff3 MOV 0x150(%RSP),%RCX |
0x5ffb VADDPS 0x300(%RSP),%YMM0,%YMM0 |
0x6004 VADDPS 0x320(%RSP),%YMM0,%YMM0 |
0x600d VADDPS %YMM0,%YMM1,%YMM0 |
0x6011 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x6017 VADDPS %XMM1,%XMM0,%XMM0 |
0x601b VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x6020 VADDPS %XMM1,%XMM0,%XMM0 |
0x6024 VMOVSHDUP %XMM0,%XMM1 |
0x6028 VADDSS %XMM1,%XMM0,%XMM2 |
0x602c CMP %RCX,%R12 |
0x602f JNE 6044 |
0x6031 VMOVAPS 0x1c0(%RSP),%XMM1 |
0x603a MOV 0x20(%RSP),%R8 |
0x603f JMP 61ee |
0x6044 VMOVAPS 0x1c0(%RSP),%XMM1 |
0x604d MOV 0x20(%RSP),%R8 |
0x6052 TEST $0x1c,%R12B |
0x6056 JE 61a0 |
0x605c VXORPS %XMM0,%XMM0,%XMM0 |
0x6060 VBLENDPS $0x1,%XMM2,%XMM0,%XMM2 |
0x6066 VBROADCASTSS %XMM1,%XMM0 |
0x606b MOV $0x7ffffffffffffffc,%RDX |
0x6075 MOV %R12,%RAX |
0x6078 MOV %RAX,%RSI |
0x607b MOV %RCX,%R12 |
0x607e AND %RDX,%RSI |
0x6081 MOV %RSI,0x150(%RSP) |
0x6089 VMOVAPS %XMM0,0x80(%RSP) |
0x6092 NOPW %CS:(%RAX,%RAX,1) |
(46) 0x60a0 VMOVUPS (%R8,%R12,4),%XMM0 |
(46) 0x60a6 VMOVAPS %XMM2,0x120(%RSP) |
(46) 0x60af VSUBPS 0x80(%RSP),%XMM0,%XMM0 |
(46) 0x60b8 VMOVAPS %XMM0,0xe0(%RSP) |
(46) 0x60c1 VZEROUPPER |
(46) 0x60c4 CALL 6fd0 <@plt_start@+0x20> |
(46) 0x60c9 VMOVAPS %XMM0,0xa0(%RSP) |
(46) 0x60d2 VMOVSHDUP 0xe0(%RSP),%XMM0 |
(46) 0x60db CALL 6fd0 <@plt_start@+0x20> |
(46) 0x60e0 VMOVAPS 0xa0(%RSP),%XMM1 |
(46) 0x60e9 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(46) 0x60ef VMOVAPS %XMM0,0xa0(%RSP) |
(46) 0x60f8 VPERMILPD $0x1,0xe0(%RSP),%XMM0 |
(46) 0x6103 CALL 6fd0 <@plt_start@+0x20> |
(46) 0x6108 VMOVAPS 0xa0(%RSP),%XMM1 |
(46) 0x6111 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(46) 0x6117 VMOVAPS %XMM0,0xa0(%RSP) |
(46) 0x6120 VPERMILPS $-0x1,0xe0(%RSP),%XMM0 |
(46) 0x612b CALL 6fd0 <@plt_start@+0x20> |
(46) 0x6130 VMOVAPS 0xa0(%RSP),%XMM1 |
(46) 0x6139 VMOVAPS 0x120(%RSP),%XMM2 |
(46) 0x6142 MOV 0x20(%RSP),%R8 |
(46) 0x6147 ADD $0x4,%R12 |
(46) 0x614b VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(46) 0x6151 VADDPS %XMM2,%XMM0,%XMM2 |
(46) 0x6155 CMP %R12,0x280(%RSP) |
(46) 0x615d JNE 60a0 |
0x6163 VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 |
0x6168 MOV 0x178(%RSP),%R12 |
0x6170 MOV 0x150(%RSP),%RCX |
0x6178 VADDPS %XMM0,%XMM2,%XMM0 |
0x617c VMOVSHDUP %XMM0,%XMM1 |
0x6180 VADDSS %XMM1,%XMM0,%XMM2 |
0x6184 VMOVAPS 0x1c0(%RSP),%XMM1 |
0x618d CMP %RCX,%R12 |
0x6190 JE 61ee |
0x6192 NOPW %CS:(%RAX,%RAX,1) |
(36) 0x61a0 VMOVSS (%R8,%RCX,4),%XMM0 |
(36) 0x61a6 VMOVAPS %XMM2,0xe0(%RSP) |
(36) 0x61af MOV %RCX,0x150(%RSP) |
(36) 0x61b7 VSUBSS %XMM1,%XMM0,%XMM0 |
(36) 0x61bb VZEROUPPER |
(36) 0x61be CALL 6fd0 <@plt_start@+0x20> |
(36) 0x61c3 VMOVAPS 0xe0(%RSP),%XMM2 |
(36) 0x61cc VMOVAPS 0x1c0(%RSP),%XMM1 |
(36) 0x61d5 MOV 0x150(%RSP),%RCX |
(36) 0x61dd MOV 0x20(%RSP),%R8 |
(36) 0x61e2 INC %RCX |
(36) 0x61e5 VADDSS %XMM2,%XMM0,%XMM2 |
(36) 0x61e9 CMP %RCX,%R12 |
(36) 0x61ec JNE 61a0 |
0x61ee VMOVAPS %XMM2,0xe0(%RSP) |
0x61f7 CMP $0x4,%R12 |
0x61fb JAE 6210 |
0x61fd XOR %R12D,%R12D |
0x6200 JMP 6540 |
0x6210 CMP $0x8,%R12 |
0x6214 JAE 6350 |
0x621a MOV %R12,%RAX |
0x621d XOR %R12D,%R12D |
0x6220 VBROADCASTSS %XMM1,%XMM0 |
0x6225 MOV %R12,%RDX |
0x6228 MOV %RAX,%R12 |
0x622b MOV $0x7ffffffffffffffc,%RAX |
0x6235 VBROADCASTSS %XMM2,%XMM1 |
0x623a AND %RAX,%R12 |
0x623d VMOVAPS %XMM0,0x80(%RSP) |
0x6246 VMOVAPS %XMM1,0x40(%RSP) |
0x624c NOPL (%RAX) |
(44) 0x6250 VMOVUPS (%R8,%RDX,4),%XMM0 |
(44) 0x6256 MOV %RDX,0xa0(%RSP) |
(44) 0x625e VSUBPS 0x80(%RSP),%XMM0,%XMM0 |
(44) 0x6267 VMOVAPS %XMM0,0x120(%RSP) |
(44) 0x6270 VZEROUPPER |
(44) 0x6273 CALL 6fd0 <@plt_start@+0x20> |
(44) 0x6278 VMOVAPS %XMM0,0x150(%RSP) |
(44) 0x6281 VMOVSHDUP 0x120(%RSP),%XMM0 |
(44) 0x628a CALL 6fd0 <@plt_start@+0x20> |
(44) 0x628f VMOVAPS 0x150(%RSP),%XMM1 |
(44) 0x6298 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(44) 0x629e VMOVAPS %XMM0,0x150(%RSP) |
(44) 0x62a7 VPERMILPD $0x1,0x120(%RSP),%XMM0 |
(44) 0x62b2 CALL 6fd0 <@plt_start@+0x20> |
(44) 0x62b7 VMOVAPS 0x150(%RSP),%XMM1 |
(44) 0x62c0 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(44) 0x62c6 VMOVAPS %XMM0,0x150(%RSP) |
(44) 0x62cf VPERMILPS $-0x1,0x120(%RSP),%XMM0 |
(44) 0x62da CALL 6fd0 <@plt_start@+0x20> |
(44) 0x62df VMOVAPS 0x150(%RSP),%XMM1 |
(44) 0x62e8 MOV 0xa0(%RSP),%RDX |
(44) 0x62f0 MOV 0x1b8(%RSP),%RCX |
(44) 0x62f8 MOV 0x20(%RSP),%R8 |
(44) 0x62fd VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(44) 0x6303 VDIVPS 0x40(%RSP),%XMM0,%XMM0 |
(44) 0x6309 VMOVUPS %XMM0,(%RCX,%RDX,4) |
(44) 0x630e ADD $0x4,%RDX |
(44) 0x6312 CMP %RDX,0x280(%RSP) |
(44) 0x631a JNE 6250 |
0x6320 VMOVAPS 0x1c0(%RSP),%XMM1 |
0x6329 VMOVAPS 0xe0(%RSP),%XMM2 |
0x6332 MOV 0x178(%RSP),%RAX |
0x633a CMP %R12,%RAX |
0x633d JNE 6540 |
0x6343 JMP 658b |
0x6350 MOV %R12,%RAX |
0x6353 AND $-0x8,%RAX |
0x6357 VBROADCASTSS %XMM1,%YMM0 |
0x635c VBROADCASTSS %XMM2,%YMM1 |
0x6361 MOV %R12,%RCX |
0x6364 MOV %RAX,0x40(%RSP) |
0x6369 MOV $0x7ffffffffffffffc,%RAX |
0x6373 LEA -0x4(%RAX),%R12 |
0x6377 VMOVAPS %YMM0,0x300(%RSP) |
0x6380 VMOVAPS %YMM1,0x2e0(%RSP) |
0x6389 XOR %EAX,%EAX |
0x638b AND %RCX,%R12 |
0x638e XCHG %AX,%AX |
(37) 0x6390 VMOVUPS (%R8,%RAX,4),%YMM0 |
(37) 0x6396 MOV %RAX,0x150(%RSP) |
(37) 0x639e VSUBPS 0x300(%RSP),%YMM0,%YMM0 |
(37) 0x63a7 VMOVAPS %YMM0,0x120(%RSP) |
(37) 0x63b0 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(37) 0x63b6 VMOVAPS %XMM0,0xa0(%RSP) |
(37) 0x63bf VZEROUPPER |
(37) 0x63c2 CALL 6fd0 <@plt_start@+0x20> |
(37) 0x63c7 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x63d0 VMOVSHDUP 0xa0(%RSP),%XMM0 |
(37) 0x63d9 CALL 6fd0 <@plt_start@+0x20> |
(37) 0x63de VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x63e7 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x63ed VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x63f6 VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(37) 0x6401 CALL 6fd0 <@plt_start@+0x20> |
(37) 0x6406 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x640f VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x6415 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x641e VPERMILPS $-0x1,0xa0(%RSP),%XMM0 |
(37) 0x6429 CALL 6fd0 <@plt_start@+0x20> |
(37) 0x642e VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x6437 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x643d VMOVAPS %XMM0,0xa0(%RSP) |
(37) 0x6446 VMOVAPS 0x120(%RSP),%YMM0 |
(37) 0x644f VZEROUPPER |
(37) 0x6452 CALL 6fd0 <@plt_start@+0x20> |
(37) 0x6457 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x6460 VMOVSHDUP 0x120(%RSP),%XMM0 |
(37) 0x6469 CALL 6fd0 <@plt_start@+0x20> |
(37) 0x646e VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x6477 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(37) 0x647d VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x6486 VPERMILPD $0x1,0x120(%RSP),%XMM0 |
(37) 0x6491 CALL 6fd0 <@plt_start@+0x20> |
(37) 0x6496 VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x649f VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(37) 0x64a5 VMOVAPS %XMM0,0x80(%RSP) |
(37) 0x64ae VPERMILPS $-0x1,0x120(%RSP),%XMM0 |
(37) 0x64b9 CALL 6fd0 <@plt_start@+0x20> |
(37) 0x64be VMOVAPS 0x80(%RSP),%XMM1 |
(37) 0x64c7 MOV 0x150(%RSP),%RAX |
(37) 0x64cf MOV 0x1b8(%RSP),%RCX |
(37) 0x64d7 MOV 0x20(%RSP),%R8 |
(37) 0x64dc VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(37) 0x64e2 VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 |
(37) 0x64ed VDIVPS 0x2e0(%RSP),%YMM0,%YMM0 |
(37) 0x64f6 VMOVUPS %YMM0,(%RCX,%RAX,4) |
(37) 0x64fb ADD $0x8,%RAX |
(37) 0x64ff CMP %RAX,0x40(%RSP) |
(37) 0x6504 JNE 6390 |
0x650a VMOVAPS 0x1c0(%RSP),%XMM1 |
0x6513 VMOVAPS 0xe0(%RSP),%XMM2 |
0x651c MOV 0x178(%RSP),%RAX |
0x6524 CMP %R12,%RAX |
0x6527 JE 658b |
0x6529 TEST $0x4,%AL |
0x652b JNE 6220 |
0x6531 NOPW %CS:(%RAX,%RAX,1) |
(45) 0x6540 VMOVSS (%R8,%R12,4),%XMM0 |
(45) 0x6546 VSUBSS %XMM1,%XMM0,%XMM0 |
(45) 0x654a VZEROUPPER |
(45) 0x654d CALL 6fd0 <@plt_start@+0x20> |
(45) 0x6552 VMOVAPS 0xe0(%RSP),%XMM2 |
(45) 0x655b VMOVAPS 0x1c0(%RSP),%XMM1 |
(45) 0x6564 MOV 0x1b8(%RSP),%RCX |
(45) 0x656c MOV 0x178(%RSP),%RAX |
(45) 0x6574 MOV 0x20(%RSP),%R8 |
(45) 0x6579 VDIVSS %XMM2,%XMM0,%XMM0 |
(45) 0x657d VMOVSS %XMM0,(%RCX,%R12,4) |
(45) 0x6583 INC %R12 |
(45) 0x6586 CMP %R12,%RAX |
(45) 0x6589 JNE 6540 |
0x658b MOV 0x268(%RSP),%RCX |
0x6593 LEA 0x1(%RCX),%R12 |
0x6597 CMP 0x60(%RSP),%R12 |
0x659c JAE 5840 |
0x65a2 MOV %RCX,%RDI |
0x65a5 IMUL 0x2b0(%RSP),%RDI |
0x65ae MOV 0x258(%RSP),%RDX |
0x65b6 LEA (,%RCX,4),%RAX |
0x65be XOR %ESI,%ESI |
0x65c0 SUB %RAX,%RDX |
0x65c3 ADD $0x4,%RDI |
0x65c7 ADD %R15,%RDI |
0x65ca VZEROUPPER |
0x65cd CALL 6fe0 <@plt_start@+0x30> |
0x65d2 VMOVAPS 0xe0(%RSP),%XMM2 |
0x65db MOV 0x268(%RSP),%RCX |
0x65e3 MOV 0x20(%RSP),%R8 |
0x65e8 JMP 5840 |
/home/eoseret/llm-attention/attention_v2.cpp: 43 - 61 |
-------------------------------------------------------------------------------- |
43: for (int row = 0; row < N; ++row) { |
44: const float *S_row = &S[row * N]; |
45: |
46: float max_val = -FLT_MAX; |
47: for (int idx = 0; idx <= row; ++idx) // vectorised |
48: if (S_row[idx] > max_val) max_val = S_row[idx]; |
49: |
50: float sum = 0.0f; |
51: #pragma clang loop vectorize(enable) |
52: for (int idx = 0; idx <= row; ++idx) // vectorised |
53: sum += expf(S_row[idx] - max_val); |
54: |
55: for (int idx = 0; idx <= row; ++idx) //vectorised |
56: P[row * N + idx] = expf(S_row[idx] - max_val) / sum; |
57: |
58: for (int idx = row + 1; idx < N; ++idx) |
59: P[row * N + idx] = 0.0f; |
60: |
61: D[row] = sum; |
| Coverage (%) | Name | Source Location | Module |
|---|
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.24 |
| CQA speedup if FP arith vectorized | 2.06 |
| CQA speedup if fully vectorized | 7.73 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.75 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention_v2.cpp:43-44,attention_v2.cpp:47-47,attention_v2.cpp:52-52,attention_v2.cpp:55-55,attention_v2.cpp:58-61 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 24.88 |
| CQA cycles if no scalar integer | 11.13 |
| CQA cycles if FP arith vectorized | 12.10 |
| CQA cycles if fully vectorized | 3.22 |
| Front-end cycles | 24.88 |
| P0 cycles | 11.50 |
| P1 cycles | 11.50 |
| P2 cycles | 11.50 |
| P3 cycles | 11.50 |
| P4 cycles | 11.50 |
| P5 cycles | 11.50 |
| P6 cycles | 14.25 |
| P7 cycles | 14.25 |
| P8 cycles | 14.25 |
| P9 cycles | 14.25 |
| P10 cycles | 8.50 |
| P11 cycles | 8.50 |
| P12 cycles | 8.50 |
| P13 cycles | 8.50 |
| P14 cycles | 6.00 |
| P15 cycles | 6.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 198.00 |
| Nb uops | 199.00 |
| Nb loads | 35.00 |
| Nb stores | 22.00 |
| Nb stack references | 22.00 |
| FLOP/cycle | 1.53 |
| Nb FLOP add-sub | 38.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 30.55 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 404.00 |
| Bytes stored | 356.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 42.98 |
| Vectorization ratio load | 44.00 |
| Vectorization ratio store | 50.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 54.55 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 38.24 |
| Vector-efficiency ratio all | 19.78 |
| Vector-efficiency ratio load | 20.25 |
| Vector-efficiency ratio store | 25.28 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 17.46 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.24 |
| CQA speedup if FP arith vectorized | 2.06 |
| CQA speedup if fully vectorized | 7.73 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.75 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention_v2.cpp:43-44,attention_v2.cpp:47-47,attention_v2.cpp:52-52,attention_v2.cpp:55-55,attention_v2.cpp:58-61 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 24.88 |
| CQA cycles if no scalar integer | 11.13 |
| CQA cycles if FP arith vectorized | 12.10 |
| CQA cycles if fully vectorized | 3.22 |
| Front-end cycles | 24.88 |
| P0 cycles | 11.50 |
| P1 cycles | 11.50 |
| P2 cycles | 11.50 |
| P3 cycles | 11.50 |
| P4 cycles | 11.50 |
| P5 cycles | 11.50 |
| P6 cycles | 14.25 |
| P7 cycles | 14.25 |
| P8 cycles | 14.25 |
| P9 cycles | 14.25 |
| P10 cycles | 8.50 |
| P11 cycles | 8.50 |
| P12 cycles | 8.50 |
| P13 cycles | 8.50 |
| P14 cycles | 6.00 |
| P15 cycles | 6.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 198.00 |
| Nb uops | 199.00 |
| Nb loads | 35.00 |
| Nb stores | 22.00 |
| Nb stack references | 22.00 |
| FLOP/cycle | 1.53 |
| Nb FLOP add-sub | 38.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 30.55 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 404.00 |
| Bytes stored | 356.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 42.98 |
| Vectorization ratio load | 44.00 |
| Vectorization ratio store | 50.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 54.55 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 38.24 |
| Vector-efficiency ratio all | 19.78 |
| Vector-efficiency ratio load | 20.25 |
| Vector-efficiency ratio store | 25.28 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 17.46 |
| Path / |
| Function | main |
| Source file and lines | attention_v2.cpp:43-61 |
| Module | attention-aocc-znver5-256 |
| nb instructions | 198 |
| nb uops | 199 |
| loop length | 1063 |
| used x86 registers | 12 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 4 |
| used zmm registers | 0 |
| nb stack references | 22 |
| micro-operation queue | 24.88 cycles |
| front end | 24.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 11.50 | 11.50 | 11.50 | 11.50 | 11.50 | 11.50 | 14.25 | 14.25 | 14.25 | 14.25 | 8.50 | 8.50 | 8.50 | 8.50 | 6.00 | 6.00 |
| cycles | 11.50 | 11.50 | 11.50 | 11.50 | 11.50 | 11.50 | 14.25 | 14.25 | 14.25 | 14.25 | 8.50 | 8.50 | 8.50 | 8.50 | 6.00 | 6.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 24.88 |
| Dispatch | 14.25 |
| Overall L1 | 24.88 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 77% |
| load | 78% |
| store | 91% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 75% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 71% |
| all | 42% |
| load | 44% |
| store | 50% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 54% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 38% |
| all | 12% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 12% |
| all | 26% |
| load | 26% |
| store | 35% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 29% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| all | 19% |
| load | 20% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 17% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x170(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x260(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x270(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1b8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV %R12,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| VMOVSS %XMM2,(%RAX,%RCX,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| MOV 0x178(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| ADD %RDX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD %RDX,%R8 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD %RDX,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| INC %RCX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP 0x60(%RSP),%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x118(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JE 65f0 <main+0x29e0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| AND $-0x4,%R10 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x20,%R11 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| CMP $0x4,%RCX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 58c0 <main+0x1cb0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVSS -0x49fe(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 59b0 <main+0x1da0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x20,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 58e0 <main+0x1cd0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVSS -0x4a1e(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 5962 <main+0x1d52> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VBROADCASTSS -0x4a39(%RIP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV $0x7ffffffffffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| ADD $-0x1c,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %R12,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVAPS %YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM0,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMAXPS %YMM3,%YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| CMP %RAX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 59be <main+0x1dae> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| TEST $0x1c,%R12B | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JE 59b0 <main+0x1da0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV $0x7ffffffffffffffc,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND %RDX,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| JMP 59b9 <main+0x1da9> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RSI,0x270(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R12,0x178(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RDI,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R9,0x268(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM1,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| MOV %R10,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| CMP $0x4,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5a10 <main+0x1e00> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 61a0 <main+0x2590> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x20,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5a30 <main+0x1e20> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 605c <main+0x244c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV $0x7ffffffffffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R11,0x2d0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| ADD $-0x1c,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %R12,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVAPS %YMM0,0x340(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV %RAX,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %YMM0,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMOVAPS 0x2e0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| MOV 0x178(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x150(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VADDPS 0x300(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VADDPS 0x320(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VADDPS %YMM0,%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| CMP %RCX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JNE 6044 <main+0x2434> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 61ee <main+0x25de> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| TEST $0x1c,%R12B | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JE 61a0 <main+0x2590> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VBLENDPS $0x1,%XMM2,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV $0x7ffffffffffffffc,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| AND %RDX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV %RSI,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| MOV 0x178(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x150(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| CMP %RCX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 61ee <main+0x25de> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMOVAPS %XMM2,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| CMP $0x4,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 6210 <main+0x2600> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 6540 <main+0x2930> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x8,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 6350 <main+0x2740> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV $0x7ffffffffffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| VBROADCASTSS %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| AND %RAX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS %XMM1,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVAPS 0xe0(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x178(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| CMP %R12,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JNE 6540 <main+0x2930> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| JMP 658b <main+0x297b> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND $-0x8,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| VBROADCASTSS %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV %R12,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV $0x7ffffffffffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| LEA -0x4(%RAX),%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVAPS %YMM0,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| AND %RCX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVAPS 0xe0(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x178(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| CMP %R12,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 658b <main+0x297b> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| TEST $0x4,%AL | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JNE 6220 <main+0x2610> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| MOV 0x268(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| LEA 0x1(%RCX),%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP 0x60(%RSP),%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 5840 <main+0x1c30> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| IMUL 0x2b0(%RSP),%RDI | 1 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
| MOV 0x258(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| LEA (,%RCX,4),%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| SUB %RAX,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD $0x4,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD %R15,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fe0 <@plt_start@+0x30> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xe0(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x268(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 5840 <main+0x1c30> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| Function | main |
| Source file and lines | attention_v2.cpp:43-61 |
| Module | attention-aocc-znver5-256 |
| nb instructions | 198 |
| nb uops | 199 |
| loop length | 1063 |
| used x86 registers | 12 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 4 |
| used zmm registers | 0 |
| nb stack references | 22 |
| micro-operation queue | 24.88 cycles |
| front end | 24.88 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 11.50 | 11.50 | 11.50 | 11.50 | 11.50 | 11.50 | 14.25 | 14.25 | 14.25 | 14.25 | 8.50 | 8.50 | 8.50 | 8.50 | 6.00 | 6.00 |
| cycles | 11.50 | 11.50 | 11.50 | 11.50 | 11.50 | 11.50 | 14.25 | 14.25 | 14.25 | 14.25 | 8.50 | 8.50 | 8.50 | 8.50 | 6.00 | 6.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 24.88 |
| Dispatch | 14.25 |
| Overall L1 | 24.88 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 77% |
| load | 78% |
| store | 91% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 75% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 71% |
| all | 42% |
| load | 44% |
| store | 50% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 54% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 38% |
| all | 12% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 12% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 12% |
| all | 26% |
| load | 26% |
| store | 35% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 29% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| all | 19% |
| load | 20% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 17% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x170(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x260(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x270(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1b8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV %R12,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| VMOVSS %XMM2,(%RAX,%RCX,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| MOV 0x178(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| ADD %RDX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD %RDX,%R8 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD %RDX,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| INC %RCX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP 0x60(%RSP),%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV 0x118(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JE 65f0 <main+0x29e0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| AND $-0x4,%R10 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x20,%R11 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| CMP $0x4,%RCX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 58c0 <main+0x1cb0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVSS -0x49fe(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 59b0 <main+0x1da0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x20,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 58e0 <main+0x1cd0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVSS -0x4a1e(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 5962 <main+0x1d52> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VBROADCASTSS -0x4a39(%RIP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV $0x7ffffffffffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| ADD $-0x1c,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %R12,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVAPS %YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM0,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMAXPS %YMM3,%YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| CMP %RAX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 59be <main+0x1dae> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| TEST $0x1c,%R12B | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JE 59b0 <main+0x1da0> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV $0x7ffffffffffffffc,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND %RDX,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| JMP 59b9 <main+0x1da9> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RSI,0x270(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R12,0x178(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RDI,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %R9,0x268(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM1,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| MOV %R10,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| CMP $0x4,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5a10 <main+0x1e00> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 61a0 <main+0x2590> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x20,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5a30 <main+0x1e20> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 605c <main+0x244c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV $0x7ffffffffffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R11,0x2d0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| ADD $-0x1c,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %R12,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVAPS %YMM0,0x340(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV %RAX,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %YMM0,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMOVAPS 0x2e0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| MOV 0x178(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x150(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VADDPS 0x300(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VADDPS 0x320(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VADDPS %YMM0,%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| CMP %RCX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JNE 6044 <main+0x2434> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 61ee <main+0x25de> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| TEST $0x1c,%R12B | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JE 61a0 <main+0x2590> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VBLENDPS $0x1,%XMM2,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV $0x7ffffffffffffffc,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| AND %RDX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV %RSI,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| MOV 0x178(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x150(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| CMP %RCX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 61ee <main+0x25de> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMOVAPS %XMM2,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| CMP $0x4,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 6210 <main+0x2600> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 6540 <main+0x2930> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x8,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 6350 <main+0x2740> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV $0x7ffffffffffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| VBROADCASTSS %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| AND %RAX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VMOVAPS %XMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VMOVAPS %XMM1,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVAPS 0xe0(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x178(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| CMP %R12,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JNE 6540 <main+0x2930> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| JMP 658b <main+0x297b> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND $-0x8,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| VBROADCASTSS %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV %R12,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV $0x7ffffffffffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| LEA -0x4(%RAX),%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVAPS %YMM0,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| AND %RCX,%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| VMOVAPS 0x1c0(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVAPS 0xe0(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x178(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| CMP %R12,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 658b <main+0x297b> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| TEST $0x4,%AL | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JNE 6220 <main+0x2610> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| MOV 0x268(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| LEA 0x1(%RCX),%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP 0x60(%RSP),%R12 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| JAE 5840 <main+0x1c30> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| IMUL 0x2b0(%RSP),%RDI | 1 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
| MOV 0x258(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| LEA (,%RCX,4),%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| SUB %RAX,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD $0x4,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD %R15,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 6fe0 <@plt_start@+0x30> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0xe0(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x268(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x20(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 5840 <main+0x1c30> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
