| Loop Id: 57 | Module: attention-clang-znver5-256 | Source: attention_v2.cpp:43-61 | Coverage: 0.34% |
|---|
| Loop Id: 57 | Module: attention-clang-znver5-256 | Source: attention_v2.cpp:43-61 | Coverage: 0.34% |
|---|
0x5e65 MOV 0x1e8(%RSP),%RCX |
0x5e6d MOV 0x2e0(%RSP),%RDX |
0x5e75 MOV 0x180(%RSP),%RAX |
0x5e7d MOV %R15,%RDI |
0x5e80 INC %RSI |
0x5e83 MOV %RBX,0x160(%RSP) |
0x5e8b ADD %RCX,0x1f0(%RSP) |
0x5e93 ADD %RCX,%RDX |
0x5e96 ADD %RCX,%RDI |
0x5e99 VMOVSS %XMM1,(%RAX,%R14,4) |
0x5e9f CMP %R13,%RBX |
0x5ea2 JE 6a0c |
0x5ea8 MOV 0x1f0(%RSP),%R15 |
0x5eb0 MOV %RSI,%R8 |
0x5eb3 MOV %RSI,%R14 |
0x5eb6 AND $-0x4,%R8 |
0x5eba AND $-0x20,%R14 |
0x5ebe CMP $0x4,%RSI |
0x5ec2 JAE 5ed3 |
0x5ec4 VMOVSS 0x2138(%RIP),%XMM1 |
0x5ecc XOR %EAX,%EAX |
0x5ece JMP 5f99 |
0x5ed3 CMP $0x20,%RSI |
0x5ed7 JAE 5ee5 |
0x5ed9 VMOVSS 0x2123(%RIP),%XMM1 |
0x5ee1 XOR %EAX,%EAX |
0x5ee3 JMP 5f5e |
0x5ee5 VBROADCASTSS 0x2116(%RIP),%YMM3 |
0x5eee MOV $0x7fffffffffffffe0,%RCX |
0x5ef8 MOV %RSI,%RAX |
0x5efb AND %RCX,%RAX |
0x5efe XOR %ECX,%ECX |
0x5f00 VMOVAPS %YMM3,%YMM0 |
0x5f04 VMOVAPS %YMM3,%YMM1 |
0x5f08 VMOVAPS %YMM3,%YMM2 |
(46) 0x5f0c VMAXPS -0x60(%RDX,%RCX,4),%YMM0,%YMM0 |
(46) 0x5f12 VMAXPS -0x40(%RDX,%RCX,4),%YMM1,%YMM1 |
(46) 0x5f18 VMAXPS -0x20(%RDX,%RCX,4),%YMM2,%YMM2 |
(46) 0x5f1e VMAXPS (%RDX,%RCX,4),%YMM3,%YMM3 |
(46) 0x5f23 ADD $0x20,%RCX |
(46) 0x5f27 CMP %RCX,%R14 |
(46) 0x5f2a JNE 5f0c |
0x5f2c VMAXPS %YMM1,%YMM0,%YMM0 |
0x5f30 VMAXPS %YMM3,%YMM2,%YMM1 |
0x5f34 VMAXPS %YMM1,%YMM0,%YMM0 |
0x5f38 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x5f3e VMAXPS %XMM1,%XMM0,%XMM0 |
0x5f42 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x5f47 VMAXPS %XMM1,%XMM0,%XMM0 |
0x5f4b VMOVSHDUP %XMM0,%XMM1 |
0x5f4f VMAXSS %XMM1,%XMM0,%XMM1 |
0x5f53 CMP %RAX,%RSI |
0x5f56 JE 5fa7 |
0x5f58 TEST $0x1c,%SIL |
0x5f5c JE 5f99 |
0x5f5e MOV %RAX,%RCX |
0x5f61 MOV $0x7fffffffffffffe0,%RAX |
0x5f6b VBROADCASTSS %XMM1,%XMM0 |
0x5f70 ADD $0x1c,%RAX |
0x5f74 AND %RSI,%RAX |
(61) 0x5f77 VMAXPS (%R15,%RCX,4),%XMM0,%XMM0 |
(61) 0x5f7d ADD $0x4,%RCX |
(61) 0x5f81 CMP %RCX,%R8 |
(61) 0x5f84 JNE 5f77 |
0x5f86 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x5f8b VMAXPS %XMM1,%XMM0,%XMM0 |
0x5f8f VMOVSHDUP %XMM0,%XMM1 |
0x5f93 VMAXSS %XMM1,%XMM0,%XMM1 |
0x5f97 JMP 5fa2 |
(60) 0x5f99 VMAXSS (%R15,%RAX,4),%XMM1,%XMM1 |
(60) 0x5f9f INC %RAX |
(60) 0x5fa2 CMP %RAX,%RSI |
(60) 0x5fa5 JNE 5f99 |
0x5fa7 MOV %RSI,0x30(%RSP) |
0x5fac VMOVAPS %XMM1,0x170(%RSP) |
0x5fb5 MOV %RDI,0x2d8(%RSP) |
0x5fbd MOV %RDX,0x2e0(%RSP) |
0x5fc5 CMP $0x3,%RSI |
0x5fc9 JA 5fde |
0x5fcb MOV 0x160(%RSP),%R14 |
0x5fd3 VXORPS %XMM2,%XMM2,%XMM2 |
0x5fd7 XOR %EBX,%EBX |
0x5fd9 JMP 672e |
0x5fde CMP $0x20,%RSI |
0x5fe2 JAE 5ff7 |
0x5fe4 MOV 0x160(%RSP),%R14 |
0x5fec VXORPS %XMM2,%XMM2,%XMM2 |
0x5ff0 XOR %EBX,%EBX |
0x5ff2 JMP 660c |
0x5ff7 VBROADCASTSS %XMM1,%YMM0 |
0x5ffc MOV $0x7fffffffffffffe0,%RAX |
0x6006 MOV %RSI,%RBX |
0x6009 MOV %R8,0x350(%RSP) |
0x6011 XOR %R13D,%R13D |
0x6014 VXORPS %XMM1,%XMM1,%XMM1 |
0x6018 AND %RAX,%RBX |
0x601b VMOVAPS %YMM0,0x440(%RSP) |
0x6024 VXORPS %XMM0,%XMM0,%XMM0 |
0x6028 VMOVAPS %YMM0,0x320(%RSP) |
0x6031 VMOVAPS %YMM0,0x3e0(%RSP) |
0x603a VMOVAPS %YMM0,0x400(%RSP) |
(47) 0x6043 VMOVAPS %YMM1,0x460(%RSP) |
(47) 0x604c VMOVUPS -0x60(%RDX,%R13,4),%YMM0 |
(47) 0x6053 VMOVAPS 0x440(%RSP),%YMM4 |
(47) 0x605c VMOVUPS -0x40(%RDX,%R13,4),%YMM1 |
(47) 0x6063 VMOVUPS -0x20(%RDX,%R13,4),%YMM2 |
(47) 0x606a VMOVUPS (%RDX,%R13,4),%YMM3 |
(47) 0x6070 VSUBPS %YMM4,%YMM0,%YMM5 |
(47) 0x6074 VSUBPS %YMM4,%YMM1,%YMM0 |
(47) 0x6078 VMOVAPS %YMM0,0xa0(%RSP) |
(47) 0x6081 VSUBPS %YMM4,%YMM2,%YMM0 |
(47) 0x6085 VMOVAPS %YMM5,0xe0(%RSP) |
(47) 0x608e VMOVAPS %YMM0,0x120(%RSP) |
(47) 0x6097 VSUBPS %YMM4,%YMM3,%YMM0 |
(47) 0x609b VMOVAPS %YMM0,0x140(%RSP) |
(47) 0x60a4 VEXTRACTF128 $0x1,%YMM5,%XMM0 |
(47) 0x60aa VMOVAPS %XMM0,0x70(%RSP) |
(47) 0x60b0 VZEROUPPER |
(47) 0x60b3 CALL 1160 <expf@plt> |
(47) 0x60b8 VMOVAPS %XMM0,0x1d0(%RSP) |
(47) 0x60c1 VMOVSHDUP 0x70(%RSP),%XMM0 |
(47) 0x60c7 CALL 1160 <expf@plt> |
(47) 0x60cc VMOVAPS 0x1d0(%RSP),%XMM1 |
(47) 0x60d5 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x60db VMOVAPS %XMM0,0x1d0(%RSP) |
(47) 0x60e4 VPERMILPD $0x1,0x70(%RSP),%XMM0 |
(47) 0x60ec CALL 1160 <expf@plt> |
(47) 0x60f1 VMOVAPS 0x1d0(%RSP),%XMM1 |
(47) 0x60fa VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6100 VMOVAPS %XMM0,0x1d0(%RSP) |
(47) 0x6109 VPERMILPS $-0x1,0x70(%RSP),%XMM0 |
(47) 0x6111 CALL 1160 <expf@plt> |
(47) 0x6116 VMOVAPS 0x1d0(%RSP),%XMM1 |
(47) 0x611f VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6125 VMOVAPS %XMM0,0x70(%RSP) |
(47) 0x612b VMOVAPS 0xe0(%RSP),%YMM0 |
(47) 0x6134 VZEROUPPER |
(47) 0x6137 CALL 1160 <expf@plt> |
(47) 0x613c VMOVAPS %XMM0,0x1d0(%RSP) |
(47) 0x6145 VMOVSHDUP 0xe0(%RSP),%XMM0 |
(47) 0x614e CALL 1160 <expf@plt> |
(47) 0x6153 VMOVAPS 0x1d0(%RSP),%XMM1 |
(47) 0x615c VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6162 VMOVAPS %XMM0,0x1d0(%RSP) |
(47) 0x616b VPERMILPD $0x1,0xe0(%RSP),%XMM0 |
(47) 0x6176 CALL 1160 <expf@plt> |
(47) 0x617b VMOVAPS 0x1d0(%RSP),%XMM1 |
(47) 0x6184 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x618a VMOVAPS %XMM0,0x1d0(%RSP) |
(47) 0x6193 VPERMILPS $-0x1,0xe0(%RSP),%XMM0 |
(47) 0x619e CALL 1160 <expf@plt> |
(47) 0x61a3 VMOVAPS 0x1d0(%RSP),%XMM1 |
(47) 0x61ac VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x61b2 VINSERTF128 $0x1,0x70(%RSP),%YMM0,%YMM0 |
(47) 0x61ba VMOVAPS 0x320(%RSP),%YMM1 |
(47) 0x61c3 VADDPS %YMM1,%YMM0,%YMM1 |
(47) 0x61c7 VMOVAPS 0xa0(%RSP),%YMM0 |
(47) 0x61d0 VMOVAPS %YMM1,0x320(%RSP) |
(47) 0x61d9 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x61df VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x61e8 VZEROUPPER |
(47) 0x61eb CALL 1160 <expf@plt> |
(47) 0x61f0 VMOVAPS %XMM0,0x70(%RSP) |
(47) 0x61f6 VMOVSHDUP 0xe0(%RSP),%XMM0 |
(47) 0x61ff CALL 1160 <expf@plt> |
(47) 0x6204 VMOVAPS 0x70(%RSP),%XMM1 |
(47) 0x620a VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6210 VMOVAPS %XMM0,0x70(%RSP) |
(47) 0x6216 VPERMILPD $0x1,0xe0(%RSP),%XMM0 |
(47) 0x6221 CALL 1160 <expf@plt> |
(47) 0x6226 VMOVAPS 0x70(%RSP),%XMM1 |
(47) 0x622c VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6232 VMOVAPS %XMM0,0x70(%RSP) |
(47) 0x6238 VPERMILPS $-0x1,0xe0(%RSP),%XMM0 |
(47) 0x6243 CALL 1160 <expf@plt> |
(47) 0x6248 VMOVAPS 0x70(%RSP),%XMM1 |
(47) 0x624e VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6254 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x625d VMOVAPS 0xa0(%RSP),%YMM0 |
(47) 0x6266 VZEROUPPER |
(47) 0x6269 CALL 1160 <expf@plt> |
(47) 0x626e VMOVAPS %XMM0,0x70(%RSP) |
(47) 0x6274 VMOVSHDUP 0xa0(%RSP),%XMM0 |
(47) 0x627d CALL 1160 <expf@plt> |
(47) 0x6282 VMOVAPS 0x70(%RSP),%XMM1 |
(47) 0x6288 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x628e VMOVAPS %XMM0,0x70(%RSP) |
(47) 0x6294 VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(47) 0x629f CALL 1160 <expf@plt> |
(47) 0x62a4 VMOVAPS 0x70(%RSP),%XMM1 |
(47) 0x62aa VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x62b0 VMOVAPS %XMM0,0x70(%RSP) |
(47) 0x62b6 VPERMILPS $-0x1,0xa0(%RSP),%XMM0 |
(47) 0x62c1 CALL 1160 <expf@plt> |
(47) 0x62c6 VMOVAPS 0x70(%RSP),%XMM1 |
(47) 0x62cc VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x62d2 VINSERTF128 $0x1,0xe0(%RSP),%YMM0,%YMM0 |
(47) 0x62dd VMOVAPS 0x3e0(%RSP),%YMM1 |
(47) 0x62e6 VADDPS %YMM1,%YMM0,%YMM1 |
(47) 0x62ea VMOVAPS 0x120(%RSP),%YMM0 |
(47) 0x62f3 VMOVAPS %YMM1,0x3e0(%RSP) |
(47) 0x62fc VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x6302 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x630b VZEROUPPER |
(47) 0x630e CALL 1160 <expf@plt> |
(47) 0x6313 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x631c VMOVSHDUP 0xa0(%RSP),%XMM0 |
(47) 0x6325 CALL 1160 <expf@plt> |
(47) 0x632a VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x6333 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6339 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x6342 VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(47) 0x634d CALL 1160 <expf@plt> |
(47) 0x6352 VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x635b VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6361 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x636a VPERMILPS $-0x1,0xa0(%RSP),%XMM0 |
(47) 0x6375 CALL 1160 <expf@plt> |
(47) 0x637a VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x6383 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6389 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x6392 VMOVAPS 0x120(%RSP),%YMM0 |
(47) 0x639b VZEROUPPER |
(47) 0x639e CALL 1160 <expf@plt> |
(47) 0x63a3 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x63ac VMOVSHDUP 0x120(%RSP),%XMM0 |
(47) 0x63b5 CALL 1160 <expf@plt> |
(47) 0x63ba VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x63c3 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x63c9 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x63d2 VPERMILPD $0x1,0x120(%RSP),%XMM0 |
(47) 0x63dd CALL 1160 <expf@plt> |
(47) 0x63e2 VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x63eb VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x63f1 VMOVAPS %XMM0,0xe0(%RSP) |
(47) 0x63fa VPERMILPS $-0x1,0x120(%RSP),%XMM0 |
(47) 0x6405 CALL 1160 <expf@plt> |
(47) 0x640a VMOVAPS 0xe0(%RSP),%XMM1 |
(47) 0x6413 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6419 VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 |
(47) 0x6424 VMOVAPS 0x400(%RSP),%YMM1 |
(47) 0x642d VADDPS %YMM1,%YMM0,%YMM1 |
(47) 0x6431 VMOVAPS 0x140(%RSP),%YMM0 |
(47) 0x643a VMOVAPS %YMM1,0x400(%RSP) |
(47) 0x6443 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x6449 VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x6452 VZEROUPPER |
(47) 0x6455 CALL 1160 <expf@plt> |
(47) 0x645a VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x6463 VMOVSHDUP 0x120(%RSP),%XMM0 |
(47) 0x646c CALL 1160 <expf@plt> |
(47) 0x6471 VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x647a VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6480 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x6489 VPERMILPD $0x1,0x120(%RSP),%XMM0 |
(47) 0x6494 CALL 1160 <expf@plt> |
(47) 0x6499 VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x64a2 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x64a8 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x64b1 VPERMILPS $-0x1,0x120(%RSP),%XMM0 |
(47) 0x64bc CALL 1160 <expf@plt> |
(47) 0x64c1 VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x64ca VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x64d0 VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x64d9 VMOVAPS 0x140(%RSP),%YMM0 |
(47) 0x64e2 VZEROUPPER |
(47) 0x64e5 CALL 1160 <expf@plt> |
(47) 0x64ea VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x64f3 VMOVSHDUP 0x140(%RSP),%XMM0 |
(47) 0x64fc CALL 1160 <expf@plt> |
(47) 0x6501 VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x650a VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6510 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x6519 VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(47) 0x6524 CALL 1160 <expf@plt> |
(47) 0x6529 VMOVAPS 0xa0(%RSP),%XMM1 |
(47) 0x6532 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6538 VMOVAPS %XMM0,0xa0(%RSP) |
(47) 0x6541 VPERMILPS $-0x1,0x140(%RSP),%XMM0 |
(47) 0x654c CALL 1160 <expf@plt> |
(47) 0x6551 VMOVAPS 0xa0(%RSP),%XMM2 |
(47) 0x655a VMOVAPS 0x460(%RSP),%YMM1 |
(47) 0x6563 MOV 0x2e0(%RSP),%RDX |
(47) 0x656b ADD $0x20,%R13 |
(47) 0x656f VINSERTPS $0x30,%XMM0,%XMM2,%XMM0 |
(47) 0x6575 VINSERTF128 $0x1,0x120(%RSP),%YMM0,%YMM0 |
(47) 0x6580 VADDPS %YMM1,%YMM0,%YMM1 |
(47) 0x6584 CMP %R13,%R14 |
(47) 0x6587 JNE 6043 |
0x658d VMOVAPS 0x3e0(%RSP),%YMM0 |
0x6596 MOV 0x30(%RSP),%RSI |
0x659b MOV 0x160(%RSP),%R14 |
0x65a3 VADDPS 0x320(%RSP),%YMM0,%YMM0 |
0x65ac VADDPS 0x400(%RSP),%YMM0,%YMM0 |
0x65b5 VADDPS %YMM0,%YMM1,%YMM0 |
0x65b9 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x65bf VADDPS %XMM1,%XMM0,%XMM0 |
0x65c3 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x65c8 VADDPS %XMM1,%XMM0,%XMM0 |
0x65cc VMOVSHDUP %XMM0,%XMM1 |
0x65d0 VADDSS %XMM1,%XMM0,%XMM2 |
0x65d4 CMP %RBX,%RSI |
0x65d7 JNE 65ec |
0x65d9 VMOVAPS 0x170(%RSP),%XMM1 |
0x65e2 MOV 0x40(%RSP),%R13 |
0x65e7 JMP 676c |
0x65ec VMOVAPS 0x170(%RSP),%XMM1 |
0x65f5 MOV 0x40(%RSP),%R13 |
0x65fa MOV 0x350(%RSP),%R8 |
0x6602 TEST $0x1c,%SIL |
0x6606 JE 672e |
0x660c MOV $0x7fffffffffffffe0,%RAX |
0x6616 MOV %RBX,%R13 |
0x6619 VXORPS %XMM0,%XMM0,%XMM0 |
0x661d VMOVSS %XMM2,%XMM0,%XMM2 |
0x6621 VBROADCASTSS %XMM1,%XMM0 |
0x6626 MOV %R14,0x160(%RSP) |
0x662e LEA 0x1c(%RAX),%RBX |
0x6632 AND %RSI,%RBX |
0x6635 VMOVAPS %XMM0,0xe0(%RSP) |
(59) 0x663e VMOVUPS (%R15,%R13,4),%XMM0 |
(59) 0x6644 VMOVAPS %XMM2,0x120(%RSP) |
(59) 0x664d MOV %R8,%R14 |
(59) 0x6650 VSUBPS 0xe0(%RSP),%XMM0,%XMM0 |
(59) 0x6659 VMOVAPS %XMM0,0x140(%RSP) |
(59) 0x6662 VZEROUPPER |
(59) 0x6665 CALL 1160 <expf@plt> |
(59) 0x666a VMOVAPS %XMM0,0xa0(%RSP) |
(59) 0x6673 VMOVSHDUP 0x140(%RSP),%XMM0 |
(59) 0x667c CALL 1160 <expf@plt> |
(59) 0x6681 VMOVAPS 0xa0(%RSP),%XMM1 |
(59) 0x668a VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(59) 0x6690 VMOVAPS %XMM0,0xa0(%RSP) |
(59) 0x6699 VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(59) 0x66a4 CALL 1160 <expf@plt> |
(59) 0x66a9 VMOVAPS 0xa0(%RSP),%XMM1 |
(59) 0x66b2 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(59) 0x66b8 VMOVAPS %XMM0,0xa0(%RSP) |
(59) 0x66c1 VPERMILPS $-0x1,0x140(%RSP),%XMM0 |
(59) 0x66cc CALL 1160 <expf@plt> |
(59) 0x66d1 VMOVAPS 0xa0(%RSP),%XMM1 |
(59) 0x66da VMOVAPS 0x120(%RSP),%XMM2 |
(59) 0x66e3 ADD $0x4,%R13 |
(59) 0x66e7 MOV %R14,%R8 |
(59) 0x66ea VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(59) 0x66f0 VADDPS %XMM2,%XMM0,%XMM2 |
(59) 0x66f4 CMP %R13,%R14 |
(59) 0x66f7 JNE 663e |
0x66fd VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 |
0x6702 MOV 0x30(%RSP),%RSI |
0x6707 MOV 0x40(%RSP),%R13 |
0x670c MOV 0x160(%RSP),%R14 |
0x6714 VADDPS %XMM0,%XMM2,%XMM0 |
0x6718 VMOVSHDUP %XMM0,%XMM1 |
0x671c VADDSS %XMM1,%XMM0,%XMM2 |
0x6720 VMOVAPS 0x170(%RSP),%XMM1 |
0x6729 CMP %RBX,%RSI |
0x672c JE 676c |
(48) 0x672e VMOVSS (%R15,%RBX,4),%XMM0 |
(48) 0x6734 VMOVAPS %XMM2,0x120(%RSP) |
(48) 0x673d VSUBSS %XMM1,%XMM0,%XMM0 |
(48) 0x6741 VZEROUPPER |
(48) 0x6744 CALL 1160 <expf@plt> |
(48) 0x6749 VMOVAPS 0x120(%RSP),%XMM2 |
(48) 0x6752 VMOVAPS 0x170(%RSP),%XMM1 |
(48) 0x675b MOV 0x30(%RSP),%RSI |
(48) 0x6760 INC %RBX |
(48) 0x6763 VADDSS %XMM2,%XMM0,%XMM2 |
(48) 0x6767 CMP %RBX,%RSI |
(48) 0x676a JNE 672e |
0x676c MOV %R14,0x160(%RSP) |
0x6774 VMOVAPS %XMM2,0x120(%RSP) |
0x677d CMP $0x8,%RSI |
0x6781 JAE 679e |
0x6783 MOV 0x2d8(%RSP),%R15 |
0x678b MOV 0x1f0(%RSP),%R14 |
0x6793 XOR %EBX,%EBX |
0x6795 VMOVAPS %XMM1,%XMM2 |
0x6799 JMP 6969 |
0x679e MOV 0x2d8(%RSP),%R15 |
0x67a6 MOV 0x1f0(%RSP),%R14 |
0x67ae MOV %RSI,%RAX |
0x67b1 AND $-0x8,%RAX |
0x67b5 VBROADCASTSS %XMM2,%YMM0 |
0x67ba VBROADCASTSS %XMM1,%YMM1 |
0x67bf XOR %R13D,%R13D |
0x67c2 MOV %RAX,0x70(%RSP) |
0x67c7 MOV $0x7fffffffffffffe0,%RAX |
0x67d1 LEA 0x18(%RAX),%RBX |
0x67d5 VMOVAPS %YMM0,0x320(%RSP) |
0x67de VMOVAPS %YMM1,0x3e0(%RSP) |
0x67e7 AND %RSI,%RBX |
(49) 0x67ea VMOVUPS (%R14,%R13,4),%YMM0 |
(49) 0x67f0 VSUBPS 0x3e0(%RSP),%YMM0,%YMM0 |
(49) 0x67f9 VMOVAPS %YMM0,0x140(%RSP) |
(49) 0x6802 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(49) 0x6808 VMOVAPS %XMM0,0xa0(%RSP) |
(49) 0x6811 VZEROUPPER |
(49) 0x6814 CALL 1160 <expf@plt> |
(49) 0x6819 VMOVAPS %XMM0,0xe0(%RSP) |
(49) 0x6822 VMOVSHDUP 0xa0(%RSP),%XMM0 |
(49) 0x682b CALL 1160 <expf@plt> |
(49) 0x6830 VMOVAPS 0xe0(%RSP),%XMM1 |
(49) 0x6839 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(49) 0x683f VMOVAPS %XMM0,0xe0(%RSP) |
(49) 0x6848 VPERMILPD $0x1,0xa0(%RSP),%XMM0 |
(49) 0x6853 CALL 1160 <expf@plt> |
(49) 0x6858 VMOVAPS 0xe0(%RSP),%XMM1 |
(49) 0x6861 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(49) 0x6867 VMOVAPS %XMM0,0xe0(%RSP) |
(49) 0x6870 VPERMILPS $-0x1,0xa0(%RSP),%XMM0 |
(49) 0x687b CALL 1160 <expf@plt> |
(49) 0x6880 VMOVAPS 0xe0(%RSP),%XMM1 |
(49) 0x6889 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(49) 0x688f VMOVAPS %XMM0,0xa0(%RSP) |
(49) 0x6898 VMOVAPS 0x140(%RSP),%YMM0 |
(49) 0x68a1 VZEROUPPER |
(49) 0x68a4 CALL 1160 <expf@plt> |
(49) 0x68a9 VMOVAPS %XMM0,0xe0(%RSP) |
(49) 0x68b2 VMOVSHDUP 0x140(%RSP),%XMM0 |
(49) 0x68bb CALL 1160 <expf@plt> |
(49) 0x68c0 VMOVAPS 0xe0(%RSP),%XMM1 |
(49) 0x68c9 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(49) 0x68cf VMOVAPS %XMM0,0xe0(%RSP) |
(49) 0x68d8 VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(49) 0x68e3 CALL 1160 <expf@plt> |
(49) 0x68e8 VMOVAPS 0xe0(%RSP),%XMM1 |
(49) 0x68f1 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(49) 0x68f7 VMOVAPS %XMM0,0xe0(%RSP) |
(49) 0x6900 VPERMILPS $-0x1,0x140(%RSP),%XMM0 |
(49) 0x690b CALL 1160 <expf@plt> |
(49) 0x6910 VMOVAPS 0xe0(%RSP),%XMM1 |
(49) 0x6919 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(49) 0x691f VINSERTF128 $0x1,0xa0(%RSP),%YMM0,%YMM0 |
(49) 0x692a VDIVPS 0x320(%RSP),%YMM0,%YMM0 |
(49) 0x6933 VMOVUPS %YMM0,(%R15,%R13,4) |
(49) 0x6939 ADD $0x8,%R13 |
(49) 0x693d CMP %R13,0x70(%RSP) |
(49) 0x6942 JNE 67ea |
0x6948 VMOVAPS 0x170(%RSP),%XMM2 |
0x6951 VMOVAPS 0x120(%RSP),%XMM1 |
0x695a MOV 0x30(%RSP),%RSI |
0x695f MOV 0x40(%RSP),%R13 |
0x6964 CMP %RBX,%RSI |
0x6967 JE 69a4 |
(58) 0x6969 VMOVSS (%R14,%RBX,4),%XMM0 |
(58) 0x696f VSUBSS %XMM2,%XMM0,%XMM0 |
(58) 0x6973 VZEROUPPER |
(58) 0x6976 CALL 1160 <expf@plt> |
(58) 0x697b VMOVAPS 0x120(%RSP),%XMM1 |
(58) 0x6984 VMOVAPS 0x170(%RSP),%XMM2 |
(58) 0x698d MOV 0x30(%RSP),%RSI |
(58) 0x6992 VDIVSS %XMM1,%XMM0,%XMM0 |
(58) 0x6996 VMOVSS %XMM0,(%R15,%RBX,4) |
(58) 0x699c INC %RBX |
(58) 0x699f CMP %RBX,%RSI |
(58) 0x69a2 JNE 6969 |
0x69a4 MOV 0x160(%RSP),%R14 |
0x69ac LEA 0x1(%R14),%RBX |
0x69b0 CMP %R13,%RBX |
0x69b3 JAE 5e65 |
0x69b9 MOV 0x390(%RSP),%RDI |
0x69c1 MOV 0x398(%RSP),%RDX |
0x69c9 SUB %R14D,%EDX |
0x69cc MOV $0x3fffffffc,%RAX |
0x69d6 XOR %ESI,%ESI |
0x69d8 SAL $0x2,%RDX |
0x69dc AND %RAX,%RDX |
0x69df ADD $0x4,%RDX |
0x69e3 IMUL %R14,%RDI |
0x69e7 ADD $0x4,%RDI |
0x69eb AND %RAX,%RDI |
0x69ee ADD %R12,%RDI |
0x69f1 VZEROUPPER |
0x69f4 CALL 1090 <memset@plt> |
0x69f9 VMOVAPS 0x120(%RSP),%XMM1 |
0x6a02 MOV 0x30(%RSP),%RSI |
0x6a07 JMP 5e65 |
/home/eoseret/llm-attention/attention_v2.cpp: 43 - 61 |
-------------------------------------------------------------------------------- |
43: for (int row = 0; row < N; ++row) { |
44: const float *S_row = &S[row * N]; |
45: |
46: float max_val = -FLT_MAX; |
47: for (int idx = 0; idx <= row; ++idx) // vectorised |
48: if (S_row[idx] > max_val) max_val = S_row[idx]; |
49: |
50: float sum = 0.0f; |
51: #pragma clang loop vectorize(enable) |
52: for (int idx = 0; idx <= row; ++idx) // vectorised |
53: sum += expf(S_row[idx] - max_val); |
54: |
55: for (int idx = 0; idx <= row; ++idx) //vectorised |
56: P[row * N + idx] = expf(S_row[idx] - max_val) / sum; |
57: |
58: for (int idx = row + 1; idx < N; ++idx) |
59: P[row * N + idx] = 0.0f; |
60: |
61: D[row] = sum; |
| Coverage (%) | Name | Source Location | Module |
|---|
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.16 |
| CQA speedup if FP arith vectorized | 1.89 |
| CQA speedup if fully vectorized | 7.47 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.61 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention_v2.cpp:43-44,attention_v2.cpp:47-47,attention_v2.cpp:52-52,attention_v2.cpp:55-55,attention_v2.cpp:58-61 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 22.13 |
| CQA cycles if no scalar integer | 10.25 |
| CQA cycles if FP arith vectorized | 11.70 |
| CQA cycles if fully vectorized | 2.96 |
| Front-end cycles | 22.13 |
| P0 cycles | 10.67 |
| P1 cycles | 10.67 |
| P2 cycles | 10.67 |
| P3 cycles | 10.67 |
| P4 cycles | 10.67 |
| P5 cycles | 10.67 |
| P6 cycles | 13.75 |
| P7 cycles | 13.75 |
| P8 cycles | 13.75 |
| P9 cycles | 13.75 |
| P10 cycles | 8.00 |
| P11 cycles | 8.00 |
| P12 cycles | 8.00 |
| P13 cycles | 8.00 |
| P14 cycles | 5.00 |
| P15 cycles | 5.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 176.00 |
| Nb uops | 177.00 |
| Nb loads | 37.00 |
| Nb stores | 19.00 |
| Nb stack references | 19.00 |
| FLOP/cycle | 1.72 |
| Nb FLOP add-sub | 38.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 32.54 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 404.00 |
| Bytes stored | 316.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 40.34 |
| Vectorization ratio load | 32.14 |
| Vectorization ratio store | 47.37 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 50.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 40.63 |
| Vector-efficiency ratio all | 19.33 |
| Vector-efficiency ratio load | 18.53 |
| Vector-efficiency ratio store | 25.99 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 23.44 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 17.58 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.16 |
| CQA speedup if FP arith vectorized | 1.89 |
| CQA speedup if fully vectorized | 7.47 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.61 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention_v2.cpp:43-44,attention_v2.cpp:47-47,attention_v2.cpp:52-52,attention_v2.cpp:55-55,attention_v2.cpp:58-61 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 22.13 |
| CQA cycles if no scalar integer | 10.25 |
| CQA cycles if FP arith vectorized | 11.70 |
| CQA cycles if fully vectorized | 2.96 |
| Front-end cycles | 22.13 |
| P0 cycles | 10.67 |
| P1 cycles | 10.67 |
| P2 cycles | 10.67 |
| P3 cycles | 10.67 |
| P4 cycles | 10.67 |
| P5 cycles | 10.67 |
| P6 cycles | 13.75 |
| P7 cycles | 13.75 |
| P8 cycles | 13.75 |
| P9 cycles | 13.75 |
| P10 cycles | 8.00 |
| P11 cycles | 8.00 |
| P12 cycles | 8.00 |
| P13 cycles | 8.00 |
| P14 cycles | 5.00 |
| P15 cycles | 5.00 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 176.00 |
| Nb uops | 177.00 |
| Nb loads | 37.00 |
| Nb stores | 19.00 |
| Nb stack references | 19.00 |
| FLOP/cycle | 1.72 |
| Nb FLOP add-sub | 38.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 32.54 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 404.00 |
| Bytes stored | 316.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 40.34 |
| Vectorization ratio load | 32.14 |
| Vectorization ratio store | 47.37 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 50.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 40.63 |
| Vector-efficiency ratio all | 19.33 |
| Vector-efficiency ratio load | 18.53 |
| Vector-efficiency ratio store | 25.99 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 23.44 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 17.58 |
| Path / |
| Function | main |
| Source file and lines | attention_v2.cpp:43-61 |
| Module | attention-clang-znver5-256 |
| nb instructions | 176 |
| nb uops | 177 |
| loop length | 906 |
| used x86 registers | 12 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 4 |
| used zmm registers | 0 |
| nb stack references | 19 |
| micro-operation queue | 22.13 cycles |
| front end | 22.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 10.67 | 10.67 | 10.67 | 10.67 | 10.67 | 10.67 | 13.75 | 13.75 | 13.75 | 13.75 | 8.00 | 8.00 | 8.00 | 8.00 | 5.00 | 5.00 |
| cycles | 10.67 | 10.67 | 10.67 | 10.67 | 10.67 | 10.67 | 13.75 | 13.75 | 13.75 | 13.75 | 8.00 | 8.00 | 8.00 | 8.00 | 5.00 | 5.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 22.13 |
| Dispatch | 13.75 |
| Overall L1 | 22.13 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 77% |
| load | 75% |
| store | 90% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 75% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 73% |
| all | 40% |
| load | 32% |
| store | 47% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 40% |
| all | 11% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 10% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 11% |
| all | 26% |
| load | 26% |
| store | 38% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 29% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| all | 19% |
| load | 18% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 23% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 17% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x1e8(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x2e0(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x180(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| INC %RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV %RBX,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| ADD %RCX,0x1f0(%RSP) | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| ADD %RCX,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD %RCX,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVSS %XMM1,(%RAX,%R14,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| CMP %R13,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 6a0c <main+0x3c3c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x1f0(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND $-0x4,%R8 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x20,%R14 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP $0x4,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5ed3 <main+0x3103> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVSS 0x2138(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 5f99 <main+0x31c9> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x20,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5ee5 <main+0x3115> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVSS 0x2123(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 5f5e <main+0x318e> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VBROADCASTSS 0x2116(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV $0x7fffffffffffffe0,%RCX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND %RCX,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VMOVAPS %YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM3,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMAXPS %YMM3,%YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| CMP %RAX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 5fa7 <main+0x31d7> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| TEST $0x1c,%SIL | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JE 5f99 <main+0x31c9> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| ADD $0x1c,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RSI,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| JMP 5fa2 <main+0x31d2> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM1,0x170(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| MOV %RDI,0x2d8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RDX,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| CMP $0x3,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JA 5fde <main+0x320e> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 672e <main+0x395e> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x20,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5ff7 <main+0x3227> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 660c <main+0x383c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| MOV %RSI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %R8,0x350(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| AND %RAX,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VMOVAPS %YMM0,0x440(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %YMM0,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x3e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x400(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS 0x3e0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VADDPS 0x320(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VADDPS 0x400(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VADDPS %YMM0,%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| CMP %RBX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JNE 65ec <main+0x381c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVAPS 0x170(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x40(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 676c <main+0x399c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VMOVAPS 0x170(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x40(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x350(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| TEST $0x1c,%SIL | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JE 672e <main+0x395e> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| MOV %RBX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM2,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0-2 | 0.25 | scal (6.3%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV %R14,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| LEA 0x1c(%RAX),%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RSI,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VMOVAPS %XMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x40(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| VMOVAPS 0x170(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| CMP %RBX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 676c <main+0x399c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %R14,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM2,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| CMP $0x8,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 679e <main+0x39ce> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x2d8(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1f0(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VMOVAPS %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| JMP 6969 <main+0x3b99> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x2d8(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1f0(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND $-0x8,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VBROADCASTSS %XMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| VBROADCASTSS %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| LEA 0x18(%RAX),%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVAPS %YMM0,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x3e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| AND %RSI,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VMOVAPS 0x170(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVAPS 0x120(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x40(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| CMP %RBX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 69a4 <main+0x3bd4> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| LEA 0x1(%R14),%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP %R13,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5e65 <main+0x3095> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x390(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x398(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| SUB %R14D,%EDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (6.3%) |
| MOV $0x3fffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| SAL $0x2,%RDX | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| AND %RAX,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD $0x4,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| IMUL %R14,%RDI | 1 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
| ADD $0x4,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RAX,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD %R12,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 1090 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x120(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 5e65 <main+0x3095> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| Function | main |
| Source file and lines | attention_v2.cpp:43-61 |
| Module | attention-clang-znver5-256 |
| nb instructions | 176 |
| nb uops | 177 |
| loop length | 906 |
| used x86 registers | 12 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 4 |
| used zmm registers | 0 |
| nb stack references | 19 |
| micro-operation queue | 22.13 cycles |
| front end | 22.13 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 10.67 | 10.67 | 10.67 | 10.67 | 10.67 | 10.67 | 13.75 | 13.75 | 13.75 | 13.75 | 8.00 | 8.00 | 8.00 | 8.00 | 5.00 | 5.00 |
| cycles | 10.67 | 10.67 | 10.67 | 10.67 | 10.67 | 10.67 | 13.75 | 13.75 | 13.75 | 13.75 | 8.00 | 8.00 | 8.00 | 8.00 | 5.00 | 5.00 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 22.13 |
| Dispatch | 13.75 |
| Overall L1 | 22.13 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 77% |
| load | 75% |
| store | 90% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 75% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 73% |
| all | 40% |
| load | 32% |
| store | 47% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 40% |
| all | 11% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 10% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 11% |
| all | 26% |
| load | 26% |
| store | 38% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 29% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| all | 19% |
| load | 18% |
| store | 25% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 23% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 17% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | P15 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x1e8(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x2e0(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x180(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| INC %RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV %RBX,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| ADD %RCX,0x1f0(%RSP) | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| ADD %RCX,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD %RCX,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVSS %XMM1,(%RAX,%R14,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (6.3%) |
| CMP %R13,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 6a0c <main+0x3c3c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x1f0(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND $-0x4,%R8 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x20,%R14 | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP $0x4,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5ed3 <main+0x3103> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVSS 0x2138(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 5f99 <main+0x31c9> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x20,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5ee5 <main+0x3115> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVSS 0x2123(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| JMP 5f5e <main+0x318e> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VBROADCASTSS 0x2116(%RIP),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV $0x7fffffffffffffe0,%RCX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND %RCX,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VMOVAPS %YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMOVAPS %YMM3,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMAXPS %YMM3,%YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| CMP %RAX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 5fa7 <main+0x31d7> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| TEST $0x1c,%SIL | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JE 5f99 <main+0x31c9> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| ADD $0x1c,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RSI,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| JMP 5fa2 <main+0x31d2> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM1,0x170(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| MOV %RDI,0x2d8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV %RDX,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| CMP $0x3,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JA 5fde <main+0x320e> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 672e <main+0x395e> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x20,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5ff7 <main+0x3227> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 660c <main+0x383c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| MOV %RSI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| MOV %R8,0x350(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| AND %RAX,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VMOVAPS %YMM0,0x440(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %YMM0,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x3e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM0,0x400(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS 0x3e0(%RSP),%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
| MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VADDPS 0x320(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VADDPS 0x400(%RSP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VADDPS %YMM0,%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 2 | 0.25 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| CMP %RBX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JNE 65ec <main+0x381c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| VMOVAPS 0x170(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x40(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 676c <main+0x399c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VMOVAPS 0x170(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x40(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x350(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| TEST $0x1c,%SIL | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| JE 672e <main+0x395e> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| MOV %RBX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | scal (12.5%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM2,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0-2 | 0.25 | scal (6.3%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| MOV %R14,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| LEA 0x1c(%RAX),%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RSI,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VMOVAPS %XMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM2,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1-2 | 0.25 | vect (25.0%) |
| MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x40(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 0.50 | scal (6.3%) |
| VMOVAPS 0x170(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| CMP %RBX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 676c <main+0x399c> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV %R14,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM2,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (25.0%) |
| CMP $0x8,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 679e <main+0x39ce> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x2d8(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1f0(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VMOVAPS %XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| JMP 6969 <main+0x3b99> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x2d8(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x1f0(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.13 | N/A |
| AND $-0x8,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VBROADCASTSS %XMM2,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| VBROADCASTSS %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1-2 | 0.50 | scal (6.3%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffe0,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| LEA 0x18(%RAX),%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VMOVAPS %YMM0,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| VMOVAPS %YMM1,0x3e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 0.50 | vect (50.0%) |
| AND %RSI,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VMOVAPS 0x170(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVAPS 0x120(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| MOV 0x40(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| CMP %RBX,%RSI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JE 69a4 <main+0x3bd4> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x160(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| LEA 0x1(%R14),%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| CMP %R13,%RBX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 5e65 <main+0x3095> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33-0.50 | N/A |
| MOV 0x390(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | N/A |
| MOV 0x398(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| SUB %R14D,%EDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (6.3%) |
| MOV $0x3fffffffc,%RAX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| SAL $0x2,%RDX | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
| AND %RAX,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| ADD $0x4,%RDX | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| IMUL %R14,%RDI | 1 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
| ADD $0x4,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RAX,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| ADD %R12,%RDI | 1 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 1090 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x120(%RSP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 | scal (12.5%) |
| JMP 5e65 <main+0x3095> | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
