| Loop Id: 57 | Module: attention-clang-gnr512 | Source: attention_v2.cpp:43-61 | Coverage: 0.30% |
|---|---|---|---|
0x61f4 MOV 0x190(%RSP),%RAX |
0x61fc VMOVSS %XMM2,(%RAX,%RCX,4) |
0x6201 MOV 0x138(%RSP),%RDX |
0x6209 INC %RDX |
0x620c MOV 0x248(%RSP),%RAX |
0x6214 ADD %RAX,0x340(%RSP) |
0x621c ADD %RAX,%RDI |
0x621f MOV 0x258(%RSP),%RSI |
0x6227 ADD %RAX,%RSI |
0x622a MOV %RBX,%R8 |
0x622d CMP %R13,%RBX |
0x6230 JE 75ea |
0x6236 MOV %RDX,%R9 |
0x6239 AND $-0x8,%R9 |
0x623d MOV %RDX,%R10 |
0x6240 AND $-0x40,%R10 |
0x6244 CMP $0x8,%RDX |
0x6248 MOV %RDX,%R13 |
0x624b JAE 6264 |
0x624d XOR %EAX,%EAX |
0x624f VMOVSS 0x2dad(%RIP),%XMM1 |
0x6257 MOV 0x340(%RSP),%RBX |
0x625f JMP 635e |
0x6264 CMP $0x40,%R13 |
0x6268 MOV 0x340(%RSP),%RBX |
0x6270 JAE 6281 |
0x6272 XOR %EAX,%EAX |
0x6274 VMOVSS 0x2d88(%RIP),%XMM1 |
0x627c JMP 631a |
0x6281 MOV %R13,%RAX |
0x6284 MOV $0x7fffffffffffffc0,%RCX |
0x628e AND %RCX,%RAX |
0x6291 XOR %ECX,%ECX |
0x6293 VBROADCASTSS 0x2d67(%RIP),%ZMM3 |
0x629d VMOVAPS %ZMM3,%ZMM0 |
0x62a3 VMOVAPS %ZMM3,%ZMM1 |
0x62a9 VMOVAPS %ZMM3,%ZMM2 |
(46) 0x62af VMAXPS -0xc0(%RBX,%RCX,4),%ZMM0,%ZMM0 |
(46) 0x62b7 VMAXPS -0x80(%RBX,%RCX,4),%ZMM1,%ZMM1 |
(46) 0x62bf VMAXPS -0x40(%RBX,%RCX,4),%ZMM2,%ZMM2 |
(46) 0x62c7 VMAXPS (%RBX,%RCX,4),%ZMM3,%ZMM3 |
(46) 0x62ce ADD $0x40,%RCX |
(46) 0x62d2 CMP %RCX,%R10 |
(46) 0x62d5 JNE 62af |
0x62d7 VMAXPS %ZMM1,%ZMM0,%ZMM0 |
0x62dd VMAXPS %ZMM3,%ZMM2,%ZMM1 |
0x62e3 VMAXPS %ZMM1,%ZMM0,%ZMM0 |
0x62e9 VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 |
0x62f0 VMAXPS %YMM1,%YMM0,%YMM0 |
0x62f4 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x62fa VMAXPS %XMM1,%XMM0,%XMM0 |
0x62fe VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x6303 VMAXPS %XMM1,%XMM0,%XMM0 |
0x6307 VMOVSHDUP %XMM0,%XMM1 |
0x630b VMAXSS %XMM1,%XMM0,%XMM1 |
0x630f CMP %RAX,%R13 |
0x6312 JE 636b |
0x6314 TEST $0x38,%R13B |
0x6318 JE 635e |
0x631a MOV %RAX,%RCX |
0x631d MOV $0x7fffffffffffffc0,%RAX |
0x6327 ADD $0x38,%RAX |
0x632b AND %R13,%RAX |
0x632e VBROADCASTSS %XMM1,%YMM0 |
(62) 0x6333 VMAXPS (%RDI,%RCX,4),%YMM0,%YMM0 |
(62) 0x6338 ADD $0x8,%RCX |
(62) 0x633c CMP %RCX,%R9 |
(62) 0x633f JNE 6333 |
0x6341 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x6347 VMAXPS %XMM1,%XMM0,%XMM0 |
0x634b VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x6350 VMAXPS %XMM1,%XMM0,%XMM0 |
0x6354 VMOVSHDUP %XMM0,%XMM1 |
0x6358 VMAXSS %XMM1,%XMM0,%XMM1 |
0x635c JMP 6366 |
(61) 0x635e VMAXSS (%RDI,%RAX,4),%XMM1,%XMM1 |
(61) 0x6363 INC %RAX |
(61) 0x6366 CMP %RAX,%R13 |
(61) 0x6369 JNE 635e |
0x636b CMP $0x7,%R13 |
0x636f MOV %R13,0x138(%RSP) |
0x6377 MOV %RDI,0x30(%RSP) |
0x637c VMOVAPS %XMM1,0x320(%RSP) |
0x6385 MOV %RSI,0x258(%RSP) |
0x638d MOV %R8,0x3a8(%RSP) |
0x6395 JA 63a2 |
0x6397 VXORPS %XMM2,%XMM2,%XMM2 |
0x639b XOR %EBX,%EBX |
0x639d JMP 70ae |
0x63a2 CMP $0x40,%R13 |
0x63a6 JAE 63b3 |
0x63a8 VXORPS %XMM2,%XMM2,%XMM2 |
0x63ac XOR %EBX,%EBX |
0x63ae JMP 6ee1 |
0x63b3 MOV %R9,0x3f0(%RSP) |
0x63bb MOV $0x7fffffffffffffc0,%RAX |
0x63c5 AND %RAX,%R13 |
0x63c8 MOV %R13,0x338(%RSP) |
0x63d0 VBROADCASTSS %XMM1,%ZMM0 |
0x63d6 VMOVAPS %ZMM0,0x1940(%RSP) |
0x63de VXORPS %XMM0,%XMM0,%XMM0 |
0x63e2 VMOVAPS %ZMM0,0x480(%RSP) |
0x63ea XOR %R13D,%R13D |
0x63ed VMOVAPS %ZMM0,0x500(%RSP) |
0x63f5 VMOVAPS %ZMM0,0x4c0(%RSP) |
0x63fd VXORPS %XMM1,%XMM1,%XMM1 |
0x6401 MOV %R10,0x3f8(%RSP) |
(47) 0x6409 VMOVAPS %ZMM1,0x1980(%RSP) |
(47) 0x6411 VMOVUPS -0xc0(%RBX,%R13,4),%ZMM0 |
(47) 0x6419 VMOVUPS -0x80(%RBX,%R13,4),%ZMM1 |
(47) 0x6421 VMOVUPS -0x40(%RBX,%R13,4),%ZMM2 |
(47) 0x6429 VMOVUPS (%RBX,%R13,4),%ZMM3 |
(47) 0x6430 VMOVAPS 0x1940(%RSP),%ZMM4 |
(47) 0x6438 VSUBPS %ZMM4,%ZMM0,%ZMM5 |
(47) 0x643e VMOVAPS %ZMM5,0xc0(%RSP) |
(47) 0x6446 VSUBPS %ZMM4,%ZMM1,%ZMM0 |
(47) 0x644c VMOVAPS %ZMM0,0x140(%RSP) |
(47) 0x6454 VSUBPS %ZMM4,%ZMM2,%ZMM0 |
(47) 0x645a VMOVAPS %ZMM0,0x200(%RSP) |
(47) 0x6462 VSUBPS %ZMM4,%ZMM3,%ZMM0 |
(47) 0x6468 VMOVAPS %ZMM0,0x2c0(%RSP) |
(47) 0x6470 VEXTRACTF32X4 $0x3,%ZMM5,%XMM0 |
(47) 0x6477 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x647d VZEROUPPER |
(47) 0x6480 CALL 1160 <expf@plt> |
(47) 0x6485 VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x648e VMOVSHDUP 0x40(%RSP),%XMM0 |
(47) 0x6494 CALL 1160 <expf@plt> |
(47) 0x6499 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x64a2 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x64a8 VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x64b1 VPERMILPD $0x1,0x40(%RSP),%XMM0 |
(47) 0x64b9 CALL 1160 <expf@plt> |
(47) 0x64be VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x64c7 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x64cd VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x64d6 VPSHUFD $-0x1,0x40(%RSP),%XMM0 |
(47) 0x64dd CALL 1160 <expf@plt> |
(47) 0x64e2 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x64eb VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x64f1 VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x64fa VMOVAPS 0xc0(%RSP),%ZMM0 |
(47) 0x6502 VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(47) 0x6509 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x650f VZEROUPPER |
(47) 0x6512 CALL 1160 <expf@plt> |
(47) 0x6517 VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x6520 VMOVSHDUP 0x40(%RSP),%XMM0 |
(47) 0x6526 CALL 1160 <expf@plt> |
(47) 0x652b VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x6534 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x653a VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x6543 VPERMILPD $0x1,0x40(%RSP),%XMM0 |
(47) 0x654b CALL 1160 <expf@plt> |
(47) 0x6550 VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x6559 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x655f VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x6568 VPSHUFD $-0x1,0x40(%RSP),%XMM0 |
(47) 0x656f CALL 1160 <expf@plt> |
(47) 0x6574 VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x657d VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6583 VINSERTF128 $0x1,0x1c0(%RSP),%YMM0,%YMM0 |
(47) 0x658e VMOVAPS %YMM0,0x1c0(%RSP) |
(47) 0x6597 VMOVAPS 0xc0(%RSP),%ZMM0 |
(47) 0x659f VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x65a5 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x65ab VZEROUPPER |
(47) 0x65ae CALL 1160 <expf@plt> |
(47) 0x65b3 VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x65bc VMOVSHDUP 0x40(%RSP),%XMM0 |
(47) 0x65c2 CALL 1160 <expf@plt> |
(47) 0x65c7 VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x65d0 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x65d6 VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x65df VPERMILPD $0x1,0x40(%RSP),%XMM0 |
(47) 0x65e7 CALL 1160 <expf@plt> |
(47) 0x65ec VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x65f5 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x65fb VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x6604 VPSHUFD $-0x1,0x40(%RSP),%XMM0 |
(47) 0x660b CALL 1160 <expf@plt> |
(47) 0x6610 VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x6619 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x661f VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6625 VMOVAPS 0xc0(%RSP),%ZMM0 |
(47) 0x662d VZEROUPPER |
(47) 0x6630 CALL 1160 <expf@plt> |
(47) 0x6635 VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x663e VMOVSHDUP 0xc0(%RSP),%XMM0 |
(47) 0x6647 CALL 1160 <expf@plt> |
(47) 0x664c VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x6655 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x665b VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x6664 VPERMILPD $0x1,0xc0(%RSP),%XMM0 |
(47) 0x666f CALL 1160 <expf@plt> |
(47) 0x6674 VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x667d VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6683 VMOVAPS %XMM0,0x120(%RSP) |
(47) 0x668c VPSHUFD $-0x1,0xc0(%RSP),%XMM0 |
(47) 0x6696 CALL 1160 <expf@plt> |
(47) 0x669b VMOVAPS 0x120(%RSP),%XMM1 |
(47) 0x66a4 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x66aa VINSERTF128 $0x1,0x40(%RSP),%YMM0,%YMM0 |
(47) 0x66b2 VINSERTF64X4 $0x1,0x1c0(%RSP),%ZMM0,%ZMM0 |
(47) 0x66bb VMOVAPS 0x480(%RSP),%ZMM1 |
(47) 0x66c3 VADDPS %ZMM1,%ZMM0,%ZMM1 |
(47) 0x66c9 VMOVAPS %ZMM1,0x480(%RSP) |
(47) 0x66d1 VMOVAPS 0x140(%RSP),%ZMM0 |
(47) 0x66d9 VEXTRACTF32X4 $0x3,%ZMM0,%XMM0 |
(47) 0x66e0 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x66e9 VZEROUPPER |
(47) 0x66ec CALL 1160 <expf@plt> |
(47) 0x66f1 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x66f7 VMOVSHDUP 0xc0(%RSP),%XMM0 |
(47) 0x6700 CALL 1160 <expf@plt> |
(47) 0x6705 VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x670b VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6711 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6717 VPERMILPD $0x1,0xc0(%RSP),%XMM0 |
(47) 0x6722 CALL 1160 <expf@plt> |
(47) 0x6727 VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x672d VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6733 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6739 VPSHUFD $-0x1,0xc0(%RSP),%XMM0 |
(47) 0x6743 CALL 1160 <expf@plt> |
(47) 0x6748 VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x674e VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6754 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x675a VMOVAPS 0x140(%RSP),%ZMM0 |
(47) 0x6762 VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(47) 0x6769 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6772 VZEROUPPER |
(47) 0x6775 CALL 1160 <expf@plt> |
(47) 0x677a VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x6783 VMOVSHDUP 0xc0(%RSP),%XMM0 |
(47) 0x678c CALL 1160 <expf@plt> |
(47) 0x6791 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x679a VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x67a0 VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x67a9 VPERMILPD $0x1,0xc0(%RSP),%XMM0 |
(47) 0x67b4 CALL 1160 <expf@plt> |
(47) 0x67b9 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x67c2 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x67c8 VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x67d1 VPSHUFD $-0x1,0xc0(%RSP),%XMM0 |
(47) 0x67db CALL 1160 <expf@plt> |
(47) 0x67e0 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x67e9 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x67ef VINSERTF128 $0x1,0x40(%RSP),%YMM0,%YMM0 |
(47) 0x67f7 VMOVAPS %YMM0,0x40(%RSP) |
(47) 0x67fd VMOVAPS 0x140(%RSP),%ZMM0 |
(47) 0x6805 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x680b VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6814 VZEROUPPER |
(47) 0x6817 CALL 1160 <expf@plt> |
(47) 0x681c VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x6825 VMOVSHDUP 0xc0(%RSP),%XMM0 |
(47) 0x682e CALL 1160 <expf@plt> |
(47) 0x6833 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x683c VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6842 VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x684b VPERMILPD $0x1,0xc0(%RSP),%XMM0 |
(47) 0x6856 CALL 1160 <expf@plt> |
(47) 0x685b VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x6864 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x686a VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x6873 VPSHUFD $-0x1,0xc0(%RSP),%XMM0 |
(47) 0x687d CALL 1160 <expf@plt> |
(47) 0x6882 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x688b VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6891 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x689a VMOVAPS 0x140(%RSP),%ZMM0 |
(47) 0x68a2 VZEROUPPER |
(47) 0x68a5 CALL 1160 <expf@plt> |
(47) 0x68aa VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x68b3 VMOVSHDUP 0x140(%RSP),%XMM0 |
(47) 0x68bc CALL 1160 <expf@plt> |
(47) 0x68c1 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x68ca VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x68d0 VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x68d9 VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(47) 0x68e4 CALL 1160 <expf@plt> |
(47) 0x68e9 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x68f2 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x68f8 VMOVAPS %XMM0,0x1c0(%RSP) |
(47) 0x6901 VPSHUFD $-0x1,0x140(%RSP),%XMM0 |
(47) 0x690b CALL 1160 <expf@plt> |
(47) 0x6910 VMOVAPS 0x1c0(%RSP),%XMM1 |
(47) 0x6919 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x691f VINSERTF128 $0x1,0xc0(%RSP),%YMM0,%YMM0 |
(47) 0x692a VINSERTF64X4 $0x1,0x40(%RSP),%ZMM0,%ZMM0 |
(47) 0x6933 VMOVAPS 0x500(%RSP),%ZMM1 |
(47) 0x693b VADDPS %ZMM1,%ZMM0,%ZMM1 |
(47) 0x6941 VMOVAPS %ZMM1,0x500(%RSP) |
(47) 0x6949 VMOVAPS 0x200(%RSP),%ZMM0 |
(47) 0x6951 VEXTRACTF32X4 $0x3,%ZMM0,%XMM0 |
(47) 0x6958 VMOVAPS %XMM0,0x140(%RSP) |
(47) 0x6961 VZEROUPPER |
(47) 0x6964 CALL 1160 <expf@plt> |
(47) 0x6969 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6972 VMOVSHDUP 0x140(%RSP),%XMM0 |
(47) 0x697b CALL 1160 <expf@plt> |
(47) 0x6980 VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6989 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x698f VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6998 VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(47) 0x69a3 CALL 1160 <expf@plt> |
(47) 0x69a8 VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x69b1 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x69b7 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x69c0 VPSHUFD $-0x1,0x140(%RSP),%XMM0 |
(47) 0x69ca CALL 1160 <expf@plt> |
(47) 0x69cf VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x69d8 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x69de VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x69e7 VMOVAPS 0x200(%RSP),%ZMM0 |
(47) 0x69ef VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(47) 0x69f6 VMOVAPS %XMM0,0x140(%RSP) |
(47) 0x69ff VZEROUPPER |
(47) 0x6a02 CALL 1160 <expf@plt> |
(47) 0x6a07 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6a0d VMOVSHDUP 0x140(%RSP),%XMM0 |
(47) 0x6a16 CALL 1160 <expf@plt> |
(47) 0x6a1b VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6a21 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6a27 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6a2d VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(47) 0x6a38 CALL 1160 <expf@plt> |
(47) 0x6a3d VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6a43 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6a49 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6a4f VPSHUFD $-0x1,0x140(%RSP),%XMM0 |
(47) 0x6a59 CALL 1160 <expf@plt> |
(47) 0x6a5e VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6a64 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6a6a VINSERTF128 $0x1,0xc0(%RSP),%YMM0,%YMM0 |
(47) 0x6a75 VMOVAPS %YMM0,0xc0(%RSP) |
(47) 0x6a7e VMOVAPS 0x200(%RSP),%ZMM0 |
(47) 0x6a86 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x6a8c VMOVAPS %XMM0,0x140(%RSP) |
(47) 0x6a95 VZEROUPPER |
(47) 0x6a98 CALL 1160 <expf@plt> |
(47) 0x6a9d VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6aa3 VMOVSHDUP 0x140(%RSP),%XMM0 |
(47) 0x6aac CALL 1160 <expf@plt> |
(47) 0x6ab1 VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6ab7 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6abd VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6ac3 VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(47) 0x6ace CALL 1160 <expf@plt> |
(47) 0x6ad3 VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6ad9 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6adf VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6ae5 VPSHUFD $-0x1,0x140(%RSP),%XMM0 |
(47) 0x6aef CALL 1160 <expf@plt> |
(47) 0x6af4 VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6afa VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6b00 VMOVAPS %XMM0,0x140(%RSP) |
(47) 0x6b09 VMOVAPS 0x200(%RSP),%ZMM0 |
(47) 0x6b11 VZEROUPPER |
(47) 0x6b14 CALL 1160 <expf@plt> |
(47) 0x6b19 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6b1f VMOVSHDUP 0x200(%RSP),%XMM0 |
(47) 0x6b28 CALL 1160 <expf@plt> |
(47) 0x6b2d VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6b33 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6b39 VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6b3f VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(47) 0x6b4a CALL 1160 <expf@plt> |
(47) 0x6b4f VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6b55 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6b5b VMOVAPS %XMM0,0x40(%RSP) |
(47) 0x6b61 VPSHUFD $-0x1,0x200(%RSP),%XMM0 |
(47) 0x6b6b CALL 1160 <expf@plt> |
(47) 0x6b70 VMOVAPS 0x40(%RSP),%XMM1 |
(47) 0x6b76 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6b7c VINSERTF128 $0x1,0x140(%RSP),%YMM0,%YMM0 |
(47) 0x6b87 VINSERTF64X4 $0x1,0xc0(%RSP),%ZMM0,%ZMM0 |
(47) 0x6b90 VMOVAPS 0x4c0(%RSP),%ZMM1 |
(47) 0x6b98 VADDPS %ZMM1,%ZMM0,%ZMM1 |
(47) 0x6b9e VMOVAPS %ZMM1,0x4c0(%RSP) |
(47) 0x6ba6 VMOVAPS 0x2c0(%RSP),%ZMM0 |
(47) 0x6bae VEXTRACTF32X4 $0x3,%ZMM0,%XMM0 |
(47) 0x6bb5 VMOVAPS %XMM0,0x200(%RSP) |
(47) 0x6bbe VZEROUPPER |
(47) 0x6bc1 CALL 1160 <expf@plt> |
(47) 0x6bc6 VMOVAPS %XMM0,0x140(%RSP) |
(47) 0x6bcf VMOVSHDUP 0x200(%RSP),%XMM0 |
(47) 0x6bd8 CALL 1160 <expf@plt> |
(47) 0x6bdd VMOVAPS 0x140(%RSP),%XMM1 |
(47) 0x6be6 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6bec VMOVAPS %XMM0,0x140(%RSP) |
(47) 0x6bf5 VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(47) 0x6c00 CALL 1160 <expf@plt> |
(47) 0x6c05 VMOVAPS 0x140(%RSP),%XMM1 |
(47) 0x6c0e VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6c14 VMOVAPS %XMM0,0x140(%RSP) |
(47) 0x6c1d VPSHUFD $-0x1,0x200(%RSP),%XMM0 |
(47) 0x6c27 CALL 1160 <expf@plt> |
(47) 0x6c2c VMOVAPS 0x140(%RSP),%XMM1 |
(47) 0x6c35 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6c3b VMOVAPS %XMM0,0x140(%RSP) |
(47) 0x6c44 VMOVAPS 0x2c0(%RSP),%ZMM0 |
(47) 0x6c4c VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(47) 0x6c53 VMOVAPS %XMM0,0x200(%RSP) |
(47) 0x6c5c VZEROUPPER |
(47) 0x6c5f CALL 1160 <expf@plt> |
(47) 0x6c64 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6c6d VMOVSHDUP 0x200(%RSP),%XMM0 |
(47) 0x6c76 CALL 1160 <expf@plt> |
(47) 0x6c7b VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6c84 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6c8a VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6c93 VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(47) 0x6c9e CALL 1160 <expf@plt> |
(47) 0x6ca3 VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6cac VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6cb2 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6cbb VPSHUFD $-0x1,0x200(%RSP),%XMM0 |
(47) 0x6cc5 CALL 1160 <expf@plt> |
(47) 0x6cca VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6cd3 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6cd9 VINSERTF128 $0x1,0x140(%RSP),%YMM0,%YMM0 |
(47) 0x6ce4 VMOVAPS %YMM0,0x140(%RSP) |
(47) 0x6ced VMOVAPS 0x2c0(%RSP),%ZMM0 |
(47) 0x6cf5 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(47) 0x6cfb VMOVAPS %XMM0,0x200(%RSP) |
(47) 0x6d04 VZEROUPPER |
(47) 0x6d07 CALL 1160 <expf@plt> |
(47) 0x6d0c VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6d15 VMOVSHDUP 0x200(%RSP),%XMM0 |
(47) 0x6d1e CALL 1160 <expf@plt> |
(47) 0x6d23 VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6d2c VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6d32 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6d3b VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(47) 0x6d46 CALL 1160 <expf@plt> |
(47) 0x6d4b VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6d54 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6d5a VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6d63 VPSHUFD $-0x1,0x200(%RSP),%XMM0 |
(47) 0x6d6d CALL 1160 <expf@plt> |
(47) 0x6d72 VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6d7b VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(47) 0x6d81 VMOVAPS %XMM0,0x200(%RSP) |
(47) 0x6d8a VMOVAPS 0x2c0(%RSP),%ZMM0 |
(47) 0x6d92 VZEROUPPER |
(47) 0x6d95 CALL 1160 <expf@plt> |
(47) 0x6d9a VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6da3 VMOVSHDUP 0x2c0(%RSP),%XMM0 |
(47) 0x6dac CALL 1160 <expf@plt> |
(47) 0x6db1 VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6dba VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(47) 0x6dc0 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6dc9 VPERMILPD $0x1,0x2c0(%RSP),%XMM0 |
(47) 0x6dd4 CALL 1160 <expf@plt> |
(47) 0x6dd9 VMOVAPS 0xc0(%RSP),%XMM1 |
(47) 0x6de2 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(47) 0x6de8 VMOVAPS %XMM0,0xc0(%RSP) |
(47) 0x6df1 VPSHUFD $-0x1,0x2c0(%RSP),%XMM0 |
(47) 0x6dfb CALL 1160 <expf@plt> |
(47) 0x6e00 VMOVAPS 0x1980(%RSP),%ZMM1 |
(47) 0x6e08 MOV 0x3f8(%RSP),%R10 |
(47) 0x6e10 VMOVAPS 0xc0(%RSP),%XMM2 |
(47) 0x6e19 VINSERTPS $0x30,%XMM0,%XMM2,%XMM0 |
(47) 0x6e1f VINSERTF128 $0x1,0x200(%RSP),%YMM0,%YMM0 |
(47) 0x6e2a VINSERTF64X4 $0x1,0x140(%RSP),%ZMM0,%ZMM0 |
(47) 0x6e33 VADDPS %ZMM1,%ZMM0,%ZMM1 |
(47) 0x6e39 ADD $0x40,%R13 |
(47) 0x6e3d CMP %R13,%R10 |
(47) 0x6e40 JNE 6409 |
0x6e46 VMOVAPS 0x500(%RSP),%ZMM0 |
0x6e4e VADDPS 0x480(%RSP),%ZMM0,%ZMM0 |
0x6e56 VADDPS 0x4c0(%RSP),%ZMM0,%ZMM0 |
0x6e5e VADDPS %ZMM0,%ZMM1,%ZMM0 |
0x6e64 VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 |
0x6e6b VADDPS %ZMM1,%ZMM0,%ZMM0 |
0x6e71 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x6e77 VADDPS %XMM1,%XMM0,%XMM0 |
0x6e7b VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x6e80 VADDPS %XMM1,%XMM0,%XMM0 |
0x6e84 VMOVSHDUP %XMM0,%XMM1 |
0x6e88 VADDSS %XMM1,%XMM0,%XMM2 |
0x6e8c MOV 0x338(%RSP),%RBX |
0x6e94 CMP %RBX,0x138(%RSP) |
0x6e9c JNE 6eb9 |
0x6e9e MOV 0x138(%RSP),%R13 |
0x6ea6 MOV 0x30(%RSP),%RDI |
0x6eab VMOVAPS 0x320(%RSP),%XMM1 |
0x6eb4 JMP 70eb |
0x6eb9 MOV 0x138(%RSP),%R13 |
0x6ec1 TEST $0x38,%R13B |
0x6ec5 MOV 0x30(%RSP),%RDI |
0x6eca VMOVAPS 0x320(%RSP),%XMM1 |
0x6ed3 MOV 0x3f0(%RSP),%R9 |
0x6edb JE 70ae |
0x6ee1 VXORPS %XMM0,%XMM0,%XMM0 |
0x6ee5 VMOVSS %XMM2,%XMM0,%XMM2 |
0x6ee9 MOV $0x7fffffffffffffc0,%RAX |
0x6ef3 ADD $0x38,%RAX |
0x6ef7 AND %R13,%RAX |
0x6efa MOV %RAX,0x338(%RSP) |
0x6f02 VBROADCASTSS %XMM1,%YMM0 |
0x6f07 VMOVAPS %YMM0,0x40(%RSP) |
(60) 0x6f0d VMOVAPS %YMM2,0x140(%RSP) |
(60) 0x6f16 VMOVUPS (%RDI,%RBX,4),%YMM0 |
(60) 0x6f1b VSUBPS 0x40(%RSP),%YMM0,%YMM0 |
(60) 0x6f21 VMOVAPS %YMM0,0x2c0(%RSP) |
(60) 0x6f2a VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(60) 0x6f30 VMOVAPS %XMM0,0x200(%RSP) |
(60) 0x6f39 MOV %R9,%R13 |
(60) 0x6f3c VZEROUPPER |
(60) 0x6f3f CALL 1160 <expf@plt> |
(60) 0x6f44 VMOVAPS %XMM0,0xc0(%RSP) |
(60) 0x6f4d VMOVSHDUP 0x200(%RSP),%XMM0 |
(60) 0x6f56 CALL 1160 <expf@plt> |
(60) 0x6f5b VMOVAPS 0xc0(%RSP),%XMM1 |
(60) 0x6f64 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(60) 0x6f6a VMOVAPS %XMM0,0xc0(%RSP) |
(60) 0x6f73 VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(60) 0x6f7e CALL 1160 <expf@plt> |
(60) 0x6f83 VMOVAPS 0xc0(%RSP),%XMM1 |
(60) 0x6f8c VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(60) 0x6f92 VMOVAPS %XMM0,0xc0(%RSP) |
(60) 0x6f9b VPSHUFD $-0x1,0x200(%RSP),%XMM0 |
(60) 0x6fa5 CALL 1160 <expf@plt> |
(60) 0x6faa VMOVAPS 0xc0(%RSP),%XMM1 |
(60) 0x6fb3 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(60) 0x6fb9 VMOVAPS %XMM0,0x200(%RSP) |
(60) 0x6fc2 VMOVAPS 0x2c0(%RSP),%YMM0 |
(60) 0x6fcb VZEROUPPER |
(60) 0x6fce CALL 1160 <expf@plt> |
(60) 0x6fd3 VMOVAPS %XMM0,0xc0(%RSP) |
(60) 0x6fdc VMOVSHDUP 0x2c0(%RSP),%XMM0 |
(60) 0x6fe5 CALL 1160 <expf@plt> |
(60) 0x6fea VMOVAPS 0xc0(%RSP),%XMM1 |
(60) 0x6ff3 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(60) 0x6ff9 VMOVAPS %XMM0,0xc0(%RSP) |
(60) 0x7002 VPERMILPD $0x1,0x2c0(%RSP),%XMM0 |
(60) 0x700d CALL 1160 <expf@plt> |
(60) 0x7012 VMOVAPS 0xc0(%RSP),%XMM1 |
(60) 0x701b VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(60) 0x7021 VMOVAPS %XMM0,0xc0(%RSP) |
(60) 0x702a VPSHUFD $-0x1,0x2c0(%RSP),%XMM0 |
(60) 0x7034 CALL 1160 <expf@plt> |
(60) 0x7039 VMOVAPS 0x140(%RSP),%YMM2 |
(60) 0x7042 MOV %R13,%R9 |
(60) 0x7045 MOV 0x30(%RSP),%RDI |
(60) 0x704a VMOVAPS 0xc0(%RSP),%XMM1 |
(60) 0x7053 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(60) 0x7059 VINSERTF128 $0x1,0x200(%RSP),%YMM0,%YMM0 |
(60) 0x7064 VADDPS %YMM2,%YMM0,%YMM2 |
(60) 0x7068 ADD $0x8,%RBX |
(60) 0x706c CMP %RBX,%R13 |
(60) 0x706f JNE 6f0d |
0x7075 VEXTRACTF128 $0x1,%YMM2,%XMM0 |
0x707b VADDPS %XMM0,%XMM2,%XMM0 |
0x707f VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
0x7084 VADDPS %XMM1,%XMM0,%XMM0 |
0x7088 VMOVSHDUP %XMM0,%XMM1 |
0x708c VADDSS %XMM1,%XMM0,%XMM2 |
0x7090 MOV 0x138(%RSP),%R13 |
0x7098 MOV 0x338(%RSP),%RBX |
0x70a0 CMP %RBX,%R13 |
0x70a3 VMOVAPS 0x320(%RSP),%XMM1 |
0x70ac JE 70eb |
(48) 0x70ae VMOVAPS %XMM2,0x2c0(%RSP) |
(48) 0x70b7 VMOVSS (%RDI,%RBX,4),%XMM0 |
(48) 0x70bc VSUBSS %XMM1,%XMM0,%XMM0 |
(48) 0x70c0 VZEROUPPER |
(48) 0x70c3 CALL 1160 <expf@plt> |
(48) 0x70c8 VMOVAPS 0x2c0(%RSP),%XMM2 |
(48) 0x70d1 VMOVAPS 0x320(%RSP),%XMM1 |
(48) 0x70da MOV 0x30(%RSP),%RDI |
(48) 0x70df VADDSS %XMM2,%XMM0,%XMM2 |
(48) 0x70e3 INC %RBX |
(48) 0x70e6 CMP %RBX,%R13 |
(48) 0x70e9 JNE 70ae |
0x70eb CMP $0x3,%R13 |
0x70ef VMOVAPS %XMM2,0x2c0(%RSP) |
0x70f8 JA 7109 |
0x70fa XOR %EBX,%EBX |
0x70fc MOV 0x258(%RSP),%RDX |
0x7104 JMP 7524 |
0x7109 CMP $0x10,%R13 |
0x710d JAE 7116 |
0x710f XOR %EBX,%EBX |
0x7111 JMP 7406 |
0x7116 MOV %R13,%RAX |
0x7119 AND $-0x10,%RAX |
0x711d MOV %RAX,0x480(%RSP) |
0x7125 MOV $0x7fffffffffffffc0,%RAX |
0x712f LEA 0x30(%RAX),%RBX |
0x7133 AND %R13,%RBX |
0x7136 VBROADCASTSS %XMM2,%ZMM0 |
0x713c VMOVAPS %ZMM0,0x500(%RSP) |
0x7144 VBROADCASTSS %XMM1,%ZMM0 |
0x714a VMOVAPS %ZMM0,0x4c0(%RSP) |
0x7152 XOR %R13D,%R13D |
(49) 0x7155 VMOVUPS (%RDI,%R13,4),%ZMM0 |
(49) 0x715c VSUBPS 0x4c0(%RSP),%ZMM0,%ZMM0 |
(49) 0x7164 VMOVAPS %ZMM0,0x200(%RSP) |
(49) 0x716c VEXTRACTF32X4 $0x3,%ZMM0,%XMM0 |
(49) 0x7173 VMOVAPS %XMM0,0x140(%RSP) |
(49) 0x717c VZEROUPPER |
(49) 0x717f CALL 1160 <expf@plt> |
(49) 0x7184 VMOVAPS %XMM0,0xc0(%RSP) |
(49) 0x718d VMOVSHDUP 0x140(%RSP),%XMM0 |
(49) 0x7196 CALL 1160 <expf@plt> |
(49) 0x719b VMOVAPS 0xc0(%RSP),%XMM1 |
(49) 0x71a4 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(49) 0x71aa VMOVAPS %XMM0,0xc0(%RSP) |
(49) 0x71b3 VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(49) 0x71be CALL 1160 <expf@plt> |
(49) 0x71c3 VMOVAPS 0xc0(%RSP),%XMM1 |
(49) 0x71cc VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(49) 0x71d2 VMOVAPS %XMM0,0xc0(%RSP) |
(49) 0x71db VPSHUFD $-0x1,0x140(%RSP),%XMM0 |
(49) 0x71e5 CALL 1160 <expf@plt> |
(49) 0x71ea VMOVAPS 0xc0(%RSP),%XMM1 |
(49) 0x71f3 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(49) 0x71f9 VMOVAPS %XMM0,0xc0(%RSP) |
(49) 0x7202 VMOVAPS 0x200(%RSP),%ZMM0 |
(49) 0x720a VEXTRACTF32X4 $0x2,%ZMM0,%XMM0 |
(49) 0x7211 VMOVAPS %XMM0,0x140(%RSP) |
(49) 0x721a VZEROUPPER |
(49) 0x721d CALL 1160 <expf@plt> |
(49) 0x7222 VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x7228 VMOVSHDUP 0x140(%RSP),%XMM0 |
(49) 0x7231 CALL 1160 <expf@plt> |
(49) 0x7236 VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x723c VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(49) 0x7242 VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x7248 VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(49) 0x7253 CALL 1160 <expf@plt> |
(49) 0x7258 VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x725e VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(49) 0x7264 VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x726a VPSHUFD $-0x1,0x140(%RSP),%XMM0 |
(49) 0x7274 CALL 1160 <expf@plt> |
(49) 0x7279 VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x727f VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(49) 0x7285 VINSERTF128 $0x1,0xc0(%RSP),%YMM0,%YMM0 |
(49) 0x7290 VMOVAPS %YMM0,0xc0(%RSP) |
(49) 0x7299 VMOVAPS 0x200(%RSP),%ZMM0 |
(49) 0x72a1 VEXTRACTF128 $0x1,%YMM0,%XMM0 |
(49) 0x72a7 VMOVAPS %XMM0,0x140(%RSP) |
(49) 0x72b0 VZEROUPPER |
(49) 0x72b3 CALL 1160 <expf@plt> |
(49) 0x72b8 VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x72be VMOVSHDUP 0x140(%RSP),%XMM0 |
(49) 0x72c7 CALL 1160 <expf@plt> |
(49) 0x72cc VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x72d2 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(49) 0x72d8 VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x72de VPERMILPD $0x1,0x140(%RSP),%XMM0 |
(49) 0x72e9 CALL 1160 <expf@plt> |
(49) 0x72ee VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x72f4 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(49) 0x72fa VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x7300 VPSHUFD $-0x1,0x140(%RSP),%XMM0 |
(49) 0x730a CALL 1160 <expf@plt> |
(49) 0x730f VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x7315 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(49) 0x731b VMOVAPS %XMM0,0x140(%RSP) |
(49) 0x7324 VMOVAPS 0x200(%RSP),%ZMM0 |
(49) 0x732c VZEROUPPER |
(49) 0x732f CALL 1160 <expf@plt> |
(49) 0x7334 VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x733a VMOVSHDUP 0x200(%RSP),%XMM0 |
(49) 0x7343 CALL 1160 <expf@plt> |
(49) 0x7348 VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x734e VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(49) 0x7354 VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x735a VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(49) 0x7365 CALL 1160 <expf@plt> |
(49) 0x736a VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x7370 VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(49) 0x7376 VMOVAPS %XMM0,0x40(%RSP) |
(49) 0x737c VPSHUFD $-0x1,0x200(%RSP),%XMM0 |
(49) 0x7386 CALL 1160 <expf@plt> |
(49) 0x738b MOV 0x258(%RSP),%RDX |
(49) 0x7393 MOV 0x30(%RSP),%RDI |
(49) 0x7398 VMOVAPS 0x40(%RSP),%XMM1 |
(49) 0x739e VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(49) 0x73a4 VINSERTF128 $0x1,0x140(%RSP),%YMM0,%YMM0 |
(49) 0x73af VINSERTF64X4 $0x1,0xc0(%RSP),%ZMM0,%ZMM0 |
(49) 0x73b8 VDIVPS 0x500(%RSP),%ZMM0,%ZMM0 |
(49) 0x73c0 VMOVUPS %ZMM0,(%RDX,%R13,4) |
(49) 0x73c7 ADD $0x10,%R13 |
(49) 0x73cb CMP %R13,0x480(%RSP) |
(49) 0x73d3 JNE 7155 |
0x73d9 MOV 0x138(%RSP),%R13 |
0x73e1 CMP %RBX,%R13 |
0x73e4 VMOVAPS 0x320(%RSP),%XMM1 |
0x73ed VMOVAPS 0x2c0(%RSP),%XMM2 |
0x73f6 JE 7571 |
0x73fc TEST $0xc,%R13B |
0x7400 JE 7524 |
0x7406 MOV %R13,%RCX |
0x7409 MOV %RBX,%R13 |
0x740c MOV %RCX,%RAX |
0x740f AND $-0x4,%RAX |
0x7413 MOV %RAX,0xc0(%RSP) |
0x741b MOV $0x7fffffffffffffc0,%RAX |
0x7425 LEA 0x3c(%RAX),%RBX |
0x7429 AND %RCX,%RBX |
0x742c VBROADCASTSS %XMM2,%XMM0 |
0x7431 VMOVAPS %XMM0,0x40(%RSP) |
0x7437 VBROADCASTSS %XMM1,%XMM0 |
0x743c VMOVAPS %XMM0,0x480(%RSP) |
(59) 0x7445 VMOVUPS (%RDI,%R13,4),%XMM0 |
(59) 0x744b VSUBPS 0x480(%RSP),%XMM0,%XMM0 |
(59) 0x7454 VMOVAPS %XMM0,0x200(%RSP) |
(59) 0x745d VZEROUPPER |
(59) 0x7460 CALL 1160 <expf@plt> |
(59) 0x7465 VMOVAPS %XMM0,0x140(%RSP) |
(59) 0x746e VMOVSHDUP 0x200(%RSP),%XMM0 |
(59) 0x7477 CALL 1160 <expf@plt> |
(59) 0x747c VMOVAPS 0x140(%RSP),%XMM1 |
(59) 0x7485 VINSERTPS $0x10,%XMM0,%XMM1,%XMM0 |
(59) 0x748b VMOVAPS %XMM0,0x140(%RSP) |
(59) 0x7494 VPERMILPD $0x1,0x200(%RSP),%XMM0 |
(59) 0x749f CALL 1160 <expf@plt> |
(59) 0x74a4 VMOVAPS 0x140(%RSP),%XMM1 |
(59) 0x74ad VINSERTPS $0x20,%XMM0,%XMM1,%XMM0 |
(59) 0x74b3 VMOVAPS %XMM0,0x140(%RSP) |
(59) 0x74bc VPSHUFD $-0x1,0x200(%RSP),%XMM0 |
(59) 0x74c6 CALL 1160 <expf@plt> |
(59) 0x74cb MOV 0x258(%RSP),%RDX |
(59) 0x74d3 MOV 0x30(%RSP),%RDI |
(59) 0x74d8 VMOVAPS 0x140(%RSP),%XMM1 |
(59) 0x74e1 VINSERTPS $0x30,%XMM0,%XMM1,%XMM0 |
(59) 0x74e7 VDIVPS 0x40(%RSP),%XMM0,%XMM0 |
(59) 0x74ed VMOVUPS %XMM0,(%RDX,%R13,4) |
(59) 0x74f3 ADD $0x4,%R13 |
(59) 0x74f7 CMP %R13,0xc0(%RSP) |
(59) 0x74ff JNE 7445 |
0x7505 MOV 0x138(%RSP),%R13 |
0x750d CMP %RBX,%R13 |
0x7510 VMOVAPS 0x320(%RSP),%XMM1 |
0x7519 VMOVAPS 0x2c0(%RSP),%XMM2 |
0x7522 JE 7571 |
(58) 0x7524 VMOVSS (%RDI,%RBX,4),%XMM0 |
(58) 0x7529 VSUBSS %XMM1,%XMM0,%XMM0 |
(58) 0x752d MOV %RBX,%R13 |
(58) 0x7530 MOV %RDX,%RBX |
(58) 0x7533 VZEROUPPER |
(58) 0x7536 CALL 1160 <expf@plt> |
(58) 0x753b MOV %RBX,%RDX |
(58) 0x753e MOV %R13,%RBX |
(58) 0x7541 MOV 0x138(%RSP),%R13 |
(58) 0x7549 VMOVAPS 0x2c0(%RSP),%XMM2 |
(58) 0x7552 VMOVAPS 0x320(%RSP),%XMM1 |
(58) 0x755b MOV 0x30(%RSP),%RDI |
(58) 0x7560 VDIVSS %XMM2,%XMM0,%XMM0 |
(58) 0x7564 VMOVSS %XMM0,(%RDX,%RBX,4) |
(58) 0x7569 INC %RBX |
(58) 0x756c CMP %RBX,%R13 |
(58) 0x756f JNE 7524 |
0x7571 MOV 0x3a8(%RSP),%RCX |
0x7579 LEA 0x1(%RCX),%RBX |
0x757d MOV 0x98(%RSP),%R13 |
0x7585 CMP %R13,%RBX |
0x7588 JAE 61f4 |
0x758e MOV 0x428(%RSP),%RDX |
0x7596 SUB %ECX,%EDX |
0x7598 SAL $0x2,%RDX |
0x759c MOV $0x3fffffffc,%RAX |
0x75a6 AND %RAX,%RDX |
0x75a9 ADD $0x4,%RDX |
0x75ad MOV 0x420(%RSP),%RDI |
0x75b5 IMUL %RCX,%RDI |
0x75b9 ADD $0x4,%RDI |
0x75bd AND %RAX,%RDI |
0x75c0 ADD 0x78(%RSP),%RDI |
0x75c5 XOR %ESI,%ESI |
0x75c7 VZEROUPPER |
0x75ca CALL 1090 <memset@plt> |
0x75cf VMOVAPS 0x2c0(%RSP),%XMM2 |
0x75d8 MOV 0x3a8(%RSP),%RCX |
0x75e0 MOV 0x30(%RSP),%RDI |
0x75e5 JMP 61f4 |
/home/eoseret/llm-attention/attention_v2.cpp: 43 - 61 |
-------------------------------------------------------------------------------- |
43: for (int row = 0; row < N; ++row) { |
44: const float *S_row = &S[row * N]; |
45: |
46: float max_val = -FLT_MAX; |
47: for (int idx = 0; idx <= row; ++idx) // vectorised |
48: if (S_row[idx] > max_val) max_val = S_row[idx]; |
49: |
50: float sum = 0.0f; |
51: #pragma clang loop vectorize(enable) |
52: for (int idx = 0; idx <= row; ++idx) // vectorised loads/sub/add only; expf itself is scalarised (one expf@plt call per lane) |
53: sum += expf(S_row[idx] - max_val); |
54: |
55: for (int idx = 0; idx <= row; ++idx) // vectorised loads/sub/div/stores only; expf itself is scalarised (one expf@plt call per lane) |
56: P[row * N + idx] = expf(S_row[idx] - max_val) / sum; |
57: |
58: for (int idx = row + 1; idx < N; ++idx) |
59: P[row * N + idx] = 0.0f; |
60: |
61: D[row] = sum; |
| Coverage (%) | Name | Source Location | Module |
|---|
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.05 |
| CQA speedup if FP arith vectorized | 1.02 |
| CQA speedup if fully vectorized | 1.67 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.72 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention_v2.cpp:43-44,attention_v2.cpp:47-47,attention_v2.cpp:52-52,attention_v2.cpp:55-55,attention_v2.cpp:58-61 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 34.50 |
| CQA cycles if no scalar integer | 16.83 |
| CQA cycles if FP arith vectorized | 33.68 |
| CQA cycles if fully vectorized | 20.70 |
| Front-end cycles | 34.50 |
| P0 cycles | 20.00 |
| P1 cycles | 20.10 |
| P2 cycles | 13.33 |
| P3 cycles | 13.33 |
| P4 cycles | 12.00 |
| P5 cycles | 19.90 |
| P6 cycles | 20.00 |
| P7 cycles | 12.00 |
| P8 cycles | 12.00 |
| P9 cycles | 12.00 |
| P10 cycles | 20.00 |
| P11 cycles | 13.33 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 204.00 |
| Nb uops | 207.00 |
| Nb loads | 40.00 |
| Nb stores | 23.00 |
| Nb stack references | 22.00 |
| FLOP/cycle | 2.38 |
| Nb FLOP add-sub | 82.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 32.23 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 540.00 |
| Bytes stored | 572.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 43.07 |
| Vectorization ratio load | 35.48 |
| Vectorization ratio store | 47.83 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 57.14 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 41.89 |
| Vector-efficiency ratio all | 26.46 |
| Vector-efficiency ratio load | 23.59 |
| Vector-efficiency ratio store | 38.86 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 39.73 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 22.64 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 2.05 |
| CQA speedup if FP arith vectorized | 1.02 |
| CQA speedup if fully vectorized | 1.67 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.72 |
| Bottlenecks | micro-operation queue, |
| Function | main |
| Source | attention_v2.cpp:43-44,attention_v2.cpp:47-47,attention_v2.cpp:52-52,attention_v2.cpp:55-55,attention_v2.cpp:58-61 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 34.50 |
| CQA cycles if no scalar integer | 16.83 |
| CQA cycles if FP arith vectorized | 33.68 |
| CQA cycles if fully vectorized | 20.70 |
| Front-end cycles | 34.50 |
| P0 cycles | 20.00 |
| P1 cycles | 20.10 |
| P2 cycles | 13.33 |
| P3 cycles | 13.33 |
| P4 cycles | 12.00 |
| P5 cycles | 19.90 |
| P6 cycles | 20.00 |
| P7 cycles | 12.00 |
| P8 cycles | 12.00 |
| P9 cycles | 12.00 |
| P10 cycles | 20.00 |
| P11 cycles | 13.33 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 204.00 |
| Nb uops | 207.00 |
| Nb loads | 40.00 |
| Nb stores | 23.00 |
| Nb stack references | 22.00 |
| FLOP/cycle | 2.38 |
| Nb FLOP add-sub | 82.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 32.23 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 540.00 |
| Bytes stored | 572.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 43.07 |
| Vectorization ratio load | 35.48 |
| Vectorization ratio store | 47.83 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 57.14 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 41.89 |
| Vector-efficiency ratio all | 26.46 |
| Vector-efficiency ratio load | 23.59 |
| Vector-efficiency ratio store | 38.86 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 39.73 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 22.64 |
| Path / |
| Function | main |
| Source file and lines | attention_v2.cpp:43-61 |
| Module | attention-clang-gnr512 |
| nb instructions | 204 |
| nb uops | 207 |
| loop length | 1088 |
| used x86 registers | 11 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 3 |
| used zmm registers | 4 |
| nb stack references | 22 |
| micro-operation queue | 34.50 cycles |
| front end | 34.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 20.00 | 20.10 | 13.33 | 13.33 | 12.00 | 19.90 | 20.00 | 12.00 | 12.00 | 12.00 | 20.00 | 13.33 |
| cycles | 20.00 | 20.10 | 13.33 | 13.33 | 12.00 | 19.90 | 20.00 | 12.00 | 12.00 | 12.00 | 20.00 | 13.33 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 34.50 |
| Dispatch | 20.10 |
| Overall L1 | 34.50 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 78% |
| load | 78% |
| store | 91% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 80% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 73% |
| all | 43% |
| load | 35% |
| store | 47% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 57% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 41% |
| all | 11% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 10% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 11% |
| all | 38% |
| load | 37% |
| store | 63% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 51% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 31% |
| all | 26% |
| load | 23% |
| store | 38% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 39% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x190(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVSS %XMM2,(%RAX,%RCX,4) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| MOV 0x138(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV 0x248(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| ADD %RAX,0x340(%RSP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 | scal (12.5%) |
| ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | N/A |
| MOV 0x258(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| CMP %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JE 75ea <main+0x3d6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RDX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x40,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| CMP $0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 6264 <main+0x29e4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VMOVSS 0x2dad(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (6.3%) |
| MOV 0x340(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JMP 635e <main+0x2ade> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x40,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV 0x340(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JAE 6281 <main+0x2a01> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VMOVSS 0x2d88(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (6.3%) |
| JMP 631a <main+0x2a9a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV $0x7fffffffffffffc0,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VBROADCASTSS 0x2d67(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 | scal (6.3%) |
| VMOVAPS %ZMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (100.0%) |
| VMOVAPS %ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (100.0%) |
| VMOVAPS %ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (100.0%) |
| VMAXPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VMAXPS %ZMM3,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VMAXPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| CMP %RAX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JE 636b <main+0x2aeb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| TEST $0x38,%R13B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| JE 635e <main+0x2ade> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| ADD $0x38,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %R13,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| JMP 6366 <main+0x2ae6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 | N/A |
| CMP $0x7,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV %R13,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %RDI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM1,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| MOV %RSI,0x258(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %R8,0x3a8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| JA 63a2 <main+0x2b22> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 70ae <main+0x382e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x40,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 63b3 <main+0x2b33> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 6ee1 <main+0x3661> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %R9,0x3f0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RAX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %R13,0x338(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VBROADCASTSS %XMM1,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x1940(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %ZMM0,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VMOVAPS %ZMM0,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| VMOVAPS %ZMM0,0x4c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R10,0x3f8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VMOVAPS 0x500(%RSP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 | vect (100.0%) |
| VADDPS 0x480(%RSP),%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.65 | vect (100.0%) |
| VADDPS 0x4c0(%RSP),%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.65 | vect (100.0%) |
| VADDPS %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (50.0%) |
| VADDPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
| MOV 0x338(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,0x138(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JNE 6eb9 <main+0x3639> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JMP 70eb <main+0x386b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| TEST $0x38,%R13B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| MOV 0x3f0(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JE 70ae <main+0x382e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM2,%XMM0,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| ADD $0x38,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %R13,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0x338(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x338(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 70eb <main+0x386b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x3,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS %XMM2,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| JA 7109 <main+0x3889> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV 0x258(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JMP 7524 <main+0x3ca4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x10,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 7116 <main+0x3896> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 7406 <main+0x3b86> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND $-0x10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| LEA 0x30(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| AND %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VBROADCASTSS %XMM2,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| VBROADCASTSS %XMM1,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x4c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| VMOVAPS 0x2c0(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 7571 <main+0x3cf1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| TEST $0xc,%R13B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| JE 7524 <main+0x3ca4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %R13,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV %RBX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND $-0x4,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| LEA 0x3c(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| AND %RCX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VBROADCASTSS %XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| VMOVAPS 0x2c0(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 7571 <main+0x3cf1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x3a8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| LEA 0x1(%RCX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| MOV 0x98(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 61f4 <main+0x2974> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x428(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| SUB %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| SAL $0x2,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
| MOV $0x3fffffffc,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| ADD $0x4,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV 0x420(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
| ADD $0x4,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| ADD 0x78(%RSP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 1090 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x2c0(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| MOV 0x3a8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| JMP 61f4 <main+0x2974> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| Function | main |
| Source file and lines | attention_v2.cpp:43-61 |
| Module | attention-clang-gnr512 |
| nb instructions | 204 |
| nb uops | 207 |
| loop length | 1088 |
| used x86 registers | 11 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 3 |
| used zmm registers | 4 |
| nb stack references | 22 |
| micro-operation queue | 34.50 cycles |
| front end | 34.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 20.00 | 20.10 | 13.33 | 13.33 | 12.00 | 19.90 | 20.00 | 12.00 | 12.00 | 12.00 | 20.00 | 13.33 |
| cycles | 20.00 | 20.10 | 13.33 | 13.33 | 12.00 | 19.90 | 20.00 | 12.00 | 12.00 | 12.00 | 20.00 | 13.33 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 34.50 |
| Dispatch | 20.10 |
| Overall L1 | 34.50 |
| all | 1% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 3% |
| all | 78% |
| load | 78% |
| store | 91% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 80% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 73% |
| all | 43% |
| load | 35% |
| store | 47% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 57% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 41% |
| all | 11% |
| load | 12% |
| store | 12% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 10% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 11% |
| all | 38% |
| load | 37% |
| store | 63% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 51% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 31% |
| all | 26% |
| load | 23% |
| store | 38% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 39% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 22% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV 0x190(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVSS %XMM2,(%RAX,%RCX,4) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| MOV 0x138(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV 0x248(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| ADD %RAX,0x340(%RSP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 | scal (12.5%) |
| ADD %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | N/A |
| MOV 0x258(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| CMP %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JE 75ea <main+0x3d6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RDX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| AND $-0x40,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| CMP $0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| JAE 6264 <main+0x29e4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VMOVSS 0x2dad(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (6.3%) |
| MOV 0x340(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JMP 635e <main+0x2ade> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x40,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV 0x340(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JAE 6281 <main+0x2a01> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VMOVSS 0x2d88(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (6.3%) |
| JMP 631a <main+0x2a9a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV $0x7fffffffffffffc0,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VBROADCASTSS 0x2d67(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 | scal (6.3%) |
| VMOVAPS %ZMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (100.0%) |
| VMOVAPS %ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (100.0%) |
| VMOVAPS %ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 | vect (100.0%) |
| VMAXPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VMAXPS %ZMM3,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VMAXPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (100.0%) |
| VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (50.0%) |
| VMAXPS %YMM1,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| CMP %RAX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JE 636b <main+0x2aeb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| TEST $0x38,%R13B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| JE 635e <main+0x2ade> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| ADD $0x38,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %R13,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMAXPS %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VMAXSS %XMM1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
| JMP 6366 <main+0x2ae6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 | N/A |
| CMP $0x7,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| MOV %R13,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %RDI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VMOVAPS %XMM1,0x320(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| MOV %RSI,0x258(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV %R8,0x3a8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| JA 63a2 <main+0x2b22> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 70ae <main+0x382e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x40,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 63b3 <main+0x2b33> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 6ee1 <main+0x3661> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %R9,0x3f0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RAX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %R13,0x338(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VBROADCASTSS %XMM1,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x1940(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVAPS %ZMM0,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VMOVAPS %ZMM0,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| VMOVAPS %ZMM0,0x4c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| VXORPS %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| MOV %R10,0x3f8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VMOVAPS 0x500(%RSP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 | vect (100.0%) |
| VADDPS 0x480(%RSP),%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.65 | vect (100.0%) |
| VADDPS 0x4c0(%RSP),%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.65 | vect (100.0%) |
| VADDPS %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (50.0%) |
| VADDPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (100.0%) |
| VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
| MOV 0x338(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,0x138(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JNE 6eb9 <main+0x3639> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JMP 70eb <main+0x386b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| TEST $0x38,%R13B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| MOV 0x3f0(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JE 70ae <main+0x382e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
| VMOVSS %XMM2,%XMM0,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| ADD $0x38,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %R13,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0x338(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VBROADCASTSS %XMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (50.0%) |
| VEXTRACTF128 $0x1,%YMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (25.0%) |
| VADDPS %XMM0,%XMM2,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VADDPS %XMM1,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
| VMOVSHDUP %XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (12.5%) |
| VADDSS %XMM1,%XMM0,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x338(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 70eb <main+0x386b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| CMP $0x3,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS %XMM2,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| JA 7109 <main+0x3889> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV 0x258(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| JMP 7524 <main+0x3ca4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| CMP $0x10,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 7116 <main+0x3896> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| JMP 7406 <main+0x3b86> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
| MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND $-0x10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| LEA 0x30(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| AND %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VBROADCASTSS %XMM2,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| VBROADCASTSS %XMM1,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %ZMM0,0x4c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 | vect (100.0%) |
| XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| VMOVAPS 0x2c0(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 7571 <main+0x3cf1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| TEST $0xc,%R13B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 | N/A |
| JE 7524 <main+0x3ca4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV %R13,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| MOV %RBX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND $-0x4,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| MOV %RAX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOV $0x7fffffffffffffc0,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| LEA 0x3c(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| AND %RCX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VBROADCASTSS %XMM2,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| VBROADCASTSS %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
| VMOVAPS %XMM0,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| MOV 0x138(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %RBX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| VMOVAPS 0x320(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| VMOVAPS 0x2c0(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| JE 7571 <main+0x3cf1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x3a8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| LEA 0x1(%RCX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| MOV 0x98(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| CMP %R13,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| JAE 61f4 <main+0x2974> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
| MOV 0x428(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| SUB %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| SAL $0x2,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
| MOV $0x3fffffffc,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 | N/A |
| AND %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| ADD $0x4,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| MOV 0x420(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
| ADD $0x4,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | N/A |
| AND %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | N/A |
| ADD 0x78(%RSP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 | N/A |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
| VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| CALL 1090 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | N/A |
| VMOVAPS 0x2c0(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 | vect (25.0%) |
| MOV 0x3a8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | N/A |
| JMP 61f4 <main+0x2974> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 | N/A |
