Loop Id: 181 | Module: exec | Source: generate_chunk.cpp:85-128 [...] | Coverage: 0.01% |
---|
Loop Id: 181 | Module: exec | Source: generate_chunk.cpp:85-128 [...] | Coverage: 0.01% |
---|
0x4295c0 VPADDQ %ZMM24,%ZMM19,%ZMM19 |
0x4295c6 ADD $0x8,%R13 |
0x4295ca MOV -0x60(%RBP),%RAX |
0x4295ce CMP %RAX,%R13 |
0x4295d1 JAE 42a287 |
0x4295d7 VPADDQ %ZMM19,%ZMM18,%ZMM17 |
0x4295dd VMOVDQA64 %ZMM17,%ZMM0 |
0x4295e3 VMOVDQA64 %ZMM16,%ZMM1 |
0x4295e9 MOV $0x451210,%RAX |
0x4295f0 CALL %RAX |
0x4295f2 VPXOR %XMM1,%XMM1,%XMM1 |
0x4295f6 VPMULLQ %ZMM16,%ZMM0,%ZMM1 |
0x4295fc VPSUBQ %ZMM1,%ZMM17,%ZMM1 |
0x429602 MOV -0xa0(%RBP),%RAX |
0x429609 VBROADCASTSD (%RAX,%RBX,8),%ZMM3 |
0x429610 MOV -0x98(%RBP),%RAX |
0x429617 VBROADCASTSD (%RAX,%RBX,8),%ZMM2 |
0x42961e CMP $0x2,%R12D |
0x429622 JGE 429810 |
0x429628 CMP $0x1,%R12D |
0x42962c JNE 4295c0 |
0x42962e MOV -0x58(%RBP),%RAX |
0x429632 MOV (%RAX),%RSI |
0x429635 MOV 0x258(%RSI),%RAX |
0x42963c VPMOVQD %ZMM1,%YMM4 |
0x429642 VPCMPEQD %YMM5,%YMM5,%YMM5 |
0x429646 VPSUBD %YMM5,%YMM4,%YMM4 |
0x42964a KXNORW %K0,%K0,%K1 |
0x42964e VPXOR %XMM5,%XMM5,%XMM5 |
0x429652 VGATHERDPD (%RAX,%YMM4,8),%ZMM5{%K1} |
0x429659 VCMPPD $0x2,%ZMM5,%ZMM3,%K1 |
0x429660 KORTESTB %K1,%K1 |
0x429664 JE 4295c0 |
0x42966a KMOVQ %K1,%K2 |
0x42966f VXORPD %XMM3,%XMM3,%XMM3 |
0x429673 VGATHERQPD (%RAX,%ZMM1,8),%ZMM3{%K2} |
0x42967a MOV -0x88(%RBP),%RAX |
0x429681 MOV 0x8(%RAX),%RAX |
0x429685 VCMPPD $0x1,(%RAX,%RBX,8){1to8},%ZMM3,%K1{%K1} |
0x42968d KORTESTB %K1,%K1 |
0x429691 JE 4295c0 |
0x429697 MOV 0x278(%RSI),%RAX |
0x42969e VPSLLQ $0x20,%ZMM0,%ZMM0 |
0x4296a5 VPADDQ %ZMM20,%ZMM0,%ZMM3 |
0x4296ab VPSRAQ $0x20,%ZMM3,%ZMM4 |
0x4296b2 KMOVQ %K1,%K2 |
0x4296b7 VPXOR %XMM3,%XMM3,%XMM3 |
0x4296bb VGATHERQPD (%RAX,%ZMM4,8),%ZMM3{%K2} |
0x4296c2 VCMPPD $0x2,%ZMM3,%ZMM2,%K1{%K1} |
0x4296c9 KORTESTB %K1,%K1 |
0x4296cd JE 4295c0 |
0x4296d3 VPSRAQ $0x20,%ZMM0,%ZMM0 |
0x4296da KMOVQ %K1,%K2 |
0x4296df VXORPD %XMM2,%XMM2,%XMM2 |
0x4296e3 VGATHERQPD (%RAX,%ZMM0,8),%ZMM2{%K2} |
0x4296ea MOV -0x80(%RBP),%RAX |
0x4296ee MOV 0x8(%RAX),%RAX |
0x4296f2 VCMPPD $0x1,(%RAX,%RBX,8){1to8},%ZMM2,%K1{%K1} |
0x4296fa KORTESTB %K1,%K1 |
0x4296fe JE 4295c0 |
0x429704 VPXOR %XMM2,%XMM2,%XMM2 |
0x429708 VPMULLQ 0x30(%RSI){1to8},%ZMM0,%ZMM2 |
0x42970f MOV -0x40(%RBP),%RAX |
0x429713 MOV 0x8(%RAX),%RAX |
0x429717 MOV 0x40(%RSI),%RCX |
0x42971b VPADDQ %ZMM2,%ZMM1,%ZMM2 |
0x429721 VBROADCASTSD (%RAX,%RBX,8),%ZMM3 |
0x429728 KMOVQ %K1,%K2 |
0x42972d VSCATTERQPD %ZMM3,(%RCX,%ZMM2,8){%K2} |
0x429734 VPXOR %XMM2,%XMM2,%XMM2 |
0x429738 VPMULLQ (%RSI){1to8},%ZMM0,%ZMM2 |
0x42973e MOV -0x38(%RBP),%RAX |
0x429742 MOV 0x8(%RAX),%RAX |
0x429746 MOV 0x10(%RSI),%RCX |
0x42974a VPADDQ %ZMM2,%ZMM1,%ZMM2 |
0x429750 VBROADCASTSD (%RAX,%RBX,8),%ZMM3 |
0x429757 KMOVQ %K1,%K2 |
0x42975c VSCATTERQPD %ZMM3,(%RCX,%ZMM2,8){%K2} |
0x429763 MOV -0x48(%RBP),%RAX |
0x429767 MOV 0x8(%RAX),%RAX |
0x42976b MOV 0xa8(%RSI),%R9 |
0x429772 MOV 0xb8(%RSI),%RCX |
0x429779 VPBROADCASTQ %R9,%ZMM3 |
0x42977f MOV -0x50(%RBP),%RDX |
0x429783 MOV 0x8(%RDX),%R8 |
0x429787 MOV 0xe8(%RSI),%RDX |
0x42978e MOV 0xd8(%RSI),%R10 |
0x429795 VPBROADCASTQ %R10,%ZMM2 |
0x42979b VPSUBQ %ZMM21,%ZMM4,%ZMM4 |
0x4297a1 VPSUBQ %ZMM21,%ZMM0,%ZMM5 |
0x4297a7 VPMAXSQ %ZMM5,%ZMM4,%ZMM4 |
0x4297ad VPSUBQ %ZMM0,%ZMM4,%ZMM4 |
0x4297b3 VPCMPNLEUQ %ZMM22,%ZMM4,%K0 |
0x4297ba KANDB %K0,%K1,%K2 |
0x4297be KORTESTB %K2,%K2 |
0x4297c2 JE 42a1dc |
0x4297c8 VPMULLQ %ZMM0,%ZMM3,%ZMM6 |
0x4297ce VPSRLQ $0x1,%ZMM4,%ZMM5 |
0x4297d5 VPADDQ %ZMM21,%ZMM5,%ZMM5 |
0x4297db ADD %R9,%R9 |
0x4297de VPMULLQ %ZMM0,%ZMM2,%ZMM7 |
0x4297e4 ADD %R10,%R10 |
0x4297e7 VPADDQ %ZMM6,%ZMM1,%ZMM6 |
0x4297ed VPADDQ %ZMM7,%ZMM1,%ZMM7 |
0x4297f3 VPADDQ %ZMM3,%ZMM6,%ZMM8 |
0x4297f9 VPADDQ %ZMM2,%ZMM7,%ZMM9 |
0x4297ff XOR %ESI,%ESI |
0x429801 JMP 429d56 |
0x429810 JE 429d6c |
0x429816 CMP $0x3,%R12D |
0x42981a JNE 4295c0 |
0x429820 MOV -0x58(%RBP),%RAX |
0x429824 MOV (%RAX),%RCX |
0x429827 MOV 0x258(%RCX),%RAX |
0x42982e KXNORW %K0,%K0,%K1 |
0x429832 VXORPD %XMM4,%XMM4,%XMM4 |
0x429836 VGATHERQPD (%RAX,%ZMM1,8),%ZMM4{%K1} |
0x42983d VCMPPD $0,%ZMM3,%ZMM4,%K1 |
0x429844 KORTESTB %K1,%K1 |
0x429848 JE 4295c0 |
0x42984e MOV 0x278(%RCX),%RAX |
0x429855 VPSLLQ $0x20,%ZMM0,%ZMM0 |
0x42985c VPSRAQ $0x20,%ZMM0,%ZMM4 |
0x429863 KMOVQ %K1,%K2 |
0x429868 VXORPD %XMM3,%XMM3,%XMM3 |
0x42986c VGATHERQPD (%RAX,%ZMM4,8),%ZMM3{%K2} |
0x429873 VCMPPD $0,%ZMM2,%ZMM3,%K0 |
0x42987a KANDB %K0,%K1,%K1 |
0x42987e KORTESTB %K1,%K1 |
0x429882 JE 4295c0 |
0x429888 VPXOR %XMM2,%XMM2,%XMM2 |
0x42988c VPMULLQ 0x30(%RCX){1to8},%ZMM4,%ZMM2 |
0x429893 MOV -0x40(%RBP),%RAX |
0x429897 MOV 0x8(%RAX),%RAX |
0x42989b MOV 0x40(%RCX),%RDX |
0x42989f VPADDQ %ZMM2,%ZMM1,%ZMM2 |
0x4298a5 VBROADCASTSD (%RAX,%RBX,8),%ZMM3 |
0x4298ac KMOVQ %K1,%K2 |
0x4298b1 VSCATTERQPD %ZMM3,(%RDX,%ZMM2,8){%K2} |
0x4298b8 VPXOR %XMM2,%XMM2,%XMM2 |
0x4298bc VPMULLQ (%RCX){1to8},%ZMM4,%ZMM2 |
0x4298c2 MOV -0x38(%RBP),%RAX |
0x4298c6 MOV 0x8(%RAX),%RAX |
0x4298ca MOV 0x10(%RCX),%RDX |
0x4298ce VPADDQ %ZMM2,%ZMM1,%ZMM2 |
0x4298d4 VBROADCASTSD (%RAX,%RBX,8),%ZMM3 |
0x4298db KMOVQ %K1,%K2 |
0x4298e0 VSCATTERQPD %ZMM3,(%RDX,%ZMM2,8){%K2} |
0x4298e7 MOV -0x48(%RBP),%RAX |
0x4298eb MOV 0x8(%RAX),%RAX |
0x4298ef MOV 0xa8(%RCX),%R15 |
0x4298f6 MOV 0xb8(%RCX),%RDX |
0x4298fd VPBROADCASTQ %R15,%ZMM5 |
0x429903 MOV -0x50(%RBP),%RSI |
0x429907 MOV 0x8(%RSI),%R8 |
0x42990b MOV 0xe8(%RCX),%R9 |
0x429912 MOV 0xd8(%RCX),%RSI |
0x429919 VPBROADCASTQ %RSI,%ZMM6 |
0x42991f VPADDQ %ZMM20,%ZMM0,%ZMM0 |
0x429925 VPSRAQ $0x20,%ZMM0,%ZMM0 |
0x42992c VPSUBQ %ZMM21,%ZMM0,%ZMM0 |
0x429932 VPSUBQ %ZMM21,%ZMM4,%ZMM2 |
0x429938 VPMAXSQ %ZMM2,%ZMM0,%ZMM0 |
0x42993e VMOVDQA64 %ZMM4,%ZMM2 |
0x429944 VPTERNLOGQ $0xf,%ZMM4,%ZMM4,%ZMM2 |
0x42994b VPADDQ %ZMM2,%ZMM0,%ZMM0 |
0x429951 VPMOVQD %ZMM1,%YMM2 |
0x429957 VPCMPEQD %YMM3,%YMM3,%YMM3 |
0x42995b VPSUBD %YMM3,%YMM2,%YMM3 |
0x42995f VPMAXSD %YMM2,%YMM3,%YMM3 |
0x429964 VPSUBD %YMM2,%YMM3,%YMM2 |
0x429968 VPMOVZXDQ %YMM2,%ZMM3 |
0x42996e VPMULLQ %ZMM4,%ZMM5,%ZMM5 |
0x429974 VPSUBQ %ZMM21,%ZMM3,%ZMM7 |
0x42997a VPSRLQ $0x3,%ZMM7,%ZMM8 |
0x429981 VPMULLQ %ZMM4,%ZMM6,%ZMM6 |
0x429987 VPCMPNLEUD %YMM25,%YMM2,%K2 |
0x42998e KMOVW %K2,-0x2a(%RBP) |
0x429993 VPADDQ %ZMM21,%ZMM8,%ZMM4 |
0x429999 VPADDQ %ZMM5,%ZMM1,%ZMM5 |
0x42999f VPADDQ %ZMM6,%ZMM1,%ZMM1 |
0x4299a5 VPANDQ %ZMM26,%ZMM7,%ZMM6 |
0x4299ab VPCMPLEUQ %ZMM3,%ZMM6,%K3 |
0x4299b2 XOR %R11D,%R11D |
0x4299b5 JMP 4299dd |
(185) 0x4299c0 VPBROADCASTQ %R11,%ZMM7 |
(185) 0x4299c6 INC %R11 |
(185) 0x4299c9 VPCMPEQQ %ZMM0,%ZMM7,%K4 |
(185) 0x4299cf KANDNB %K0,%K4,%K0 |
(185) 0x4299d3 KTESTB %K1,%K0 |
(185) 0x4299d7 JE 4295c0 |
(185) 0x4299dd KANDB %K0,%K1,%K4 |
(185) 0x4299e1 KORTESTB %K4,%K4 |
(185) 0x4299e5 JE 429c60 |
(185) 0x4299eb VPCMPNLEUD %YMM25,%YMM2,%K5{%K4} |
(185) 0x4299f2 KORTESTB %K5,%K5 |
(185) 0x4299f6 JE 429bb0 |
(185) 0x4299fc MOV %R15,%RCX |
(185) 0x4299ff IMUL %R11,%RCX |
(185) 0x429a03 MOV %RSI,%R10 |
(185) 0x429a06 IMUL %R11,%R10 |
(185) 0x429a0a XOR %EDI,%EDI |
(185) 0x429a0c KMOVW -0x2a(%RBP),%K6 |
(185) 0x429a11 JMP 429b92 |
(187) 0x429a20 LEA (%RCX,%RDI,8),%R14 |
(187) 0x429a24 VPBROADCASTQ %R14,%ZMM7 |
(187) 0x429a2a VPADDQ %ZMM7,%ZMM5,%ZMM7 |
(187) 0x429a30 VBROADCASTSD (%RAX,%RBX,8),%ZMM8 |
(187) 0x429a37 KMOVQ %K2,%K7 |
(187) 0x429a3c VSCATTERQPD %ZMM8,(%RDX,%ZMM7,8){%K7} |
(187) 0x429a43 LEA (%R10,%RDI,8),%R14 |
(187) 0x429a47 VPBROADCASTQ %R14,%ZMM8 |
(187) 0x429a4d VPADDQ %ZMM8,%ZMM1,%ZMM8 |
(187) 0x429a53 VBROADCASTSD (%R8,%RBX,8),%ZMM9 |
(187) 0x429a5a KMOVQ %K2,%K7 |
(187) 0x429a5f VSCATTERQPD %ZMM9,(%R9,%ZMM8,8){%K7} |
(187) 0x429a66 VBROADCASTSD (%RAX,%RBX,8),%ZMM9 |
(187) 0x429a6d KMOVQ %K2,%K7 |
(187) 0x429a72 VSCATTERQPD %ZMM9,0x8(%RDX,%ZMM7,8){%K7} |
(187) 0x429a7a VBROADCASTSD (%R8,%RBX,8),%ZMM9 |
(187) 0x429a81 KMOVQ %K2,%K7 |
(187) 0x429a86 VSCATTERQPD %ZMM9,0x8(%R9,%ZMM8,8){%K7} |
(187) 0x429a8e VBROADCASTSD (%RAX,%RBX,8),%ZMM9 |
(187) 0x429a95 KMOVQ %K2,%K7 |
(187) 0x429a9a VSCATTERQPD %ZMM9,0x10(%RDX,%ZMM7,8){%K7} |
(187) 0x429aa2 VBROADCASTSD (%R8,%RBX,8),%ZMM9 |
(187) 0x429aa9 KMOVQ %K2,%K7 |
(187) 0x429aae VSCATTERQPD %ZMM9,0x10(%R9,%ZMM8,8){%K7} |
(187) 0x429ab6 VBROADCASTSD (%RAX,%RBX,8),%ZMM9 |
(187) 0x429abd KMOVQ %K2,%K7 |
(187) 0x429ac2 VSCATTERQPD %ZMM9,0x18(%RDX,%ZMM7,8){%K7} |
(187) 0x429aca VBROADCASTSD (%R8,%RBX,8),%ZMM9 |
(187) 0x429ad1 KMOVQ %K2,%K7 |
(187) 0x429ad6 VSCATTERQPD %ZMM9,0x18(%R9,%ZMM8,8){%K7} |
(187) 0x429ade VBROADCASTSD (%RAX,%RBX,8),%ZMM9 |
(187) 0x429ae5 KMOVQ %K2,%K7 |
(187) 0x429aea VSCATTERQPD %ZMM9,0x20(%RDX,%ZMM7,8){%K7} |
(187) 0x429af2 VBROADCASTSD (%R8,%RBX,8),%ZMM9 |
(187) 0x429af9 KMOVQ %K2,%K7 |
(187) 0x429afe VSCATTERQPD %ZMM9,0x20(%R9,%ZMM8,8){%K7} |
(187) 0x429b06 VBROADCASTSD (%RAX,%RBX,8),%ZMM9 |
(187) 0x429b0d KMOVQ %K2,%K7 |
(187) 0x429b12 VSCATTERQPD %ZMM9,0x28(%RDX,%ZMM7,8){%K7} |
(187) 0x429b1a VBROADCASTSD (%R8,%RBX,8),%ZMM9 |
(187) 0x429b21 KMOVQ %K2,%K7 |
(187) 0x429b26 VSCATTERQPD %ZMM9,0x28(%R9,%ZMM8,8){%K7} |
(187) 0x429b2e VBROADCASTSD (%RAX,%RBX,8),%ZMM9 |
(187) 0x429b35 KMOVQ %K2,%K7 |
(187) 0x429b3a VSCATTERQPD %ZMM9,0x30(%RDX,%ZMM7,8){%K7} |
(187) 0x429b42 VBROADCASTSD (%R8,%RBX,8),%ZMM9 |
(187) 0x429b49 KMOVQ %K2,%K7 |
(187) 0x429b4e VSCATTERQPD %ZMM9,0x30(%R9,%ZMM8,8){%K7} |
(187) 0x429b56 VBROADCASTSD (%RAX,%RBX,8),%ZMM9 |
(187) 0x429b5d KMOVQ %K2,%K7 |
(187) 0x429b62 VSCATTERQPD %ZMM9,0x38(%RDX,%ZMM7,8){%K7} |
(187) 0x429b6a VBROADCASTSD (%R8,%RBX,8),%ZMM7 |
(187) 0x429b71 VSCATTERQPD %ZMM7,0x38(%R9,%ZMM8,8){%K2} |
(187) 0x429b79 VPBROADCASTQ %RDI,%ZMM7 |
(187) 0x429b7f INC %RDI |
(187) 0x429b82 VPCMPEQQ %ZMM4,%ZMM7,%K7 |
(187) 0x429b88 KANDNB %K6,%K7,%K6 |
(187) 0x429b8c KTESTB %K5,%K6 |
(187) 0x429b90 JE 429bb0 |
(187) 0x429b92 KANDB %K6,%K5,%K2 |
(187) 0x429b96 KORTESTB %K2,%K2 |
(187) 0x429b9a JNE 429a20 |
(187) 0x429ba0 KXORW %K0,%K0,%K7 |
(187) 0x429ba4 XOR %EDI,%EDI |
(187) 0x429ba6 JMP 429b88 |
(185) 0x429bb0 VPCMPLEUQ %ZMM3,%ZMM6,%K4{%K4} |
(185) 0x429bb7 KORTESTB %K4,%K4 |
(185) 0x429bbb JE 4299c0 |
(185) 0x429bc1 MOV %R15,%RCX |
(185) 0x429bc4 IMUL %R11,%RCX |
(185) 0x429bc8 VPBROADCASTQ %RCX,%ZMM7 |
(185) 0x429bce MOV %RSI,%RCX |
(185) 0x429bd1 IMUL %R11,%RCX |
(185) 0x429bd5 VPBROADCASTQ %RCX,%ZMM8 |
(185) 0x429bdb VMOVDQA64 %ZMM6,%ZMM9 |
(185) 0x429be1 KMOVQ %K3,%K5 |
(185) 0x429be6 JMP 429c43 |
(186) 0x429bf0 VPADDQ %ZMM9,%ZMM7,%ZMM10 |
(186) 0x429bf6 VPADDQ %ZMM10,%ZMM5,%ZMM10 |
(186) 0x429bfc VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(186) 0x429c03 KMOVQ %K6,%K2 |
(186) 0x429c08 VSCATTERQPD %ZMM11,(%RDX,%ZMM10,8){%K2} |
(186) 0x429c0f VPADDQ %ZMM9,%ZMM8,%ZMM10 |
(186) 0x429c15 VPADDQ %ZMM10,%ZMM1,%ZMM10 |
(186) 0x429c1b VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(186) 0x429c22 VSCATTERQPD %ZMM11,(%R9,%ZMM10,8){%K6} |
(186) 0x429c29 VPCMPEQQ %ZMM3,%ZMM9,%K6 |
(186) 0x429c2f VPSUBQ %ZMM21,%ZMM9,%ZMM9 |
(186) 0x429c35 KANDNB %K5,%K6,%K5 |
(186) 0x429c39 KTESTB %K4,%K5 |
(186) 0x429c3d JE 4299c0 |
(186) 0x429c43 KANDB %K5,%K4,%K6 |
(186) 0x429c47 KORTESTB %K6,%K6 |
(186) 0x429c4b JNE 429bf0 |
(186) 0x429c4d KXORW %K0,%K0,%K6 |
(186) 0x429c51 VPXOR %XMM9,%XMM9,%XMM9 |
(186) 0x429c56 JMP 429c35 |
(185) 0x429c60 KXORW %K0,%K0,%K4 |
(185) 0x429c64 XOR %R11D,%R11D |
(185) 0x429c67 JMP 4299cf |
(188) 0x429c70 MOV %R9,%RDI |
(188) 0x429c73 IMUL %RSI,%RDI |
(188) 0x429c77 VPBROADCASTQ %RDI,%ZMM10 |
(188) 0x429c7d VPADDQ %ZMM10,%ZMM6,%ZMM11 |
(188) 0x429c83 VBROADCASTSD (%RAX,%RBX,8),%ZMM12 |
(188) 0x429c8a KMOVQ %K3,%K4 |
(188) 0x429c8f VSCATTERQPD %ZMM12,(%RCX,%ZMM11,8){%K4} |
(188) 0x429c96 MOV %R10,%RDI |
(188) 0x429c99 IMUL %RSI,%RDI |
(188) 0x429c9d VPBROADCASTQ %RDI,%ZMM12 |
(188) 0x429ca3 VPADDQ %ZMM12,%ZMM7,%ZMM13 |
(188) 0x429ca9 VBROADCASTSD (%R8,%RBX,8),%ZMM14 |
(188) 0x429cb0 KMOVQ %K3,%K4 |
(188) 0x429cb5 VSCATTERQPD %ZMM14,(%RDX,%ZMM13,8){%K4} |
(188) 0x429cbc VBROADCASTSD (%RAX,%RBX,8),%ZMM14 |
(188) 0x429cc3 KMOVQ %K3,%K4 |
(188) 0x429cc8 VSCATTERQPD %ZMM14,0x8(%RCX,%ZMM11,8){%K4} |
(188) 0x429cd0 VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(188) 0x429cd7 KMOVQ %K3,%K4 |
(188) 0x429cdc VSCATTERQPD %ZMM11,0x8(%RDX,%ZMM13,8){%K4} |
(188) 0x429ce4 VPADDQ %ZMM10,%ZMM8,%ZMM10 |
(188) 0x429cea VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(188) 0x429cf1 KMOVQ %K3,%K4 |
(188) 0x429cf6 VSCATTERQPD %ZMM11,(%RCX,%ZMM10,8){%K4} |
(188) 0x429cfd VPADDQ %ZMM12,%ZMM9,%ZMM11 |
(188) 0x429d03 VBROADCASTSD (%R8,%RBX,8),%ZMM12 |
(188) 0x429d0a KMOVQ %K3,%K4 |
(188) 0x429d0f VSCATTERQPD %ZMM12,(%RDX,%ZMM11,8){%K4} |
(188) 0x429d16 VBROADCASTSD (%RAX,%RBX,8),%ZMM12 |
(188) 0x429d1d KMOVQ %K3,%K4 |
(188) 0x429d22 VSCATTERQPD %ZMM12,0x8(%RCX,%ZMM10,8){%K4} |
(188) 0x429d2a VBROADCASTSD (%R8,%RBX,8),%ZMM10 |
(188) 0x429d31 VSCATTERQPD %ZMM10,0x8(%RDX,%ZMM11,8){%K3} |
(188) 0x429d39 VPBROADCASTQ %RSI,%ZMM10 |
(188) 0x429d3f INC %RSI |
(188) 0x429d42 VPCMPEQQ %ZMM5,%ZMM10,%K3 |
(188) 0x429d48 KANDNB %K0,%K3,%K0 |
(188) 0x429d4c KTESTB %K2,%K0 |
(188) 0x429d50 JE 42a1dc |
(188) 0x429d56 KANDB %K0,%K2,%K3 |
(188) 0x429d5a KORTESTB %K3,%K3 |
(188) 0x429d5e JNE 429c70 |
(188) 0x429d64 KXORW %K0,%K0,%K3 |
(188) 0x429d68 XOR %ESI,%ESI |
(188) 0x429d6a JMP 429d48 |
0x429d6c MOV -0x58(%RBP),%RAX |
0x429d70 MOV (%RAX),%RSI |
0x429d73 MOV 0x218(%RSI),%RAX |
0x429d7a KXNORW %K0,%K0,%K1 |
0x429d7e VXORPD %XMM4,%XMM4,%XMM4 |
0x429d82 VGATHERQPD (%RAX,%ZMM1,8),%ZMM4{%K1} |
0x429d89 MOV 0x238(%RSI),%RAX |
0x429d90 VSUBPD %ZMM3,%ZMM4,%ZMM4 |
0x429d96 VPSLLQ $0x20,%ZMM0,%ZMM3 |
0x429d9d VPSRAQ $0x20,%ZMM3,%ZMM6 |
0x429da4 KXNORW %K0,%K0,%K1 |
0x429da8 VPXOR %XMM0,%XMM0,%XMM0 |
0x429dac VGATHERQPD (%RAX,%ZMM6,8),%ZMM0{%K1} |
0x429db3 VMULPD %ZMM4,%ZMM4,%ZMM4 |
0x429db9 VSUBPD %ZMM2,%ZMM0,%ZMM0 |
0x429dbf VFMADD213PD %ZMM4,%ZMM0,%ZMM0 |
0x429dc5 VSQRTPD %ZMM0,%ZMM0 |
0x429dcb MOV -0x90(%RBP),%RAX |
0x429dd2 MOV 0x8(%RAX),%RAX |
0x429dd6 VBROADCASTSD (%RAX,%RBX,8),%ZMM2 |
0x429ddd VCMPPD $0x2,%ZMM2,%ZMM0,%K1 |
0x429de4 KORTESTB %K1,%K1 |
0x429de8 JE 4295c0 |
0x429dee VPXOR %XMM4,%XMM4,%XMM4 |
0x429df2 VPMULLQ 0x30(%RSI){1to8},%ZMM6,%ZMM4 |
0x429df9 VCMPPD $0x2,%ZMM2,%ZMM0,%K2 |
0x429e00 MOV -0x40(%RBP),%RAX |
0x429e04 MOV 0x8(%RAX),%RAX |
0x429e08 MOV 0x40(%RSI),%RCX |
0x429e0c VPADDQ %ZMM4,%ZMM1,%ZMM4 |
0x429e12 VBROADCASTSD (%RAX,%RBX,8),%ZMM5 |
0x429e19 KMOVQ %K2,%K3 |
0x429e1e VSCATTERQPD %ZMM5,(%RCX,%ZMM4,8){%K3} |
0x429e25 VPXOR %XMM4,%XMM4,%XMM4 |
0x429e29 VPMULLQ (%RSI){1to8},%ZMM6,%ZMM4 |
0x429e2f MOV -0x38(%RBP),%RAX |
0x429e33 MOV 0x8(%RAX),%RAX |
0x429e37 MOV 0x10(%RSI),%RCX |
0x429e3b VPADDQ %ZMM4,%ZMM1,%ZMM4 |
0x429e41 VBROADCASTSD (%RAX,%RBX,8),%ZMM5 |
0x429e48 KMOVQ %K2,%K3 |
0x429e4d VSCATTERQPD %ZMM5,(%RCX,%ZMM4,8){%K3} |
0x429e54 MOV -0x48(%RBP),%RAX |
0x429e58 MOV 0x8(%RAX),%RAX |
0x429e5c MOV 0xa8(%RSI),%RCX |
0x429e63 MOV 0xb8(%RSI),%RDX |
0x429e6a VPBROADCASTQ %RCX,%ZMM7 |
0x429e70 MOV -0x50(%RBP),%RDI |
0x429e74 MOV 0x8(%RDI),%R8 |
0x429e78 MOV 0xe8(%RSI),%R9 |
0x429e7f MOV 0xd8(%RSI),%R10 |
0x429e86 VPBROADCASTQ %R10,%ZMM8 |
0x429e8c VPADDQ %ZMM20,%ZMM3,%ZMM3 |
0x429e92 VPSRAQ $0x20,%ZMM3,%ZMM3 |
0x429e99 VPSUBQ %ZMM21,%ZMM3,%ZMM3 |
0x429e9f VPSUBQ %ZMM21,%ZMM6,%ZMM4 |
0x429ea5 VPMAXSQ %ZMM4,%ZMM3,%ZMM3 |
0x429eab VMOVDQA64 %ZMM6,%ZMM4 |
0x429eb1 VPTERNLOGQ $0xf,%ZMM6,%ZMM6,%ZMM4 |
0x429eb8 VPADDQ %ZMM4,%ZMM3,%ZMM3 |
0x429ebe VPMOVQD %ZMM1,%YMM4 |
0x429ec4 VPCMPEQD %YMM5,%YMM5,%YMM5 |
0x429ec8 VPSUBD %YMM5,%YMM4,%YMM5 |
0x429ecc VPMAXSD %YMM4,%YMM5,%YMM5 |
0x429ed1 VPSUBD %YMM4,%YMM5,%YMM4 |
0x429ed5 VPMOVZXDQ %YMM4,%ZMM5 |
0x429edb VPMULLQ %ZMM6,%ZMM7,%ZMM7 |
0x429ee1 VPSUBQ %ZMM21,%ZMM5,%ZMM9 |
0x429ee7 VPSRLQ $0x3,%ZMM9,%ZMM10 |
0x429eee VPMULLQ %ZMM6,%ZMM8,%ZMM8 |
0x429ef4 VPCMPNLEUD %YMM25,%YMM4,%K0 |
0x429efb KMOVW %K0,-0x2a(%RBP) |
0x429f00 VPADDQ %ZMM21,%ZMM10,%ZMM6 |
0x429f06 VPADDQ %ZMM7,%ZMM1,%ZMM7 |
0x429f0c VPADDQ %ZMM8,%ZMM1,%ZMM1 |
0x429f12 VPANDQ %ZMM26,%ZMM9,%ZMM8 |
0x429f18 VPCMPLEUQ %ZMM5,%ZMM8,%K0 |
0x429f1f XOR %R11D,%R11D |
0x429f22 JMP 429f4d |
(182) 0x429f30 VPBROADCASTQ %R11,%ZMM9 |
(182) 0x429f36 INC %R11 |
(182) 0x429f39 VPCMPEQQ %ZMM3,%ZMM9,%K4 |
(182) 0x429f3f KANDNB %K1,%K4,%K1 |
(182) 0x429f43 KTESTB %K2,%K1 |
(182) 0x429f47 JE 4295c0 |
(182) 0x429f4d VCMPPD $0x2,%ZMM2,%ZMM0,%K4{%K1} |
(182) 0x429f54 KORTESTB %K4,%K4 |
(182) 0x429f58 JE 42a1d0 |
(182) 0x429f5e VPCMPNLEUD %YMM25,%YMM4,%K5{%K4} |
(182) 0x429f65 KORTESTB %K5,%K5 |
(182) 0x429f69 JE 42a120 |
(182) 0x429f6f MOV %RCX,%R15 |
(182) 0x429f72 IMUL %R11,%R15 |
(182) 0x429f76 MOV %R10,%RSI |
(182) 0x429f79 IMUL %R11,%RSI |
(182) 0x429f7d XOR %EDI,%EDI |
(182) 0x429f7f KMOVW -0x2a(%RBP),%K6 |
(182) 0x429f84 JMP 42a102 |
(184) 0x429f90 LEA (%R15,%RDI,8),%R14 |
(184) 0x429f94 VPBROADCASTQ %R14,%ZMM9 |
(184) 0x429f9a VPADDQ %ZMM9,%ZMM7,%ZMM9 |
(184) 0x429fa0 VBROADCASTSD (%RAX,%RBX,8),%ZMM10 |
(184) 0x429fa7 KMOVQ %K3,%K7 |
(184) 0x429fac VSCATTERQPD %ZMM10,(%RDX,%ZMM9,8){%K7} |
(184) 0x429fb3 LEA (%RSI,%RDI,8),%R14 |
(184) 0x429fb7 VPBROADCASTQ %R14,%ZMM10 |
(184) 0x429fbd VPADDQ %ZMM10,%ZMM1,%ZMM10 |
(184) 0x429fc3 VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(184) 0x429fca KMOVQ %K3,%K7 |
(184) 0x429fcf VSCATTERQPD %ZMM11,(%R9,%ZMM10,8){%K7} |
(184) 0x429fd6 VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(184) 0x429fdd KMOVQ %K3,%K7 |
(184) 0x429fe2 VSCATTERQPD %ZMM11,0x8(%RDX,%ZMM9,8){%K7} |
(184) 0x429fea VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(184) 0x429ff1 KMOVQ %K3,%K7 |
(184) 0x429ff6 VSCATTERQPD %ZMM11,0x8(%R9,%ZMM10,8){%K7} |
(184) 0x429ffe VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(184) 0x42a005 KMOVQ %K3,%K7 |
(184) 0x42a00a VSCATTERQPD %ZMM11,0x10(%RDX,%ZMM9,8){%K7} |
(184) 0x42a012 VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(184) 0x42a019 KMOVQ %K3,%K7 |
(184) 0x42a01e VSCATTERQPD %ZMM11,0x10(%R9,%ZMM10,8){%K7} |
(184) 0x42a026 VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(184) 0x42a02d KMOVQ %K3,%K7 |
(184) 0x42a032 VSCATTERQPD %ZMM11,0x18(%RDX,%ZMM9,8){%K7} |
(184) 0x42a03a VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(184) 0x42a041 KMOVQ %K3,%K7 |
(184) 0x42a046 VSCATTERQPD %ZMM11,0x18(%R9,%ZMM10,8){%K7} |
(184) 0x42a04e VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(184) 0x42a055 KMOVQ %K3,%K7 |
(184) 0x42a05a VSCATTERQPD %ZMM11,0x20(%RDX,%ZMM9,8){%K7} |
(184) 0x42a062 VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(184) 0x42a069 KMOVQ %K3,%K7 |
(184) 0x42a06e VSCATTERQPD %ZMM11,0x20(%R9,%ZMM10,8){%K7} |
(184) 0x42a076 VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(184) 0x42a07d KMOVQ %K3,%K7 |
(184) 0x42a082 VSCATTERQPD %ZMM11,0x28(%RDX,%ZMM9,8){%K7} |
(184) 0x42a08a VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(184) 0x42a091 KMOVQ %K3,%K7 |
(184) 0x42a096 VSCATTERQPD %ZMM11,0x28(%R9,%ZMM10,8){%K7} |
(184) 0x42a09e VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(184) 0x42a0a5 KMOVQ %K3,%K7 |
(184) 0x42a0aa VSCATTERQPD %ZMM11,0x30(%RDX,%ZMM9,8){%K7} |
(184) 0x42a0b2 VBROADCASTSD (%R8,%RBX,8),%ZMM11 |
(184) 0x42a0b9 KMOVQ %K3,%K7 |
(184) 0x42a0be VSCATTERQPD %ZMM11,0x30(%R9,%ZMM10,8){%K7} |
(184) 0x42a0c6 VBROADCASTSD (%RAX,%RBX,8),%ZMM11 |
(184) 0x42a0cd KMOVQ %K3,%K7 |
(184) 0x42a0d2 VSCATTERQPD %ZMM11,0x38(%RDX,%ZMM9,8){%K7} |
(184) 0x42a0da VBROADCASTSD (%R8,%RBX,8),%ZMM9 |
(184) 0x42a0e1 VSCATTERQPD %ZMM9,0x38(%R9,%ZMM10,8){%K3} |
(184) 0x42a0e9 VPBROADCASTQ %RDI,%ZMM9 |
(184) 0x42a0ef INC %RDI |
(184) 0x42a0f2 VPCMPEQQ %ZMM6,%ZMM9,%K7 |
(184) 0x42a0f8 KANDNB %K6,%K7,%K6 |
(184) 0x42a0fc KTESTB %K5,%K6 |
(184) 0x42a100 JE 42a120 |
(184) 0x42a102 KANDB %K6,%K5,%K3 |
(184) 0x42a106 KORTESTB %K3,%K3 |
(184) 0x42a10a JNE 429f90 |
(184) 0x42a110 KXORW %K0,%K0,%K7 |
(184) 0x42a114 XOR %EDI,%EDI |
(184) 0x42a116 JMP 42a0f8 |
(182) 0x42a120 VPCMPLEUQ %ZMM5,%ZMM8,%K4{%K4} |
(182) 0x42a127 KORTESTB %K4,%K4 |
(182) 0x42a12b JE 429f30 |
(182) 0x42a131 MOV %RCX,%RSI |
(182) 0x42a134 IMUL %R11,%RSI |
(182) 0x42a138 VPBROADCASTQ %RSI,%ZMM9 |
(182) 0x42a13e MOV %R10,%RSI |
(182) 0x42a141 IMUL %R11,%RSI |
(182) 0x42a145 VPBROADCASTQ %RSI,%ZMM10 |
(182) 0x42a14b VMOVDQA64 %ZMM8,%ZMM11 |
(182) 0x42a151 KMOVQ %K0,%K5 |
(182) 0x42a156 JMP 42a1b3 |
(183) 0x42a160 VPADDQ %ZMM11,%ZMM9,%ZMM12 |
(183) 0x42a166 VPADDQ %ZMM12,%ZMM7,%ZMM12 |
(183) 0x42a16c VBROADCASTSD (%RAX,%RBX,8),%ZMM13 |
(183) 0x42a173 KMOVQ %K6,%K3 |
(183) 0x42a178 VSCATTERQPD %ZMM13,(%RDX,%ZMM12,8){%K3} |
(183) 0x42a17f VPADDQ %ZMM11,%ZMM10,%ZMM12 |
(183) 0x42a185 VPADDQ %ZMM12,%ZMM1,%ZMM12 |
(183) 0x42a18b VBROADCASTSD (%R8,%RBX,8),%ZMM13 |
(183) 0x42a192 VSCATTERQPD %ZMM13,(%R9,%ZMM12,8){%K6} |
(183) 0x42a199 VPCMPEQQ %ZMM5,%ZMM11,%K6 |
(183) 0x42a19f VPSUBQ %ZMM21,%ZMM11,%ZMM11 |
(183) 0x42a1a5 KANDNB %K5,%K6,%K5 |
(183) 0x42a1a9 KTESTB %K4,%K5 |
(183) 0x42a1ad JE 429f30 |
(183) 0x42a1b3 KANDB %K5,%K4,%K6 |
(183) 0x42a1b7 KORTESTB %K6,%K6 |
(183) 0x42a1bb JNE 42a160 |
(183) 0x42a1bd KXORW %K0,%K0,%K6 |
(183) 0x42a1c1 VPXOR %XMM11,%XMM11,%XMM11 |
(183) 0x42a1c6 JMP 42a1a5 |
(182) 0x42a1d0 KXORW %K0,%K0,%K4 |
(182) 0x42a1d4 XOR %R11D,%R11D |
(182) 0x42a1d7 JMP 429f3f |
0x42a1dc VPANDQ %ZMM23,%ZMM4,%ZMM5 |
0x42a1e2 VPCMPNEQQ %ZMM4,%ZMM5,%K1{%K1} |
0x42a1e9 KORTESTB %K1,%K1 |
0x42a1ed JE 4295c0 |
0x42a1f3 VPXOR %XMM4,%XMM4,%XMM4 |
0x42a1f7 VPMULLQ %ZMM0,%ZMM3,%ZMM4 |
0x42a1fd VPADDQ %ZMM4,%ZMM1,%ZMM4 |
0x42a203 VPMULLQ %ZMM3,%ZMM5,%ZMM3 |
0x42a209 VPADDQ %ZMM3,%ZMM4,%ZMM3 |
0x42a20f VBROADCASTSD (%RAX,%RBX,8),%ZMM4 |
0x42a216 KMOVQ %K1,%K2 |
0x42a21b VSCATTERQPD %ZMM4,(%RCX,%ZMM3,8){%K2} |
0x42a222 VPMULLQ %ZMM0,%ZMM2,%ZMM0 |
0x42a228 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
0x42a22e VPMULLQ %ZMM2,%ZMM5,%ZMM1 |
0x42a234 VPADDQ %ZMM1,%ZMM0,%ZMM0 |
0x42a23a VBROADCASTSD (%R8,%RBX,8),%ZMM1 |
0x42a241 KMOVQ %K1,%K2 |
0x42a246 VSCATTERQPD %ZMM1,(%RDX,%ZMM0,8){%K2} |
0x42a24d VBROADCASTSD (%RAX,%RBX,8),%ZMM1 |
0x42a254 KMOVQ %K1,%K2 |
0x42a259 VSCATTERQPD %ZMM1,0x8(%RCX,%ZMM3,8){%K2} |
0x42a261 VBROADCASTSD (%R8,%RBX,8),%ZMM1 |
0x42a268 VSCATTERQPD %ZMM1,0x8(%RDX,%ZMM0,8){%K1} |
0x42a270 JMP 4295c0 |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/generate_chunk.cpp: 85 - 128 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for simd collapse(2) |
86: for (int j = (0); j < (yrange); j++) { |
87: for (int i = (0); i < (xrange); i++) { |
88: double x_cent = state_xmin[state]; |
89: double y_cent = state_ymin[state]; |
90: if (state_geometry[state] == g_rect) { |
91: if (field.vertexx[i + 1] >= state_xmin[state] && field.vertexx[i] < state_xmax[state]) { |
92: if (field.vertexy[j + 1] >= state_ymin[state] && field.vertexy[j] < state_ymax[state]) { |
93: field.energy0(i, j) = state_energy[state]; |
94: field.density0(i, j) = state_density[state]; |
95: for (int kt = j; kt <= j + 1; ++kt) { |
96: for (int jt = i; jt <= i + 1; ++jt) { |
97: field.xvel0(jt, kt) = state_xvel[state]; |
98: field.yvel0(jt, kt) = state_yvel[state]; |
[...] |
105: std::sqrt((field.cellx[i] - x_cent) * (field.cellx[i] - x_cent) + (field.celly[j] - y_cent) * (field.celly[j] - y_cent)); |
106: if (radius <= state_radius[state]) { |
107: field.energy0(i, j) = state_energy[state]; |
108: field.density0(i, j) = state_density[state]; |
109: for (int kt = j; kt <= j + 1; ++kt) { |
110: for (int jt = i; jt <= i + 1; ++jt) { |
111: field.xvel0(jt, kt) = state_xvel[state]; |
112: field.yvel0(jt, kt) = state_yvel[state]; |
113: } |
114: } |
115: } |
116: } else if (state_geometry[state] == g_point) { |
117: if (field.vertexx[i] == x_cent && field.vertexy[j] == y_cent) { |
118: field.energy0(i, j) = state_energy[state]; |
119: field.density0(i, j) = state_density[state]; |
120: for (int kt = j; kt <= j + 1; ++kt) { |
121: for (int jt = i; jt <= i + 1; ++jt) { |
122: field.xvel0(jt, kt) = state_xvel[state]; |
123: field.yvel0(jt, kt) = state_yvel[state]; |
124: } |
125: } |
126: } |
127: } |
128: } |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.16 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.02 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.07 |
Bottlenecks | P0, P5, |
Function | generate_chunk(int, global_variables&) [clone .extracted] |
Source | context.h:46-46,context.h:69-69,generate_chunk.cpp:85-98,generate_chunk.cpp:105-108,generate_chunk.cpp:111-112,generate_chunk.cpp:117-119,generate_chunk.cpp:122-123,generate_chunk.cpp:128-128 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 103.50 |
CQA cycles if no scalar integer | 89.50 |
CQA cycles if FP arith vectorized | 103.50 |
CQA cycles if fully vectorized | 101.00 |
Front-end cycles | 96.33 |
DIV/SQRT cycles | 18.00 |
P0 cycles | 103.50 |
P1 cycles | 13.40 |
P2 cycles | 49.33 |
P3 cycles | 49.33 |
P4 cycles | 41.50 |
P5 cycles | 103.50 |
P6 cycles | 15.00 |
P7 cycles | 41.50 |
P8 cycles | 41.50 |
P9 cycles | 41.50 |
P10 cycles | 13.60 |
P11 cycles | 49.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 113.92 - 160.54 |
Stall cycles (UFS) | 38.42 - 85.04 |
Nb insns | 289.00 |
Nb uops | 578.00 |
Nb loads | 92.00 |
Nb stores | 12.00 |
Nb stack references | 12.00 |
FLOP/cycle | 0.46 |
Nb FLOP add-sub | 16.00 |
Nb FLOP mul | 8.00 |
Nb FLOP fma | 8.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 8.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 17.66 |
Bytes prefetched | 0.00 |
Bytes loaded | 1184.00 |
Bytes stored | 644.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 81.67 |
Vectorization ratio load | 45.71 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 93.75 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 75.00 |
Vector-efficiency ratio all | 73.26 |
Vector-efficiency ratio load | 52.50 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 89.32 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 60.68 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.16 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.02 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.07 |
Bottlenecks | P0, P5, |
Function | generate_chunk(int, global_variables&) [clone .extracted] |
Source | context.h:46-46,context.h:69-69,generate_chunk.cpp:85-98,generate_chunk.cpp:105-108,generate_chunk.cpp:111-112,generate_chunk.cpp:117-119,generate_chunk.cpp:122-123,generate_chunk.cpp:128-128 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 103.50 |
CQA cycles if no scalar integer | 89.50 |
CQA cycles if FP arith vectorized | 103.50 |
CQA cycles if fully vectorized | 101.00 |
Front-end cycles | 96.33 |
DIV/SQRT cycles | 18.00 |
P0 cycles | 103.50 |
P1 cycles | 13.40 |
P2 cycles | 49.33 |
P3 cycles | 49.33 |
P4 cycles | 41.50 |
P5 cycles | 103.50 |
P6 cycles | 15.00 |
P7 cycles | 41.50 |
P8 cycles | 41.50 |
P9 cycles | 41.50 |
P10 cycles | 13.60 |
P11 cycles | 49.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 113.92 - 160.54 |
Stall cycles (UFS) | 38.42 - 85.04 |
Nb insns | 289.00 |
Nb uops | 578.00 |
Nb loads | 92.00 |
Nb stores | 12.00 |
Nb stack references | 12.00 |
FLOP/cycle | 0.46 |
Nb FLOP add-sub | 16.00 |
Nb FLOP mul | 8.00 |
Nb FLOP fma | 8.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 8.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 17.66 |
Bytes prefetched | 0.00 |
Bytes loaded | 1184.00 |
Bytes stored | 644.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 81.67 |
Vectorization ratio load | 45.71 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 93.75 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 75.00 |
Vector-efficiency ratio all | 73.26 |
Vector-efficiency ratio load | 52.50 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 89.32 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 60.68 |
Path / |
Function | generate_chunk(int, global_variables&) [clone .extracted] |
Source file and lines | generate_chunk.cpp:85-128 |
Module | exec |
nb instructions | 289 |
nb uops | 578 |
loop length | 1598 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 5 |
used zmm registers | 21 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 2.00 |
micro-operation queue | 96.33 cycles |
front end | 96.33 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 103.50 | 13.40 | 49.33 | 49.33 | 41.50 | 103.50 | 15.00 | 41.50 | 41.50 | 41.50 | 13.60 | 49.33 |
cycles | 103.50 | 13.40 | 49.33 | 49.33 | 41.50 | 103.50 | 15.00 | 41.50 | 41.50 | 41.50 | 13.60 | 49.33 |
Cycles executing div or sqrt instructions | 18.00 |
FE+BE cycles | 113.92-160.54 |
Stall cycles | 38.42-85.04 |
ROB full (events) | 42.92-90.64 |
RS full (events) | 0.06-0.00 |
Front-end | 96.33 |
Dispatch | 103.50 |
DIV/SQRT | 18.00 |
Overall L1 | 103.50 |
all | 84% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 93% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 82% |
all | 73% |
load | 43% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 61% |
all | 81% |
load | 45% |
store | 100% |
mul | 100% |
add-sub | 93% |
fma | 100% |
div/sqrt | 100% |
other | 75% |
all | 74% |
load | 56% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 88% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 63% |
all | 69% |
load | 50% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 55% |
all | 73% |
load | 52% |
store | 100% |
mul | 100% |
add-sub | 89% |
fma | 100% |
div/sqrt | 100% |
other | 60% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VPADDQ %ZMM24,%ZMM19,%ZMM19 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD $0x8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42a287 <_Z14generate_chunkiR16global_variables.extracted+0xe57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPADDQ %ZMM19,%ZMM18,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV $0x451210,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL %RAX | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VPXOR %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM16,%ZMM0,%ZMM1 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %ZMM1,%ZMM17,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
CMP $0x2,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 429810 <_Z14generate_chunkiR16global_variables.extracted+0x3e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x258(%RSI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPMOVQD %ZMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPCMPEQD %YMM5,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBD %YMM5,%YMM4,%YMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERDPD (%RAX,%YMM4,8),%ZMM5{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VCMPPD $0x2,%ZMM5,%ZMM3,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM1,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,(%RAX,%RBX,8){1to8},%ZMM3,%K1{%K1} | 2 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x278(%RSI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPSLLQ $0x20,%ZMM0,%ZMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPADDQ %ZMM20,%ZMM0,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSRAQ $0x20,%ZMM3,%ZMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VPXOR %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM4,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VCMPPD $0x2,%ZMM3,%ZMM2,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPSRAQ $0x20,%ZMM0,%ZMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM0,8),%ZMM2{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,(%RAX,%RBX,8){1to8},%ZMM2,%K1{%K1} | 2 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ 0x30(%RSI){1to8},%ZMM0,%ZMM2 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM3,(%RCX,%ZMM2,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ (%RSI){1to8},%ZMM0,%ZMM2 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM3,(%RCX,%ZMM2,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RSI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R9,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x50(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xe8(%RSI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RSI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R10,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSUBQ %ZMM21,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSUBQ %ZMM21,%ZMM0,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPMAXSQ %ZMM5,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSUBQ %ZMM0,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPCMPNLEUQ %ZMM22,%ZMM4,%K0 | |||||||||||||||
KANDB %K0,%K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KORTESTB %K2,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 42a1dc <_Z14generate_chunkiR16global_variables.extracted+0xdac> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPMULLQ %ZMM0,%ZMM3,%ZMM6 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSRLQ $0x1,%ZMM4,%ZMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPADDQ %ZMM21,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPMULLQ %ZMM0,%ZMM2,%ZMM7 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
ADD %R10,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPADDQ %ZMM6,%ZMM1,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM7,%ZMM1,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM3,%ZMM6,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM7,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 429d56 <_Z14generate_chunkiR16global_variables.extracted+0x926> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
JE 429d6c <_Z14generate_chunkiR16global_variables.extracted+0x93c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x258(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM1,8),%ZMM4{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VCMPPD $0,%ZMM3,%ZMM4,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x278(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPSLLQ $0x20,%ZMM0,%ZMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPSRAQ $0x20,%ZMM0,%ZMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM4,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VCMPPD $0,%ZMM2,%ZMM3,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KANDB %K0,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ 0x30(%RCX){1to8},%ZMM4,%ZMM2 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RCX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM3,(%RDX,%ZMM2,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ (%RCX){1to8},%ZMM4,%ZMM2 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RCX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM3,(%RDX,%ZMM2,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RCX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RCX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R15,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xe8(%RCX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RCX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RSI,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %ZMM20,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSRAQ $0x20,%ZMM0,%ZMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPSUBQ %ZMM21,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSUBQ %ZMM21,%ZMM4,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPMAXSQ %ZMM2,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM4,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPTERNLOGQ $0xf,%ZMM4,%ZMM4,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMOVQD %ZMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPCMPEQD %YMM3,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBD %YMM3,%YMM2,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPMAXSD %YMM2,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSUBD %YMM2,%YMM3,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPMOVZXDQ %YMM2,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %ZMM4,%ZMM5,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %ZMM21,%ZMM3,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSRLQ $0x3,%ZMM7,%ZMM8 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPMULLQ %ZMM4,%ZMM6,%ZMM6 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPCMPNLEUD %YMM25,%YMM2,%K2 | |||||||||||||||
KMOVW %K2,-0x2a(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPADDQ %ZMM21,%ZMM8,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM5,%ZMM1,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM6,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPANDQ %ZMM26,%ZMM7,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPCMPLEUQ %ZMM3,%ZMM6,%K3 | |||||||||||||||
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4299dd <_Z14generate_chunkiR16global_variables.extracted+0x5ad> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x218(%RSI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM1,8),%ZMM4{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
MOV 0x238(%RSI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBPD %ZMM3,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSLLQ $0x20,%ZMM0,%ZMM3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPSRAQ $0x20,%ZMM3,%ZMM6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM6,8),%ZMM0{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM4,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM2,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM4,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTPD %ZMM0,%ZMM0 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 23-32 | 18 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VCMPPD $0x2,%ZMM2,%ZMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPXOR %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ 0x30(%RSI){1to8},%ZMM6,%ZMM4 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
VCMPPD $0x2,%ZMM2,%ZMM0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM4,%ZMM1,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K2,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM5,(%RCX,%ZMM4,8){%K3} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPXOR %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ (%RSI){1to8},%ZMM6,%ZMM4 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM4,%ZMM1,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K2,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM5,(%RCX,%ZMM4,8){%K3} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RSI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xe8(%RSI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RSI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R10,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %ZMM20,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSRAQ $0x20,%ZMM3,%ZMM3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPSUBQ %ZMM21,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSUBQ %ZMM21,%ZMM6,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPMAXSQ %ZMM4,%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM6,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPTERNLOGQ $0xf,%ZMM6,%ZMM6,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM4,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMOVQD %ZMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPCMPEQD %YMM5,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBD %YMM5,%YMM4,%YMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPMAXSD %YMM4,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSUBD %YMM4,%YMM5,%YMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPMOVZXDQ %YMM4,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %ZMM6,%ZMM7,%ZMM7 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %ZMM21,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSRLQ $0x3,%ZMM9,%ZMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPMULLQ %ZMM6,%ZMM8,%ZMM8 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPCMPNLEUD %YMM25,%YMM4,%K0 | |||||||||||||||
KMOVW %K0,-0x2a(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPADDQ %ZMM21,%ZMM10,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM7,%ZMM1,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM8,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPANDQ %ZMM26,%ZMM9,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPCMPLEUQ %ZMM5,%ZMM8,%K0 | |||||||||||||||
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 429f4d <_Z14generate_chunkiR16global_variables.extracted+0xb1d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VPANDQ %ZMM23,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPCMPNEQQ %ZMM4,%ZMM5,%K1{%K1} | |||||||||||||||
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPXOR %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM0,%ZMM3,%ZMM4 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM4,%ZMM1,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM3,%ZMM5,%ZMM3 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM3,%ZMM4,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM4,(%RCX,%ZMM3,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPMULLQ %ZMM0,%ZMM2,%ZMM0 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM2,%ZMM5,%ZMM1 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%R8,%RBX,8),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM1,(%RDX,%ZMM0,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM1,0x8(%RCX,%ZMM3,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VBROADCASTSD (%R8,%RBX,8),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VSCATTERQPD %ZMM1,0x8(%RDX,%ZMM0,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
JMP 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | generate_chunk(int, global_variables&) [clone .extracted] |
Source file and lines | generate_chunk.cpp:85-128 |
Module | exec |
nb instructions | 289 |
nb uops | 578 |
loop length | 1598 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 5 |
used zmm registers | 21 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 2.00 |
micro-operation queue | 96.33 cycles |
front end | 96.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 103.50 | 13.40 | 49.33 | 49.33 | 41.50 | 103.50 | 15.00 | 41.50 | 41.50 | 41.50 | 13.60 | 49.33 |
cycles | 103.50 | 13.40 | 49.33 | 49.33 | 41.50 | 103.50 | 15.00 | 41.50 | 41.50 | 41.50 | 13.60 | 49.33 |
Cycles executing div or sqrt instructions | 18.00 |
FE+BE cycles | 113.92-160.54 |
Stall cycles | 38.42-85.04 |
ROB full (events) | 42.92-90.64 |
RS full (events) | 0.06-0.00 |
Front-end | 96.33 |
Dispatch | 103.50 |
DIV/SQRT | 18.00 |
Overall L1 | 103.50 |
all | 84% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 93% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 82% |
all | 73% |
load | 43% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 61% |
all | 81% |
load | 45% |
store | 100% |
mul | 100% |
add-sub | 93% |
fma | 100% |
div/sqrt | 100% |
other | 75% |
all | 74% |
load | 56% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 88% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 63% |
all | 69% |
load | 50% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 55% |
all | 73% |
load | 52% |
store | 100% |
mul | 100% |
add-sub | 89% |
fma | 100% |
div/sqrt | 100% |
other | 60% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VPADDQ %ZMM24,%ZMM19,%ZMM19 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD $0x8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42a287 <_Z14generate_chunkiR16global_variables.extracted+0xe57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPADDQ %ZMM19,%ZMM18,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV $0x451210,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL %RAX | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VPXOR %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM16,%ZMM0,%ZMM1 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %ZMM1,%ZMM17,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
CMP $0x2,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 429810 <_Z14generate_chunkiR16global_variables.extracted+0x3e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x258(%RSI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPMOVQD %ZMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPCMPEQD %YMM5,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBD %YMM5,%YMM4,%YMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERDPD (%RAX,%YMM4,8),%ZMM5{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VCMPPD $0x2,%ZMM5,%ZMM3,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM1,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,(%RAX,%RBX,8){1to8},%ZMM3,%K1{%K1} | 2 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x278(%RSI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPSLLQ $0x20,%ZMM0,%ZMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPADDQ %ZMM20,%ZMM0,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSRAQ $0x20,%ZMM3,%ZMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VPXOR %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM4,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VCMPPD $0x2,%ZMM3,%ZMM2,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPSRAQ $0x20,%ZMM0,%ZMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM0,8),%ZMM2{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,(%RAX,%RBX,8){1to8},%ZMM2,%K1{%K1} | 2 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ 0x30(%RSI){1to8},%ZMM0,%ZMM2 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM3,(%RCX,%ZMM2,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ (%RSI){1to8},%ZMM0,%ZMM2 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM3,(%RCX,%ZMM2,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RSI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R9,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x50(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xe8(%RSI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RSI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R10,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSUBQ %ZMM21,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSUBQ %ZMM21,%ZMM0,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPMAXSQ %ZMM5,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSUBQ %ZMM0,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPCMPNLEUQ %ZMM22,%ZMM4,%K0 | |||||||||||||||
KANDB %K0,%K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KORTESTB %K2,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 42a1dc <_Z14generate_chunkiR16global_variables.extracted+0xdac> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPMULLQ %ZMM0,%ZMM3,%ZMM6 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSRLQ $0x1,%ZMM4,%ZMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPADDQ %ZMM21,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPMULLQ %ZMM0,%ZMM2,%ZMM7 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
ADD %R10,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPADDQ %ZMM6,%ZMM1,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM7,%ZMM1,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM3,%ZMM6,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM7,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 429d56 <_Z14generate_chunkiR16global_variables.extracted+0x926> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
JE 429d6c <_Z14generate_chunkiR16global_variables.extracted+0x93c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x258(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM1,8),%ZMM4{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VCMPPD $0,%ZMM3,%ZMM4,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x278(%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPSLLQ $0x20,%ZMM0,%ZMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPSRAQ $0x20,%ZMM0,%ZMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM4,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VCMPPD $0,%ZMM2,%ZMM3,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KANDB %K0,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ 0x30(%RCX){1to8},%ZMM4,%ZMM2 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RCX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM3,(%RDX,%ZMM2,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ (%RCX){1to8},%ZMM4,%ZMM2 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RCX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM3,(%RDX,%ZMM2,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RCX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RCX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R15,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xe8(%RCX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RCX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RSI,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %ZMM20,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSRAQ $0x20,%ZMM0,%ZMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPSUBQ %ZMM21,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSUBQ %ZMM21,%ZMM4,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPMAXSQ %ZMM2,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM4,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPTERNLOGQ $0xf,%ZMM4,%ZMM4,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMOVQD %ZMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPCMPEQD %YMM3,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBD %YMM3,%YMM2,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPMAXSD %YMM2,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSUBD %YMM2,%YMM3,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPMOVZXDQ %YMM2,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %ZMM4,%ZMM5,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %ZMM21,%ZMM3,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSRLQ $0x3,%ZMM7,%ZMM8 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPMULLQ %ZMM4,%ZMM6,%ZMM6 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPCMPNLEUD %YMM25,%YMM2,%K2 | |||||||||||||||
KMOVW %K2,-0x2a(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPADDQ %ZMM21,%ZMM8,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM5,%ZMM1,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM6,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPANDQ %ZMM26,%ZMM7,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPCMPLEUQ %ZMM3,%ZMM6,%K3 | |||||||||||||||
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4299dd <_Z14generate_chunkiR16global_variables.extracted+0x5ad> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x218(%RSI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM1,8),%ZMM4{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
MOV 0x238(%RSI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBPD %ZMM3,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSLLQ $0x20,%ZMM0,%ZMM3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPSRAQ $0x20,%ZMM3,%ZMM6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM6,8),%ZMM0{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM4,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM2,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM4,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTPD %ZMM0,%ZMM0 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 23-32 | 18 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VCMPPD $0x2,%ZMM2,%ZMM0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPXOR %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ 0x30(%RSI){1to8},%ZMM6,%ZMM4 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
VCMPPD $0x2,%ZMM2,%ZMM0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM4,%ZMM1,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K2,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM5,(%RCX,%ZMM4,8){%K3} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPXOR %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ (%RSI){1to8},%ZMM6,%ZMM4 | 5 | 1.50 | 0 | 0.33 | 0.33 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 15 | 1.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDQ %ZMM4,%ZMM1,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K2,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM5,(%RCX,%ZMM4,8){%K3} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RSI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RSI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xe8(%RSI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RSI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R10,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %ZMM20,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSRAQ $0x20,%ZMM3,%ZMM3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPSUBQ %ZMM21,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSUBQ %ZMM21,%ZMM6,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPMAXSQ %ZMM4,%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM6,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VPTERNLOGQ $0xf,%ZMM6,%ZMM6,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM4,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMOVQD %ZMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPCMPEQD %YMM5,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBD %YMM5,%YMM4,%YMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPMAXSD %YMM4,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPSUBD %YMM4,%YMM5,%YMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPMOVZXDQ %YMM4,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %ZMM6,%ZMM7,%ZMM7 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %ZMM21,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPSRLQ $0x3,%ZMM9,%ZMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
VPMULLQ %ZMM6,%ZMM8,%ZMM8 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPCMPNLEUD %YMM25,%YMM4,%K0 | |||||||||||||||
KMOVW %K0,-0x2a(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPADDQ %ZMM21,%ZMM10,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM7,%ZMM1,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM8,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPANDQ %ZMM26,%ZMM9,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPCMPLEUQ %ZMM5,%ZMM8,%K0 | |||||||||||||||
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 429f4d <_Z14generate_chunkiR16global_variables.extracted+0xb1d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VPANDQ %ZMM23,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPCMPNEQQ %ZMM4,%ZMM5,%K1{%K1} | |||||||||||||||
KORTESTB %K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPXOR %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM0,%ZMM3,%ZMM4 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM4,%ZMM1,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM3,%ZMM5,%ZMM3 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM3,%ZMM4,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM4,(%RCX,%ZMM3,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPMULLQ %ZMM0,%ZMM2,%ZMM0 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM2,%ZMM5,%ZMM1 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM1,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD (%R8,%RBX,8),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM1,(%RDX,%ZMM0,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VBROADCASTSD (%RAX,%RBX,8),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VSCATTERQPD %ZMM1,0x8(%RCX,%ZMM3,8){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VBROADCASTSD (%R8,%RBX,8),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VSCATTERQPD %ZMM1,0x8(%RDX,%ZMM0,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
JMP 4295c0 <_Z14generate_chunkiR16global_variables.extracted+0x190> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |