Loop Id: 201 | Module: exec | Source: advec_mom.cpp:182-211 [...] | Coverage: 0.01% |
---|
Loop Id: 201 | Module: exec | Source: advec_mom.cpp:182-211 [...] | Coverage: 0.01% |
---|
0x42f650 CMP %EDX,%ECX |
0x42f652 MOV 0x74(%RSP),%R9D |
0x42f657 CMOVBE %ECX,%EDX |
0x42f65a ADD $0x2,%R9D |
0x42f65e LEA (%R14,%RDX,1),%ECX |
0x42f662 MOV %R9D,0x5c(%RSP) |
0x42f667 MOV %EDX,%R8D |
0x42f66a MOV %ECX,0x70(%RSP) |
0x42f66e CMP %ECX,%R14D |
0x42f671 JAE 42ff98 |
0x42f677 MOV 0x28(%RSP),%RAX |
0x42f67c MOV 0x10(%RSP),%RDI |
0x42f681 MOV 0x8(%RSP),%RCX |
0x42f686 MOV 0x78(%RSP),%R13 |
0x42f68b MOV 0x10(%RAX),%RDX |
0x42f68f MOV (%RAX),%RSI |
0x42f692 MOV (%RDI),%R9 |
0x42f695 MOV 0x10(%RDI),%RAX |
0x42f699 MOV (%RCX),%RDI |
0x42f69c MOV 0x20(%RSP),%R11 |
0x42f6a1 MOV %RDX,0x68(%RSP) |
0x42f6a6 IMUL %R13,%RSI |
0x42f6aa MOV 0x18(%RSP),%R12 |
0x42f6af MOV 0x10(%RCX),%RDX |
0x42f6b3 IMUL %R13,%RDI |
0x42f6b7 MOV 0x74(%RSP),%ECX |
0x42f6bb MOV (%R11),%R15 |
0x42f6be MOV 0x10(%R11),%R10 |
0x42f6c2 MOV 0x8(%R12),%R11 |
0x42f6c7 MOV %RDX,0x50(%RSP) |
0x42f6cc LEA -0x1(%R8),%EDX |
0x42f6d0 ADD $0x3,%ECX |
0x42f6d3 MOV %R15,0x48(%RSP) |
0x42f6d8 LEA 0x1(%R13),%R15 |
0x42f6dc MOV %RSI,0x60(%RSP) |
0x42f6e1 LEA (%R11,%R13,8),%R12 |
0x42f6e5 MOV %RDI,0x40(%RSP) |
0x42f6ea MOV %ECX,0x58(%RSP) |
0x42f6ee CMP $0xe,%EDX |
0x42f6f1 JBE 42ffa8 |
0x42f6f7 MOVSXD %EBX,%RDX |
0x42f6fa VMOVQ %R13,%XMM6 |
0x42f6ff MOV 0x50(%RSP),%R13 |
0x42f704 VPBROADCASTD %EBX,%ZMM17 |
0x42f70a KXNORB %K1,%K1,%K1 |
0x42f70e LEA (%RSI,%RDX,1),%RCX |
0x42f712 MOV 0x68(%RSP),%RSI |
0x42f717 ADD %RDI,%RDX |
0x42f71a VPADDD 0x2f11c(%RIP),%ZMM17,%ZMM17 |
0x42f724 MOV $0x10,%EDI |
0x42f729 VPBROADCASTD 0x58(%RSP),%ZMM31 |
0x42f731 VPBROADCASTD 0x74(%RSP),%ZMM24 |
0x42f739 VPBROADCASTQ %XMM6,%ZMM14 |
0x42f73f LEA (%RSI,%RCX,8),%RSI |
0x42f743 LEA (%R13,%RDX,8),%RCX |
0x42f748 MOV %R8D,%R13D |
0x42f74b VPBROADCASTD 0x5c(%RSP),%ZMM30 |
0x42f753 SHR $0x4,%R13D |
0x42f757 VPBROADCASTQ 0x48(%RSP),%ZMM23 |
0x42f75f VBROADCASTSD 0x2e647(%RIP),%ZMM10 |
0x42f769 VPBROADCASTQ %R15,%ZMM15 |
0x42f76f VBROADCASTSD 0x2f16f(%RIP),%ZMM9 |
0x42f779 VBROADCASTSD 0x2df5d(%RIP),%ZMM22 |
0x42f783 VPBROADCASTQ %R9,%ZMM11 |
0x42f789 SAL $0x7,%R13 |
0x42f78d VBROADCASTSD 0x2f149(%RIP),%ZMM21 |
0x42f797 XOR %EDX,%EDX |
0x42f799 VPBROADCASTD %EDI,%ZMM29 |
0x42f79f VXORPD %XMM6,%XMM6,%XMM6 |
0x42f7a3 NOPL (%RAX,%RAX,1) |
(203) 0x42f7a8 VMOVUPD (%RSI,%RDX,1),%ZMM7 |
(203) 0x42f7af VMOVUPD 0x40(%RSI,%RDX,1),%ZMM8 |
(203) 0x42f7b7 VMOVDQA32 %ZMM17,%ZMM1 |
(203) 0x42f7bd KMOVB %K1,%K6 |
(203) 0x42f7c1 KMOVB %K1,%K5 |
(203) 0x42f7c5 KMOVB %K1,%K7 |
(203) 0x42f7c9 VEXTRACTI32X8 $0x1,%ZMM1,%YMM0 |
(203) 0x42f7d0 VPMOVSXDQ %YMM1,%ZMM2 |
(203) 0x42f7d6 VPADDD %ZMM29,%ZMM17,%ZMM17 |
(203) 0x42f7dc VCMPPD $0x1,%ZMM6,%ZMM7,%K3 |
(203) 0x42f7e3 VCMPPD $0x1,%ZMM6,%ZMM8,%K2 |
(203) 0x42f7ea VPMOVSXDQ %YMM0,%ZMM1 |
(203) 0x42f7f0 VPBLENDMQ %ZMM15,%ZMM14,%ZMM4{%K3} |
(203) 0x42f7f6 KUNPCKBW %K3,%K2,%K4 |
(203) 0x42f7fa VPBLENDMD %ZMM31,%ZMM24,%ZMM0{%K4} |
(203) 0x42f800 VPBLENDMQ %ZMM15,%ZMM14,%ZMM3{%K2} |
(203) 0x42f806 VXORPS %XMM5,%XMM5,%XMM5 |
(203) 0x42f80a VPMULLQ %ZMM23,%ZMM4,%ZMM5 |
(203) 0x42f810 VPMOVSXDQ %YMM0,%ZMM26 |
(203) 0x42f816 VEXTRACTI32X8 $0x1,%ZMM0,%YMM0 |
(203) 0x42f81d VXORPS %XMM27,%XMM27,%XMM27 |
(203) 0x42f823 VPMULLQ %ZMM11,%ZMM26,%ZMM27 |
(203) 0x42f829 VPBLENDMQ %ZMM14,%ZMM15,%ZMM19{%K3} |
(203) 0x42f82f VPMOVSXDQ %YMM0,%ZMM0 |
(203) 0x42f835 KMOVB %K1,%K3 |
(203) 0x42f839 VPMULLQ %ZMM11,%ZMM4,%ZMM4 |
(203) 0x42f83f VPMULLQ %ZMM11,%ZMM19,%ZMM19 |
(203) 0x42f845 VPBLENDMQ %ZMM14,%ZMM15,%ZMM18{%K2} |
(203) 0x42f84b KMOVB %K1,%K2 |
(203) 0x42f84f VPMULLQ %ZMM11,%ZMM0,%ZMM0 |
(203) 0x42f855 VPMULLQ %ZMM11,%ZMM18,%ZMM18 |
(203) 0x42f85b VPBLENDMD %ZMM30,%ZMM24,%ZMM13{%K4} |
(203) 0x42f861 KMOVB %K1,%K4 |
(203) 0x42f865 VPADDQ %ZMM2,%ZMM5,%ZMM12 |
(203) 0x42f86b VXORPS %XMM5,%XMM5,%XMM5 |
(203) 0x42f86f VPMULLQ %ZMM23,%ZMM3,%ZMM5 |
(203) 0x42f875 VPMULLQ %ZMM11,%ZMM3,%ZMM3 |
(203) 0x42f87b VPADDQ %ZMM2,%ZMM27,%ZMM28 |
(203) 0x42f881 VGATHERQPD (%R10,%ZMM12,8),%ZMM25{%K6} |
(203) 0x42f888 KMOVB %K1,%K6 |
(203) 0x42f88c VPADDQ %ZMM2,%ZMM4,%ZMM4 |
(203) 0x42f892 VPADDQ %ZMM1,%ZMM0,%ZMM0 |
(203) 0x42f898 VGATHERQPD (%RAX,%ZMM0,8),%ZMM26{%K4} |
(203) 0x42f89f VPADDQ %ZMM1,%ZMM5,%ZMM12 |
(203) 0x42f8a5 VGATHERQPD (%RAX,%ZMM4,8),%ZMM5{%K5} |
(203) 0x42f8ac VPADDQ %ZMM1,%ZMM3,%ZMM3 |
(203) 0x42f8b2 VGATHERQPD (%R10,%ZMM12,8),%ZMM20{%K7} |
(203) 0x42f8b9 KMOVB %K1,%K7 |
(203) 0x42f8bd VMOVSD (%R12),%XMM12 |
(203) 0x42f8c3 VGATHERQPD (%RAX,%ZMM3,8),%ZMM4{%K3} |
(203) 0x42f8ca VGATHERQPD (%RAX,%ZMM28,8),%ZMM3{%K2} |
(203) 0x42f8d1 VPADDQ %ZMM2,%ZMM19,%ZMM28 |
(203) 0x42f8d7 VGATHERQPD (%RAX,%ZMM28,8),%ZMM0{%K6} |
(203) 0x42f8de VSUBPD %ZMM3,%ZMM5,%ZMM3 |
(203) 0x42f8e4 VSUBPD %ZMM26,%ZMM4,%ZMM27 |
(203) 0x42f8ea VSUBPD %ZMM5,%ZMM0,%ZMM0 |
(203) 0x42f8f0 VPADDQ %ZMM1,%ZMM18,%ZMM26 |
(203) 0x42f8f6 VMOVAPD %ZMM6,%ZMM28 |
(203) 0x42f8fc VGATHERQPD (%RAX,%ZMM26,8),%ZMM2{%K7} |
(203) 0x42f903 VBROADCASTSD %XMM12,%ZMM16 |
(203) 0x42f909 VSUBPD %ZMM4,%ZMM2,%ZMM2 |
(203) 0x42f90f VMULSD 0x2eef1(%RIP),%XMM12,%XMM12 |
(203) 0x42f917 VMULPD %ZMM0,%ZMM3,%ZMM1 |
(203) 0x42f91d VANDPD %ZMM10,%ZMM0,%ZMM18 |
(203) 0x42f923 VANDPD %ZMM10,%ZMM3,%ZMM3 |
(203) 0x42f929 VBROADCASTSD %XMM12,%ZMM12 |
(203) 0x42f92f VCMPPD $0xe,%ZMM6,%ZMM1,%K3 |
(203) 0x42f936 VMULPD %ZMM2,%ZMM27,%ZMM1 |
(203) 0x42f93c VCMPPD $0xe,%ZMM6,%ZMM0,%K5{%K3} |
(203) 0x42f943 VANDPD %ZMM10,%ZMM7,%ZMM0 |
(203) 0x42f949 KMOVB %K3,%K6 |
(203) 0x42f94d VCMPPD $0xe,%ZMM6,%ZMM1,%K2 |
(203) 0x42f954 VANDPD %ZMM10,%ZMM27,%ZMM1 |
(203) 0x42f95a VANDPD %ZMM10,%ZMM2,%ZMM27 |
(203) 0x42f960 VBLENDMPD %ZMM9,%ZMM22,%ZMM19{%K5} |
(203) 0x42f966 VCMPPD $0xe,%ZMM6,%ZMM2,%K4{%K2} |
(203) 0x42f96d VDIVPD %ZMM25,%ZMM0,%ZMM2 |
(203) 0x42f973 VANDPD %ZMM10,%ZMM8,%ZMM25 |
(203) 0x42f979 KMOVB %K2,%K7 |
(203) 0x42f97d VDIVPD %ZMM20,%ZMM25,%ZMM25 |
(203) 0x42f983 VSUBPD %ZMM25,%ZMM21,%ZMM0 |
(203) 0x42f989 VMOVAPD %ZMM6,%ZMM20 |
(203) 0x42f98f VGATHERDPD (%R11,%YMM13,8),%ZMM20{%K6} |
(203) 0x42f996 VSHUFI32X4 $-0x12,%ZMM13,%ZMM13,%ZMM13 |
(203) 0x42f99d VGATHERDPD (%R11,%YMM13,8),%ZMM28{%K7} |
(203) 0x42f9a4 VBLENDMPD %ZMM9,%ZMM22,%ZMM26{%K4} |
(203) 0x42f9aa VMULPD %ZMM27,%ZMM0,%ZMM13 |
(203) 0x42f9b0 VDIVPD %ZMM16,%ZMM13,%ZMM0 |
(203) 0x42f9b6 VADDPD %ZMM9,%ZMM25,%ZMM13 |
(203) 0x42f9bc VMULPD %ZMM1,%ZMM13,%ZMM13 |
(203) 0x42f9c2 VMINPD %ZMM27,%ZMM1,%ZMM1 |
(203) 0x42f9c8 VDIVPD %ZMM28,%ZMM13,%ZMM13 |
(203) 0x42f9ce VADDPD %ZMM13,%ZMM0,%ZMM0 |
(203) 0x42f9d4 VMULPD %ZMM12,%ZMM0,%ZMM13 |
(203) 0x42f9da VMINPD %ZMM1,%ZMM13,%ZMM0 |
(203) 0x42f9e0 VSUBPD %ZMM2,%ZMM21,%ZMM1 |
(203) 0x42f9e6 VSUBPD %ZMM25,%ZMM9,%ZMM13 |
(203) 0x42f9ec VMULPD %ZMM18,%ZMM1,%ZMM1 |
(203) 0x42f9f2 VMULPD %ZMM26,%ZMM13,%ZMM13 |
(203) 0x42f9f8 VDIVPD %ZMM16,%ZMM1,%ZMM1 |
(203) 0x42f9fe VADDPD %ZMM9,%ZMM2,%ZMM16 |
(203) 0x42fa04 VSUBPD %ZMM2,%ZMM9,%ZMM2 |
(203) 0x42fa0a VFMADD231PD %ZMM13,%ZMM0,%ZMM4{%K2} |
(203) 0x42fa10 VMULPD %ZMM3,%ZMM16,%ZMM27 |
(203) 0x42fa16 VMINPD %ZMM18,%ZMM3,%ZMM3 |
(203) 0x42fa1c VMULPD %ZMM8,%ZMM4,%ZMM8 |
(203) 0x42fa22 VDIVPD %ZMM20,%ZMM27,%ZMM26 |
(203) 0x42fa28 VADDPD %ZMM26,%ZMM1,%ZMM1 |
(203) 0x42fa2e VMULPD %ZMM12,%ZMM1,%ZMM12 |
(203) 0x42fa34 VMOVUPD %ZMM8,0x40(%RCX,%RDX,1) |
(203) 0x42fa3c VMINPD %ZMM3,%ZMM12,%ZMM1 |
(203) 0x42fa42 VMULPD %ZMM19,%ZMM2,%ZMM12 |
(203) 0x42fa48 VFMADD231PD %ZMM12,%ZMM1,%ZMM5{%K3} |
(203) 0x42fa4e VMULPD %ZMM5,%ZMM7,%ZMM7 |
(203) 0x42fa54 VMOVUPD %ZMM7,(%RCX,%RDX,1) |
(203) 0x42fa5b SUB $-0x80,%RDX |
(203) 0x42fa5f CMP %RDX,%R13 |
(203) 0x42fa62 JNE 42f7a8 |
0x42fa68 MOV %R8D,%ECX |
0x42fa6b AND $-0x10,%ECX |
0x42fa6e ADD %ECX,%R14D |
0x42fa71 LEA (%RCX,%RBX,1),%R13D |
0x42fa75 TEST $0xf,%R8B |
0x42fa79 JE 42ff68 |
0x42fa7f SUB %ECX,%R8D |
0x42fa82 LEA -0x1(%R8),%ESI |
0x42fa86 CMP $0x6,%ESI |
0x42fa89 JBE 42fda7 |
0x42fa8f MOVSXD %EBX,%RDX |
0x42fa92 MOV 0x60(%RSP),%RBX |
0x42fa97 VPBROADCASTD %R13D,%YMM15 |
0x42fa9d MOV 0x68(%RSP),%RSI |
0x42faa2 VPADDD 0x2ed96(%RIP),%YMM15,%YMM14 |
0x42faaa VXORPD %XMM2,%XMM2,%XMM2 |
0x42faae VPBROADCASTQ 0x78(%RSP),%YMM11 |
0x42fab5 VPBROADCASTQ %R15,%YMM9 |
0x42fabb LEA (%RBX,%RDX,1),%RDI |
0x42fabf VPBROADCASTQ 0x48(%RSP),%YMM1 |
0x42fac6 VPBROADCASTQ %R9,%YMM12 |
0x42facc VPBROADCASTD 0x74(%RSP),%YMM6 |
0x42fad3 ADD %RCX,%RDI |
0x42fad6 VEXTRACTI128 $0x1,%YMM14,%XMM5 |
0x42fadc VMOVDQA %YMM11,%YMM8 |
0x42fae1 VMOVSD (%R12),%XMM31 |
0x42fae8 LEA (%RSI,%RDI,8),%RDI |
0x42faec VPMOVSXDQ %XMM5,%YMM3 |
0x42faf1 VMOVDQA %YMM11,%YMM7 |
0x42faf5 MOV $0xf,%ESI |
0x42fafa VMOVUPD (%RDI),%YMM5 |
0x42fafe VMOVUPD 0x20(%RDI),%YMM4 |
0x42fb03 VPMOVSXDQ %XMM14,%YMM13 |
0x42fb08 KMOVB %ESI,%K0 |
0x42fb0c VMOVDQA %YMM6,%YMM0 |
0x42fb10 VBROADCASTSD %XMM31,%YMM10 |
0x42fb16 VBROADCASTSD 0x2dbc0(%RIP),%YMM23 |
0x42fb20 MOV 0x40(%RSP),%RBX |
0x42fb25 MOV %R8D,%EDI |
0x42fb28 VCMPPD $0x1,%YMM2,%YMM5,%K3 |
0x42fb2f VCMPPD $0x1,%YMM2,%YMM4,%K2 |
0x42fb36 AND $-0x8,%EDI |
0x42fb39 ADD %RBX,%RDX |
0x42fb3c ADD %EDI,%R14D |
0x42fb3f ADD %EDI,%R13D |
0x42fb42 ADD %RCX,%RDX |
0x42fb45 MOV 0x50(%RSP),%RCX |
0x42fb4a AND $0x7,%R8D |
0x42fb4e VPBROADCASTQ %R15,%YMM8{%K3} |
0x42fb54 VPBROADCASTQ %R15,%YMM7{%K2} |
0x42fb5a VPBLENDMQ %YMM11,%YMM9,%YMM14{%K3} |
0x42fb60 KANDB %K0,%K3,%K1 |
0x42fb64 KXNORB %K3,%K3,%K3 |
0x42fb68 KMOVB %K3,%K6 |
0x42fb6c VXORPS %XMM15,%XMM15,%XMM15 |
0x42fb71 VPMULLQ %YMM1,%YMM8,%YMM15 |
0x42fb77 VPMULLQ %YMM1,%YMM7,%YMM1 |
0x42fb7d VMOVDQA64 %YMM11,%YMM9{%K2} |
0x42fb83 KSHIFTLB $0x4,%K2,%K4 |
0x42fb89 KORB %K4,%K1,%K5 |
0x42fb8d VPBROADCASTD 0x58(%RSP),%YMM0{%K5} |
0x42fb95 KMOVB %K3,%K7 |
0x42fb99 KMOVB %K3,%K2 |
0x42fb9d KMOVB %K3,%K1 |
0x42fba1 KMOVB %K3,%K4 |
0x42fba5 VPBROADCASTD 0x5c(%RSP),%YMM6{%K5} |
0x42fbad KMOVB %K3,%K5 |
0x42fbb1 LEA (%RCX,%RDX,8),%RDX |
0x42fbb5 VPADDQ %YMM13,%YMM15,%YMM11 |
0x42fbba VGATHERQPD (%R10,%YMM11,8),%YMM24{%K6} |
0x42fbc1 VPADDQ %YMM3,%YMM1,%YMM11 |
0x42fbc5 VMOVDQA %YMM12,%YMM1 |
0x42fbc9 KMOVB %K3,%K6 |
0x42fbcd VPMULLQ %YMM12,%YMM8,%YMM12 |
0x42fbd3 VPMULLQ %YMM1,%YMM7,%YMM7 |
0x42fbd9 VGATHERQPD (%R10,%YMM11,8),%YMM15{%K7} |
0x42fbe0 VPMULLQ %YMM1,%YMM14,%YMM14 |
0x42fbe6 VPMULLQ %YMM1,%YMM9,%YMM9 |
0x42fbec VPADDQ %YMM13,%YMM12,%YMM11 |
0x42fbf1 VPADDQ %YMM3,%YMM7,%YMM12 |
0x42fbf5 VGATHERQPD (%RAX,%YMM11,8),%YMM8{%K2} |
0x42fbfc VPMOVSXDQ %XMM0,%YMM11 |
0x42fc01 VGATHERQPD (%RAX,%YMM12,8),%YMM7{%K1} |
0x42fc08 VXORPS %XMM12,%XMM12,%XMM12 |
0x42fc0d VPMULLQ %YMM1,%YMM11,%YMM12 |
0x42fc13 VEXTRACTI128 $0x1,%YMM0,%XMM0 |
0x42fc19 VPADDQ %YMM13,%YMM12,%YMM12 |
0x42fc1e VPADDQ %YMM13,%YMM14,%YMM13 |
0x42fc23 VGATHERQPD (%RAX,%YMM12,8),%YMM11{%K4} |
0x42fc2a VPMOVSXDQ %XMM0,%YMM12 |
0x42fc2f VXORPS %XMM0,%XMM0,%XMM0 |
0x42fc33 VPMULLQ %YMM1,%YMM12,%YMM0 |
0x42fc39 VSUBPD %YMM11,%YMM8,%YMM11 |
0x42fc3e VPADDQ %YMM3,%YMM0,%YMM0 |
0x42fc42 VPADDQ %YMM3,%YMM9,%YMM3 |
0x42fc46 VGATHERQPD (%RAX,%YMM0,8),%YMM12{%K5} |
0x42fc4d VGATHERQPD (%RAX,%YMM13,8),%YMM0{%K6} |
0x42fc54 VGATHERQPD (%RAX,%YMM3,8),%YMM1{%K3} |
0x42fc5b VBROADCASTSD 0x2e14c(%RIP),%YMM3 |
0x42fc64 VSUBPD %YMM8,%YMM0,%YMM14 |
0x42fc69 VSUBPD %YMM7,%YMM1,%YMM13 |
0x42fc6d VSUBPD %YMM12,%YMM7,%YMM12 |
0x42fc72 VANDPD %YMM3,%YMM5,%YMM1 |
0x42fc76 VMULPD %YMM14,%YMM11,%YMM0 |
0x42fc7b VANDPD %YMM3,%YMM13,%YMM30 |
0x42fc81 VANDPD %YMM3,%YMM11,%YMM11 |
0x42fc85 VMULPD %YMM13,%YMM12,%YMM9 |
0x42fc8a VCMPPD $0xe,%YMM2,%YMM0,%K2 |
0x42fc91 VANDPD %YMM3,%YMM4,%YMM0 |
0x42fc95 VCMPPD $0xe,%YMM2,%YMM9,%K1 |
0x42fc9c VANDPD %YMM3,%YMM12,%YMM9 |
0x42fca0 VANDPD %YMM3,%YMM14,%YMM12 |
0x42fca4 VDIVPD %YMM15,%YMM0,%YMM3 |
0x42fca9 VPERM2I128 $0x11,%YMM6,%YMM6,%YMM0 |
0x42fcaf VMOVAPD %YMM2,%YMM15 |
0x42fcb3 VCMPPD $0xe,%YMM2,%YMM14,%K7{%K2} |
0x42fcba VBROADCASTSD 0x2ec25(%RIP),%YMM14 |
0x42fcc3 KMOVB %K2,%K4 |
0x42fcc7 VGATHERDPD (%R11,%XMM6,8),%YMM15{%K4} |
0x42fcce KMOVB %K1,%K5 |
0x42fcd2 VMULSD 0x2eb2c(%RIP),%XMM31,%XMM6 |
0x42fcdc VCMPPD $0xe,%YMM2,%YMM13,%K3{%K1} |
0x42fce3 VGATHERDPD (%R11,%XMM0,8),%YMM2{%K5} |
0x42fcea VBROADCASTSD 0x2ebed(%RIP),%YMM0 |
0x42fcf3 VBLENDMPD %YMM14,%YMM23,%YMM22{%K7} |
0x42fcf9 VMOVAPD %YMM14,%YMM23{%K3} |
0x42fcff VBROADCASTSD %XMM6,%YMM6 |
0x42fd04 VDIVPD %YMM24,%YMM1,%YMM13 |
0x42fd0a VADDPD %YMM14,%YMM3,%YMM21 |
0x42fd10 VSUBPD %YMM3,%YMM0,%YMM1 |
0x42fd14 VSUBPD %YMM3,%YMM14,%YMM3 |
0x42fd18 VMULPD %YMM9,%YMM21,%YMM29 |
0x42fd1e VMINPD %YMM30,%YMM9,%YMM9 |
0x42fd24 VMULPD %YMM30,%YMM1,%YMM1 |
0x42fd2a VMULPD %YMM23,%YMM3,%YMM3 |
0x42fd30 VDIVPD %YMM10,%YMM1,%YMM1 |
0x42fd35 VSUBPD %YMM13,%YMM0,%YMM0 |
0x42fd3a VDIVPD %YMM2,%YMM29,%YMM2 |
0x42fd40 VADDPD %YMM2,%YMM1,%YMM1 |
0x42fd44 VMULPD %YMM6,%YMM1,%YMM2 |
0x42fd48 VMINPD %YMM9,%YMM2,%YMM1 |
0x42fd4d VSUBPD %YMM13,%YMM14,%YMM2 |
0x42fd52 VADDPD %YMM14,%YMM13,%YMM14 |
0x42fd57 VMULPD %YMM22,%YMM2,%YMM9 |
0x42fd5d VFMADD231PD %YMM1,%YMM3,%YMM7{%K1} |
0x42fd63 VMULPD %YMM12,%YMM0,%YMM2 |
0x42fd68 VMULPD %YMM11,%YMM14,%YMM13 |
0x42fd6d VMINPD %YMM12,%YMM11,%YMM11 |
0x42fd72 VMULPD %YMM4,%YMM7,%YMM4 |
0x42fd76 VDIVPD %YMM10,%YMM2,%YMM10 |
0x42fd7b VMOVUPD %YMM4,0x20(%RDX) |
0x42fd80 VDIVPD %YMM15,%YMM13,%YMM15 |
0x42fd85 VADDPD %YMM15,%YMM10,%YMM0 |
0x42fd8a VMULPD %YMM6,%YMM0,%YMM6 |
0x42fd8e VMINPD %YMM11,%YMM6,%YMM12 |
0x42fd93 VFMADD231PD %YMM12,%YMM9,%YMM8{%K2} |
0x42fd99 VMULPD %YMM5,%YMM8,%YMM5 |
0x42fd9d VMOVUPD %YMM5,(%RDX) |
0x42fda1 JE 42ff68 |
0x42fda7 MOV 0x68(%RSP),%R8 |
0x42fdac MOV 0x60(%RSP),%RBX |
0x42fdb1 MOVSXD %R13D,%RDX |
0x42fdb4 VXORPD %XMM12,%XMM12,%XMM12 |
0x42fdb9 MOV 0x50(%RSP),%RDI |
0x42fdbe MOV 0x40(%RSP),%RSI |
0x42fdc3 MOV %R11,0x50(%RSP) |
0x42fdc8 LEA (%R8,%RBX,8),%RCX |
0x42fdcc MOV 0x48(%RSP),%R11 |
0x42fdd1 LEA (%RDI,%RSI,8),%R8 |
0x42fdd5 MOV %RCX,0x60(%RSP) |
0x42fdda MOV %R8,0x68(%RSP) |
0x42fddf JMP 42fefb |
(202) 0x42fde8 MOVSXD 0x74(%RSP),%RCX |
(202) 0x42fded MOV 0x78(%RSP),%RSI |
(202) 0x42fdf2 MOV %R15,%RDI |
(202) 0x42fdf5 MOVSXD %ECX,%RBX |
(202) 0x42fdf8 MOV %R11,%R8 |
(202) 0x42fdfb IMUL %R9,%RDI |
(202) 0x42fdff VMOVSD (%R12),%XMM2 |
(202) 0x42fe05 IMUL %RSI,%R8 |
(202) 0x42fe09 IMUL %R9,%RSI |
(202) 0x42fe0d IMUL %R9,%RCX |
(202) 0x42fe11 ADD %RDX,%RDI |
(202) 0x42fe14 VMOVSD (%RAX,%RDI,8),%XMM10 |
(202) 0x42fe19 ADD %RDX,%R8 |
(202) 0x42fe1c ADD %RDX,%RSI |
(202) 0x42fe1f VMOVSD (%R10,%R8,8),%XMM9 |
(202) 0x42fe25 VMOVSD (%RAX,%RSI,8),%XMM11 |
(202) 0x42fe2a ADD %RDX,%RCX |
(202) 0x42fe2d VSUBSD (%RAX,%RCX,8),%XMM11,%XMM1 |
(202) 0x42fe32 VSUBSD %XMM11,%XMM10,%XMM14 |
(202) 0x42fe37 VMULSD %XMM14,%XMM1,%XMM13 |
(202) 0x42fe3c VCOMISD %XMM12,%XMM13 |
(202) 0x42fe41 JBE 42fed9 |
(202) 0x42fe47 VCOMISD %XMM14,%XMM12 |
(202) 0x42fe4c JAE 42ff48 |
(202) 0x42fe52 MOV 0x2ea8f(%RIP),%RDI |
(202) 0x42fe59 VMOVQ %RDI,%XMM15 |
(202) 0x42fe5e VMOVQ %RDI,%XMM6 |
(202) 0x42fe63 VANDPD 0x2df45(%RIP),%XMM3,%XMM7 |
(202) 0x42fe6b VMOVAPD 0x2ea6d(%RIP),%XMM5 |
(202) 0x42fe73 VUNPCKLPD %XMM1,%XMM14,%XMM0 |
(202) 0x42fe77 VMOVDDUP 0x2df31(%RIP),%XMM8 |
(202) 0x42fe7f VANDPD %XMM8,%XMM0,%XMM4 |
(202) 0x42fe84 MOV 0x50(%RSP),%R8 |
(202) 0x42fe89 VDIVSD %XMM9,%XMM7,%XMM9 |
(202) 0x42fe8e VMOVHPD (%R8,%RBX,8),%XMM2,%XMM13 |
(202) 0x42fe94 VMULSD 0x2e96c(%RIP),%XMM2,%XMM2 |
(202) 0x42fe9c VMOVDDUP %XMM9,%XMM1 |
(202) 0x42fea1 VSUBSD %XMM9,%XMM15,%XMM15 |
(202) 0x42fea6 VADDSUBPD %XMM1,%XMM5,%XMM10 |
(202) 0x42feaa VMOVSD %XMM4,%XMM4,%XMM1 |
(202) 0x42feae VMULSD %XMM6,%XMM15,%XMM6 |
(202) 0x42feb2 VMULPD %XMM10,%XMM4,%XMM14 |
(202) 0x42feb7 VUNPCKHPD %XMM4,%XMM4,%XMM4 |
(202) 0x42febb VMINSD %XMM4,%XMM1,%XMM5 |
(202) 0x42febf VDIVPD %XMM13,%XMM14,%XMM0 |
(202) 0x42fec4 VUNPCKHPD %XMM0,%XMM0,%XMM8 |
(202) 0x42fec8 VADDPD %XMM0,%XMM8,%XMM7 |
(202) 0x42fecc VMULSD %XMM7,%XMM2,%XMM9 |
(202) 0x42fed0 VMINSD %XMM5,%XMM9,%XMM10 |
(202) 0x42fed4 VFMADD231SD %XMM10,%XMM6,%XMM11 |
(202) 0x42fed9 VMULSD %XMM11,%XMM3,%XMM3 |
(202) 0x42fede MOV 0x68(%RSP),%RBX |
(202) 0x42fee3 MOV %R14D,%ESI |
(202) 0x42fee6 MOV 0x70(%RSP),%EDI |
(202) 0x42feea SUB %R13D,%ESI |
(202) 0x42feed VMOVSD %XMM3,(%RBX,%RDX,8) |
(202) 0x42fef2 INC %RDX |
(202) 0x42fef5 ADD %EDX,%ESI |
(202) 0x42fef7 CMP %EDI,%ESI |
(202) 0x42fef9 JAE 42ff68 |
(202) 0x42fefb MOV 0x60(%RSP),%RBX |
(202) 0x42ff00 VMOVSD (%RBX,%RDX,8),%XMM3 |
(202) 0x42ff05 VCOMISD %XMM3,%XMM12 |
(202) 0x42ff09 JBE 42fde8 |
(202) 0x42ff0f MOV 0x78(%RSP),%RDI |
(202) 0x42ff14 MOVSXD 0x5c(%RSP),%RBX |
(202) 0x42ff19 MOV %R15,%RSI |
(202) 0x42ff1c MOVSXD 0x58(%RSP),%RCX |
(202) 0x42ff21 JMP 42fdf8 |
(202) 0x42ff48 MOV 0x2d791(%RIP),%RSI |
(202) 0x42ff4f MOV 0x2e992(%RIP),%RCX |
(202) 0x42ff56 VMOVQ %RSI,%XMM6 |
(202) 0x42ff5b VMOVQ %RCX,%XMM15 |
(202) 0x42ff60 JMP 42fe63 |
0x42ff68 MOV 0x70(%RSP),%R14D |
0x42ff6d MOV 0x5c(%RSP),%EAX |
0x42ff71 INCL 0x74(%RSP) |
0x42ff75 MOV %R15,0x78(%RSP) |
0x42ff7a CMP %EAX,0x38(%RSP) |
0x42ff7e JLE 42ff30 |
0x42ff80 MOV 0x30(%RSP),%ECX |
0x42ff84 MOV 0x3c(%RSP),%EDX |
0x42ff88 MOV 0x34(%RSP),%EBX |
0x42ff8c SUB %R14D,%ECX |
0x42ff8f JMP 42f650 |
0x42ff98 MOV 0x78(%RSP),%RBX |
0x42ff9d LEA 0x1(%RBX),%R15 |
0x42ffa1 JMP 42ff6d |
0x42ffa8 MOV %EBX,%R13D |
0x42ffab XOR %ECX,%ECX |
0x42ffad JMP 42fa7f |
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 182 - 211 |
-------------------------------------------------------------------------------- |
182: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) |
183: ({ |
184: int upwind, donor, downwind, dif; |
185: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
186: if (node_flux(i, j) < 0.0) { |
187: upwind = j + 2; |
188: donor = j + 1; |
189: downwind = j; |
190: dif = donor; |
191: } else { |
192: upwind = j - 1; |
193: donor = j; |
194: downwind = j + 1; |
195: dif = upwind; |
196: } |
197: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(i, donor)); |
198: width = celldy[j]; |
199: vdiffuw = vel1(i, donor) - vel1(i, upwind); |
200: vdiffdw = vel1(i, downwind) - vel1(i, donor); |
201: limiter = 0.0; |
202: if (vdiffuw * vdiffdw > 0.0) { |
203: auw = std::fabs(vdiffuw); |
204: adw = std::fabs(vdiffdw); |
205: wind = 1.0; |
206: if (vdiffdw <= 0.0) wind = -1.0; |
207: limiter = |
208: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldy[dif]) / 6.0, auw), adw); |
209: } |
210: advec_vel_s = vel1(i, donor) + (1.0 - sigma) * limiter; |
211: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.16 |
CQA speedup if FP arith vectorized | 1.16 |
CQA speedup if fully vectorized | 1.16 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.10 |
Bottlenecks | micro-operation queue, |
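Function | advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer1D<double>&, clover::Buffer1D<double>&, int, int, int) [clone ._omp_fn.10] |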
Source | context.h:46-46,context.h:69-69,advec_mom.cpp:182-182,advec_mom.cpp:186-186,advec_mom.cpp:197-204,advec_mom.cpp:208-211 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 55.67 |
CQA cycles if no scalar integer | 48.00 |
CQA cycles if FP arith vectorized | 48.00 |
CQA cycles if fully vectorized | 48.00 |
Front-end cycles | 55.67 |
DIV/SQRT cycles | 48.00 |
P0 cycles | 50.83 |
P1 cycles | 50.67 |
P2 cycles | 33.00 |
P3 cycles | 33.00 |
P4 cycles | 7.50 |
P5 cycles | 50.50 |
P6 cycles | 20.00 |
P7 cycles | 7.50 |
P8 cycles | 7.50 |
P9 cycles | 7.50 |
P10 cycles | 20.00 |
P11 cycles | 33.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 70.52 - 102.55 |
Stall cycles (UFS) | 20.02 - 52.05 |
Nb insns | 259.00 |
Nb uops | 334.00 |
Nb loads | 69.00 |
Nb stores | 15.00 |
Nb stack references | 19.00 |
FLOP/cycle | 2.46 |
Nb FLOP add-sub | 48.00 |
Nb FLOP mul | 49.00 |
Nb FLOP fma | 8.00 |
Nb FLOP div | 24.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.18 |
Bytes prefetched | 0.00 |
Bytes loaded | 860.00 |
Bytes stored | 152.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 65.38 |
Vectorization ratio load | 36.84 |
Vectorization ratio store | 13.33 |
Vectorization ratio mul | 95.24 |
Vectorization ratio add_sub | 95.65 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 57.83 |
Vector-efficiency ratio all | 34.46 |
Vector-efficiency ratio load | 26.15 |
Vector-efficiency ratio store | 15.83 |
Vector-efficiency ratio mul | 48.21 |
Vector-efficiency ratio add_sub | 50.27 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 29.22 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.16 |
CQA speedup if FP arith vectorized | 1.16 |
CQA speedup if fully vectorized | 1.16 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.10 |
Bottlenecks | micro-operation queue, |
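Function | advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer1D<double>&, clover::Buffer1D<double>&, int, int, int) [clone ._omp_fn.10] |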
Source | context.h:46-46,context.h:69-69,advec_mom.cpp:182-182,advec_mom.cpp:186-186,advec_mom.cpp:197-204,advec_mom.cpp:208-211 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 55.67 |
CQA cycles if no scalar integer | 48.00 |
CQA cycles if FP arith vectorized | 48.00 |
CQA cycles if fully vectorized | 48.00 |
Front-end cycles | 55.67 |
DIV/SQRT cycles | 48.00 |
P0 cycles | 50.83 |
P1 cycles | 50.67 |
P2 cycles | 33.00 |
P3 cycles | 33.00 |
P4 cycles | 7.50 |
P5 cycles | 50.50 |
P6 cycles | 20.00 |
P7 cycles | 7.50 |
P8 cycles | 7.50 |
P9 cycles | 7.50 |
P10 cycles | 20.00 |
P11 cycles | 33.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 70.52 - 102.55 |
Stall cycles (UFS) | 20.02 - 52.05 |
Nb insns | 259.00 |
Nb uops | 334.00 |
Nb loads | 69.00 |
Nb stores | 15.00 |
Nb stack references | 19.00 |
FLOP/cycle | 2.46 |
Nb FLOP add-sub | 48.00 |
Nb FLOP mul | 49.00 |
Nb FLOP fma | 8.00 |
Nb FLOP div | 24.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.18 |
Bytes prefetched | 0.00 |
Bytes loaded | 860.00 |
Bytes stored | 152.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 65.38 |
Vectorization ratio load | 36.84 |
Vectorization ratio store | 13.33 |
Vectorization ratio mul | 95.24 |
Vectorization ratio add_sub | 95.65 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 57.83 |
Vector-efficiency ratio all | 34.46 |
Vector-efficiency ratio load | 26.15 |
Vector-efficiency ratio store | 15.83 |
Vector-efficiency ratio mul | 48.21 |
Vector-efficiency ratio add_sub | 50.27 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 29.22 |
Path / |
nb instructions | 259 |
nb uops | 334 |
loop length | 1301 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 22 |
used zmm registers | 13 |
nb stack references | 19 |
ADD-SUB / MUL ratio | 0.92 |
micro-operation queue | 55.67 cycles |
front end | 55.67 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 50.83 | 50.67 | 33.00 | 33.00 | 7.50 | 50.50 | 20.00 | 7.50 | 7.50 | 7.50 | 20.00 | 33.00 |
cycles | 50.83 | 50.67 | 33.00 | 33.00 | 7.50 | 50.50 | 20.00 | 7.50 | 7.50 | 7.50 | 20.00 | 33.00 |
Cycles executing div or sqrt instructions | 48.00 |
FE+BE cycles | 70.52-102.55 |
Stall cycles | 20.02-52.05 |
ROB full (events) | 22.35-57.33 |
LM full (events) | 0.09-0.00 |
Front-end | 55.67 |
Dispatch | 50.83 |
DIV/SQRT | 48.00 |
Overall L1 | 55.67 |
all | 42% |
load | 12% |
store | 0% |
mul | 100% |
add-sub | 90% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 34% |
all | 85% |
load | 54% |
store | 100% |
mul | 92% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 77% |
all | 65% |
load | 36% |
store | 13% |
mul | 95% |
add-sub | 95% |
fma | 100% |
div/sqrt | 100% |
other | 57% |
all | 25% |
load | 16% |
store | 10% |
mul | 50% |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 18% |
all | 42% |
load | 32% |
store | 50% |
mul | 47% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 38% |
all | 34% |
load | 26% |
store | 15% |
mul | 48% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 29% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x74(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMOVBE %ECX,%EDX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1-2 | 1 |
ADD $0x2,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R14,%RDX,1),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %ECX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %ECX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42ff98 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa38> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R13,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RCX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x74(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R11),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R11),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R8),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x3,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R15,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R13),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%R13,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0xe,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 42ffa8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa48> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EBX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVQ %R13,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x50(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EBX,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORB %K1,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%RSI,%RDX,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPADDD 0x2f11c(%RIP),%ZMM17,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
MOV $0x10,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTD 0x58(%RSP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTD 0x74(%RSP),%ZMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %XMM6,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RSI,%RCX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R13,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTD 0x5c(%RSP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
SHR $0x4,%R13D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VPBROADCASTQ 0x48(%RSP),%ZMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x2e647(%RIP),%ZMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R15,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x2f16f(%RIP),%ZMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x2df5d(%RIP),%ZMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SAL $0x7,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VBROADCASTSD 0x2f149(%RIP),%ZMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTD %EDI,%ZMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x10,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %ECX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RCX,%RBX,1),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
TEST $0xf,%R8B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 42ff68 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa08> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %ECX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%R8),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP $0x6,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 42fda7 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x847> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EBX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x60(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %R13D,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDD 0x2ed96(%RIP),%YMM15,%YMM14 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x78(%RSP),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R15,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RBX,%RDX,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x48(%RSP),%YMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R9,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD 0x74(%RSP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
ADD %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VEXTRACTI128 $0x1,%YMM14,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD (%R12),%XMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%RDI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMOVSXDQ %XMM5,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM11,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV $0xf,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD (%RDI),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RDI),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VPMOVSXDQ %XMM14,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %ESI,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM6,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VBROADCASTSD %XMM31,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x2dbc0(%RIP),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x40(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VCMPPD $0x1,%YMM2,%YMM5,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VCMPPD $0x1,%YMM2,%YMM4,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND $-0x8,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RBX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDI,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDI,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
AND $0x7,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPBROADCASTQ %R15,%YMM8{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R15,%YMM7{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBLENDMQ %YMM11,%YMM9,%YMM14{%K3} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KANDB %K0,%K3,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KXNORB %K3,%K3,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K3,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPS %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM1,%YMM8,%YMM15 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM1,%YMM7,%YMM1 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VMOVDQA64 %YMM11,%YMM9{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
KSHIFTLB $0x4,%K2,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 1 |
KORB %K4,%K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD 0x58(%RSP),%YMM0{%K5} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVB %K3,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K3,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K3,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K3,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD 0x5c(%RSP),%YMM6{%K5} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVB %K3,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RCX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %YMM13,%YMM15,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VGATHERQPD (%R10,%YMM11,8),%YMM24{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPADDQ %YMM3,%YMM1,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVDQA %YMM12,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
KMOVB %K3,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %YMM12,%YMM8,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM1,%YMM7,%YMM7 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VGATHERQPD (%R10,%YMM11,8),%YMM15{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPMULLQ %YMM1,%YMM14,%YMM14 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM1,%YMM9,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM13,%YMM12,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM3,%YMM7,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VGATHERQPD (%RAX,%YMM11,8),%YMM8{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPMOVSXDQ %XMM0,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%RAX,%YMM12,8),%YMM7{%K1} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM1,%YMM11,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VEXTRACTI128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %YMM13,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM13,%YMM14,%YMM13 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VGATHERQPD (%RAX,%YMM12,8),%YMM11{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPMOVSXDQ %XMM0,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM1,%YMM12,%YMM0 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VSUBPD %YMM11,%YMM8,%YMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDQ %YMM3,%YMM0,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM3,%YMM9,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VGATHERQPD (%RAX,%YMM0,8),%YMM12{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VGATHERQPD (%RAX,%YMM13,8),%YMM0{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VGATHERQPD (%RAX,%YMM3,8),%YMM1{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VBROADCASTSD 0x2e14c(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VSUBPD %YMM8,%YMM0,%YMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM7,%YMM1,%YMM13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM12,%YMM7,%YMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VANDPD %YMM3,%YMM5,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD %YMM14,%YMM11,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %YMM3,%YMM13,%YMM30 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VANDPD %YMM3,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD %YMM13,%YMM12,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0xe,%YMM2,%YMM0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VANDPD %YMM3,%YMM4,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VCMPPD $0xe,%YMM2,%YMM9,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VANDPD %YMM3,%YMM12,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VANDPD %YMM3,%YMM14,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VDIVPD %YMM15,%YMM0,%YMM3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VCMPPD $0xe,%YMM2,%YMM14,%K7{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x2ec25(%RIP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVB %K2,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERDPD (%R11,%XMM6,8),%YMM15{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMULSD 0x2eb2c(%RIP),%XMM31,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VCMPPD $0xe,%YMM2,%YMM13,%K3{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERDPD (%R11,%XMM0,8),%YMM2{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VBROADCASTSD 0x2ebed(%RIP),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBLENDMPD %YMM14,%YMM23,%YMM22{%K7} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVAPD %YMM14,%YMM23{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VBROADCASTSD %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VDIVPD %YMM24,%YMM1,%YMM13 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VADDPD %YMM14,%YMM3,%YMM21 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM3,%YMM0,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM3,%YMM14,%YMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM9,%YMM21,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM30,%YMM9,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM23,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM10,%YMM1,%YMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VSUBPD %YMM13,%YMM0,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VDIVPD %YMM2,%YMM29,%YMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VADDPD %YMM2,%YMM1,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM6,%YMM1,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM9,%YMM2,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %YMM13,%YMM14,%YMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM14,%YMM13,%YMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM22,%YMM2,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM1,%YMM3,%YMM7{%K1} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM12,%YMM0,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM11,%YMM14,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM12,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM4,%YMM7,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM10,%YMM2,%YMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VMOVUPD %YMM4,0x20(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VDIVPD %YMM15,%YMM13,%YMM15 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VADDPD %YMM15,%YMM10,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM6,%YMM0,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM11,%YMM6,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM12,%YMM9,%YMM8{%K2} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM5,%YMM8,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM5,(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JE 42ff68 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa08> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x68(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R13D,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%RBX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x48(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%RSI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42fefb <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x99b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x70(%RSP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x5c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INCL 0x74(%RSP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,0x38(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 42ff30 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x9d0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x3c(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x34(%RSP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 42f650 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xf0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x78(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RBX),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42ff6d <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa0d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42fa7f <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x51f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
nb instructions | 259 |
nb uops | 334 |
loop length | 1301 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 22 |
used zmm registers | 13 |
nb stack references | 19 |
ADD-SUB / MUL ratio | 0.92 |
micro-operation queue | 55.67 cycles |
front end | 55.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 50.83 | 50.67 | 33.00 | 33.00 | 7.50 | 50.50 | 20.00 | 7.50 | 7.50 | 7.50 | 20.00 | 33.00 |
cycles | 50.83 | 50.67 | 33.00 | 33.00 | 7.50 | 50.50 | 20.00 | 7.50 | 7.50 | 7.50 | 20.00 | 33.00 |
Cycles executing div or sqrt instructions | 48.00 |
FE+BE cycles | 70.52-102.55 |
Stall cycles | 20.02-52.05 |
ROB full (events) | 22.35-57.33 |
LM full (events) | 0.09-0.00 |
Front-end | 55.67 |
Dispatch | 50.83 |
DIV/SQRT | 48.00 |
Overall L1 | 55.67 |
all | 42% |
load | 12% |
store | 0% |
mul | 100% |
add-sub | 90% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 34% |
all | 85% |
load | 54% |
store | 100% |
mul | 92% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 77% |
all | 65% |
load | 36% |
store | 13% |
mul | 95% |
add-sub | 95% |
fma | 100% |
div/sqrt | 100% |
other | 57% |
all | 25% |
load | 16% |
store | 10% |
mul | 50% |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 18% |
all | 42% |
load | 32% |
store | 50% |
mul | 47% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 38% |
all | 34% |
load | 26% |
store | 15% |
mul | 48% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 29% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x74(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMOVBE %ECX,%EDX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1-2 | 1 |
ADD $0x2,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R14,%RDX,1),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %ECX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %ECX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42ff98 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa38> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R13,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RCX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x74(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R11),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R11),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R8),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x3,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R15,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R13),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%R13,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0xe,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 42ffa8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa48> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EBX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VMOVQ %R13,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x50(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EBX,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORB %K1,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%RSI,%RDX,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPADDD 0x2f11c(%RIP),%ZMM17,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
MOV $0x10,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTD 0x58(%RSP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTD 0x74(%RSP),%ZMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %XMM6,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RSI,%RCX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R13,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTD 0x5c(%RSP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
SHR $0x4,%R13D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VPBROADCASTQ 0x48(%RSP),%ZMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x2e647(%RIP),%ZMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R15,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x2f16f(%RIP),%ZMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x2df5d(%RIP),%ZMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SAL $0x7,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VBROADCASTSD 0x2f149(%RIP),%ZMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTD %EDI,%ZMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x10,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %ECX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RCX,%RBX,1),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
TEST $0xf,%R8B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 42ff68 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa08> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %ECX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%R8),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP $0x6,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 42fda7 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x847> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %EBX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x60(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %R13D,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDD 0x2ed96(%RIP),%YMM15,%YMM14 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x78(%RSP),%YMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R15,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RBX,%RDX,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x48(%RSP),%YMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R9,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD 0x74(%RSP),%YMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
ADD %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VEXTRACTI128 $0x1,%YMM14,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVSD (%R12),%XMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%RDI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMOVSXDQ %XMM5,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM11,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV $0xf,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVUPD (%RDI),%YMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x20(%RDI),%YMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VPMOVSXDQ %XMM14,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %ESI,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM6,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VBROADCASTSD %XMM31,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x2dbc0(%RIP),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x40(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VCMPPD $0x1,%YMM2,%YMM5,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VCMPPD $0x1,%YMM2,%YMM4,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
AND $-0x8,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RBX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDI,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDI,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
AND $0x7,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPBROADCASTQ %R15,%YMM8{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R15,%YMM7{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBLENDMQ %YMM11,%YMM9,%YMM14{%K3} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KANDB %K0,%K3,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KXNORB %K3,%K3,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K3,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPS %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM1,%YMM8,%YMM15 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM1,%YMM7,%YMM1 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VMOVDQA64 %YMM11,%YMM9{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
KSHIFTLB $0x4,%K2,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 1 |
KORB %K4,%K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD 0x58(%RSP),%YMM0{%K5} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVB %K3,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K3,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K3,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KMOVB %K3,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD 0x5c(%RSP),%YMM6{%K5} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVB %K3,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RCX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %YMM13,%YMM15,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VGATHERQPD (%R10,%YMM11,8),%YMM24{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPADDQ %YMM3,%YMM1,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVDQA %YMM12,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
KMOVB %K3,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %YMM12,%YMM8,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM1,%YMM7,%YMM7 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VGATHERQPD (%R10,%YMM11,8),%YMM15{%K7} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPMULLQ %YMM1,%YMM14,%YMM14 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMULLQ %YMM1,%YMM9,%YMM9 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %YMM13,%YMM12,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM3,%YMM7,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VGATHERQPD (%RAX,%YMM11,8),%YMM8{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPMOVSXDQ %XMM0,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERQPD (%RAX,%YMM12,8),%YMM7{%K1} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VXORPS %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM1,%YMM11,%YMM12 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VEXTRACTI128 $0x1,%YMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %YMM13,%YMM12,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM13,%YMM14,%YMM13 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VGATHERQPD (%RAX,%YMM12,8),%YMM11{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VPMOVSXDQ %XMM0,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPS %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %YMM1,%YMM12,%YMM0 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VSUBPD %YMM11,%YMM8,%YMM11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDQ %YMM3,%YMM0,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDQ %YMM3,%YMM9,%YMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VGATHERQPD (%RAX,%YMM0,8),%YMM12{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VGATHERQPD (%RAX,%YMM13,8),%YMM0{%K6} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VGATHERQPD (%RAX,%YMM3,8),%YMM1{%K3} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VBROADCASTSD 0x2e14c(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VSUBPD %YMM8,%YMM0,%YMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM7,%YMM1,%YMM13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM12,%YMM7,%YMM12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VANDPD %YMM3,%YMM5,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD %YMM14,%YMM11,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %YMM3,%YMM13,%YMM30 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VANDPD %YMM3,%YMM11,%YMM11 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULPD %YMM13,%YMM12,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0xe,%YMM2,%YMM0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VANDPD %YMM3,%YMM4,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VCMPPD $0xe,%YMM2,%YMM9,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VANDPD %YMM3,%YMM12,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VANDPD %YMM3,%YMM14,%YMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VDIVPD %YMM15,%YMM0,%YMM3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VCMPPD $0xe,%YMM2,%YMM14,%K7{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x2ec25(%RIP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
KMOVB %K2,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERDPD (%R11,%XMM6,8),%YMM15{%K4} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMULSD 0x2eb2c(%RIP),%XMM31,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VCMPPD $0xe,%YMM2,%YMM13,%K3{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VGATHERDPD (%R11,%XMM0,8),%YMM2{%K5} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VBROADCASTSD 0x2ebed(%RIP),%YMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBLENDMPD %YMM14,%YMM23,%YMM22{%K7} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVAPD %YMM14,%YMM23{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VBROADCASTSD %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VDIVPD %YMM24,%YMM1,%YMM13 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VADDPD %YMM14,%YMM3,%YMM21 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM3,%YMM0,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSUBPD %YMM3,%YMM14,%YMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM9,%YMM21,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM30,%YMM9,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM23,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM10,%YMM1,%YMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VSUBPD %YMM13,%YMM0,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VDIVPD %YMM2,%YMM29,%YMM2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VADDPD %YMM2,%YMM1,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM6,%YMM1,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM9,%YMM2,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %YMM13,%YMM14,%YMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD %YMM14,%YMM13,%YMM14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM22,%YMM2,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM1,%YMM3,%YMM7{%K1} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM12,%YMM0,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM11,%YMM14,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM12,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM4,%YMM7,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM10,%YMM2,%YMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VMOVUPD %YMM4,0x20(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VDIVPD %YMM15,%YMM13,%YMM15 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
VADDPD %YMM15,%YMM10,%YMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM6,%YMM0,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM11,%YMM6,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM12,%YMM9,%YMM8{%K2} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM5,%YMM8,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM5,(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JE 42ff68 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa08> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x68(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R13D,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%RBX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x48(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%RSI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42fefb <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x99b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x70(%RSP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x5c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INCL 0x74(%RSP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,0x38(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 42ff30 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x9d0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x3c(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x34(%RSP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 42f650 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xf0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x78(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RBX),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42ff6d <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0xa0d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42fa7f <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.10+0x51f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |