Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 4.06% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 4.06% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 108 - 139 |
-------------------------------------------------------------------------------- |
108: #pragma omp parallel for simd collapse(2) |
109: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
110: for (int i = (x_min - 1 + 1); i < (x_max + 1 + 2); i++) |
111: ({ |
112: int upwind, donor, downwind, dif; |
113: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
114: if (node_flux(i, j) < 0.0) { |
115: upwind = i + 2; |
116: donor = i + 1; |
117: downwind = i; |
118: dif = donor; |
119: } else { |
120: upwind = i - 1; |
121: donor = i; |
122: downwind = i + 1; |
123: dif = upwind; |
124: } |
125: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(donor, j)); |
126: width = celldx[i]; |
127: vdiffuw = vel1(donor, j) - vel1(upwind, j); |
128: vdiffdw = vel1(downwind, j) - vel1(donor, j); |
129: limiter = 0.0; |
130: if (vdiffuw * vdiffdw > 0.0) { |
131: auw = std::fabs(vdiffuw); |
132: adw = std::fabs(vdiffdw); |
133: wind = 1.0; |
134: if (vdiffdw <= 0.0) wind = -1.0; |
135: limiter = |
136: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldx[dif]) / 6.0, auw), adw); |
137: } |
138: advec_vel_s = vel1(donor, j) + (1.0 - sigma) * limiter; |
139: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42b5f0 PUSH %RBP |
0x42b5f1 MOV %RSP,%RBP |
0x42b5f4 PUSH %R15 |
0x42b5f6 PUSH %R14 |
0x42b5f8 PUSH %R13 |
0x42b5fa PUSH %R12 |
0x42b5fc PUSH %RBX |
0x42b5fd AND $-0x40,%RSP |
0x42b601 ADD $-0x80,%RSP |
0x42b605 MOV 0x30(%RDI),%EAX |
0x42b608 MOV 0x34(%RDI),%EDX |
0x42b60b MOV 0x28(%RDI),%ESI |
0x42b60e MOV 0x2c(%RDI),%EBX |
0x42b611 ADD $0x3,%EDX |
0x42b614 LEA 0x1(%RAX),%R15D |
0x42b618 MOV %ESI,0x40(%RSP) |
0x42b61c MOV %EDX,0x44(%RSP) |
0x42b620 CMP %EDX,%R15D |
0x42b623 JGE 42bf13 |
0x42b629 LEA 0x3(%RBX),%R14D |
0x42b62d MOV %EDX,%EBX |
0x42b62f SUB %R15D,%EBX |
0x42b632 CMP %R14D,%ESI |
0x42b635 JGE 42bf13 |
0x42b63b MOV %R14D,%ECX |
0x42b63e MOV %RDI,%R13 |
0x42b641 SUB %ESI,%ECX |
0x42b643 MOV %ECX,0x70(%RSP) |
0x42b647 CALL 4046c0 <omp_get_num_threads@plt> |
0x42b64c MOV %EAX,%R12D |
0x42b64f CALL 4045b0 <omp_get_thread_num@plt> |
0x42b654 XOR %EDX,%EDX |
0x42b656 MOV %EAX,%EDI |
0x42b658 MOV 0x70(%RSP),%EAX |
0x42b65c IMUL %EBX,%EAX |
0x42b65f DIV %R12D |
0x42b662 MOV %EAX,%R12D |
0x42b665 CMP %EDX,%EDI |
0x42b667 JB 42bf9b |
0x42b66d IMUL %R12D,%EDI |
0x42b671 LEA (%RDI,%RDX,1),%R8D |
0x42b675 LEA (%R12,%R8,1),%R9D |
0x42b679 MOV %R9D,0x3c(%RSP) |
0x42b67e CMP %R9D,%R8D |
0x42b681 JAE 42bf13 |
0x42b687 MOV %R8D,%EAX |
0x42b68a XOR %EDX,%EDX |
0x42b68c MOV 0x40(%RSP),%R10D |
0x42b691 MOV 0x8(%R13),%R11 |
0x42b695 DIVL 0x70(%RSP) |
0x42b699 MOV (%R13),%RBX |
0x42b69d VMOVSD 0x373f1(%RIP),%XMM29 |
0x42b6a7 MOV %R8D,0x78(%RSP) |
0x42b6ac VMOVQ 0x373ca(%RIP),%XMM30 |
0x42b6b6 VMOVDQA32 0x36f80(%RIP),%YMM31 |
0x42b6c0 MOV %R11,0x28(%RSP) |
0x42b6c5 MOV %RBX,0x10(%RSP) |
0x42b6ca MOV %R14D,%R9D |
0x42b6cd MOV 0x20(%R13),%R14 |
0x42b6d1 MOV %R14,0x18(%RSP) |
0x42b6d6 ADD %R15D,%EAX |
0x42b6d9 MOV 0x10(%R13),%R15 |
0x42b6dd MOV 0x18(%R13),%R13 |
0x42b6e1 ADD %EDX,%R10D |
0x42b6e4 MOV %EAX,0x38(%RSP) |
0x42b6e8 CLTQ |
0x42b6ea SUB %R10D,%R9D |
0x42b6ed MOV %R10D,0x74(%RSP) |
0x42b6f2 MOV %R15,0x20(%RSP) |
0x42b6f7 MOV %R13,0x8(%RSP) |
0x42b6fc MOV %RAX,0x30(%RSP) |
0x42b701 MOV %RAX,0x68(%RSP) |
0x42b706 NOPW %CS:(%RAX,%RAX,1) |
(136) 0x42b710 CMP %R9D,%R12D |
(136) 0x42b713 MOV 0x78(%RSP),%ESI |
(136) 0x42b717 CMOVBE %R12D,%R9D |
(136) 0x42b71b LEA (%RSI,%R9,1),%ECX |
(136) 0x42b71f MOV %R9D,%R14D |
(136) 0x42b722 MOV %ECX,0x7c(%RSP) |
(136) 0x42b726 CMP %ECX,%ESI |
(136) 0x42b728 JAE 42bf50 |
(136) 0x42b72e MOV 0x28(%RSP),%R12 |
(136) 0x42b733 MOV 0x20(%RSP),%R8 |
(136) 0x42b738 LEA -0x1(%R14),%R11D |
(136) 0x42b73c MOV 0x18(%RSP),%R9 |
(136) 0x42b741 MOV 0x10(%RSP),%RAX |
(136) 0x42b746 MOV 0x68(%RSP),%R15 |
(136) 0x42b74b MOV 0x10(%R12),%R10 |
(136) 0x42b750 MOV (%R12),%RDI |
(136) 0x42b754 MOV 0x8(%RSP),%RDX |
(136) 0x42b759 MOV (%R8),%R12 |
(136) 0x42b75c MOV 0x8(%R9),%RSI |
(136) 0x42b760 MOV %R10,0x60(%RSP) |
(136) 0x42b765 MOV (%RAX),%R9 |
(136) 0x42b768 IMUL %R15,%RDI |
(136) 0x42b76c MOV 0x10(%RDX),%R13 |
(136) 0x42b770 IMUL %R15,%R12 |
(136) 0x42b774 MOV 0x10(%R8),%RBX |
(136) 0x42b778 MOV 0x10(%RAX),%R8 |
(136) 0x42b77c IMUL %R15,%R9 |
(136) 0x42b780 MOV %R13,0x50(%RSP) |
(136) 0x42b785 IMUL (%RDX),%R15 |
(136) 0x42b789 MOV %RDI,0x58(%RSP) |
(136) 0x42b78e MOV %R15,0x48(%RSP) |
(136) 0x42b793 CMP $0xe,%R11D |
(136) 0x42b797 JBE 42bf90 |
(136) 0x42b79d MOVSXD 0x74(%RSP),%RCX |
(136) 0x42b7a2 VPBROADCASTD 0x74(%RSP),%ZMM11 |
(136) 0x42b7aa LEA (%R8,%R9,8),%RDX |
(136) 0x42b7ae VXORPD %XMM6,%XMM6,%XMM6 |
(136) 0x42b7b2 KXNORB %K1,%K1,%K1 |
(136) 0x42b7b6 VBROADCASTSD 0x372c0(%RIP),%ZMM9 |
(136) 0x42b7c0 VBROADCASTSD 0x372ce(%RIP),%ZMM8 |
(136) 0x42b7ca VPTERNLOGD $-0x1,%ZMM12,%ZMM12,%ZMM12 |
(136) 0x42b7d1 LEA (%RDI,%RCX,1),%RAX |
(136) 0x42b7d5 LEA (%RSI,%RCX,8),%RDI |
(136) 0x42b7d9 ADD %R15,%RCX |
(136) 0x42b7dc MOV %R14D,%R15D |
(136) 0x42b7df LEA (%R13,%RCX,8),%RCX |
(136) 0x42b7e4 MOV $0x10,%R13D |
(136) 0x42b7ea SHR $0x4,%R15D |
(136) 0x42b7ee VPADDD 0x37108(%RIP),%ZMM11,%ZMM11 |
(136) 0x42b7f8 VPBROADCASTD %R13D,%ZMM18 |
(136) 0x42b7fe MOV $0x1,%R13D |
(136) 0x42b804 VBROADCASTSD 0x37132(%RIP),%ZMM15 |
(136) 0x42b80e VBROADCASTSD 0x37278(%RIP),%ZMM14 |
(136) 0x42b818 VPBROADCASTD %R13D,%ZMM17 |
(136) 0x42b81e VBROADCASTSD 0x37158(%RIP),%ZMM13 |
(136) 0x42b828 MOV $0x2,%R13D |
(136) 0x42b82e LEA (%R10,%RAX,8),%R11 |
(136) 0x42b832 SAL $0x7,%R15 |
(136) 0x42b836 LEA (%RBX,%R12,8),%R10 |
(136) 0x42b83a VPBROADCASTD %R13D,%ZMM16 |
(136) 0x42b840 XOR %EAX,%EAX |
(136) 0x42b842 NOPW (%RAX,%RAX,1) |
(138) 0x42b848 VMOVUPD (%R11,%RAX,1),%ZMM19 |
(138) 0x42b84f VMOVUPD 0x40(%R11,%RAX,1),%ZMM10 |
(138) 0x42b857 VMOVDQA32 %ZMM11,%ZMM0 |
(138) 0x42b85d KMOVB %K1,%K6 |
(138) 0x42b861 KMOVB %K1,%K7 |
(138) 0x42b865 VMOVAPD %ZMM6,%ZMM28 |
(138) 0x42b86b VPADDD %ZMM17,%ZMM0,%ZMM1 |
(138) 0x42b871 VPMOVSXDQ %YMM0,%ZMM4 |
(138) 0x42b877 VEXTRACTI32X8 $0x1,%ZMM0,%YMM3 |
(138) 0x42b87e VMOVUPD 0x40(%RDI,%RAX,1),%ZMM25 |
(138) 0x42b886 VCMPPD $0x1,%ZMM6,%ZMM10,%K3 |
(138) 0x42b88d VCMPPD $0x1,%ZMM6,%ZMM19,%K4 |
(138) 0x42b894 VPMOVSXDQ %YMM3,%ZMM3 |
(138) 0x42b89a VMOVUPD (%RDI,%RAX,1),%ZMM22 |
(138) 0x42b8a1 VMOVDQA %YMM1,%YMM5 |
(138) 0x42b8a5 VPADDD %ZMM12,%ZMM0,%ZMM2 |
(138) 0x42b8ab VMOVDQA64 %ZMM4,%ZMM7 |
(138) 0x42b8b1 VEXTRACTI32X8 $0x1,%ZMM1,%YMM20 |
(138) 0x42b8b8 VPMOVSXDQ %YMM1,%ZMM23 |
(138) 0x42b8be VPADDD %ZMM18,%ZMM11,%ZMM11 |
(138) 0x42b8c4 VPMOVSXDQ %YMM5,%ZMM7{%K4} |
(138) 0x42b8ca KUNPCKBW %K4,%K3,%K2 |
(138) 0x42b8ce VMOVDQA64 %ZMM3,%ZMM5 |
(138) 0x42b8d4 VPADDD %ZMM16,%ZMM0,%ZMM2{%K2} |
(138) 0x42b8da KNOTW %K2,%K5 |
(138) 0x42b8de KMOVB %K1,%K2 |
(138) 0x42b8e2 VPMOVSXDQ %YMM20,%ZMM5{%K3} |
(138) 0x42b8e8 VMOVDQA64 %ZMM4,%ZMM23{%K4} |
(138) 0x42b8ee VPMOVSXDQ %YMM20,%ZMM21 |
(138) 0x42b8f4 KMOVB %K1,%K4 |
(138) 0x42b8f8 VGATHERDPD (%RDX,%YMM2,8),%ZMM4{%K2} |
(138) 0x42b8ff VSHUFI32X4 $-0x12,%ZMM2,%ZMM2,%ZMM2 |
(138) 0x42b906 VGATHERQPD (%RDX,%ZMM7,8),%ZMM20{%K4} |
(138) 0x42b90d VGATHERQPD (%R10,%ZMM7,8),%ZMM24{%K6} |
(138) 0x42b914 VPADDD %ZMM12,%ZMM0,%ZMM1{%K5} |
(138) 0x42b91a VMOVDQA64 %ZMM3,%ZMM21{%K3} |
(138) 0x42b920 KMOVB %K1,%K5 |
(138) 0x42b924 KMOVB %K1,%K3 |
(138) 0x42b928 KMOVB %K1,%K6 |
(138) 0x42b92c VGATHERQPD (%RDX,%ZMM5,8),%ZMM7{%K3} |
(138) 0x42b933 VGATHERQPD (%R10,%ZMM5,8),%ZMM0{%K7} |
(138) 0x42b93a VGATHERDPD (%RDX,%YMM2,8),%ZMM5{%K5} |
(138) 0x42b941 KMOVB %K1,%K7 |
(138) 0x42b945 VGATHERQPD (%RDX,%ZMM23,8),%ZMM2{%K6} |
(138) 0x42b94c VGATHERQPD (%RDX,%ZMM21,8),%ZMM3{%K7} |
(138) 0x42b953 VSUBPD %ZMM4,%ZMM20,%ZMM4 |
(138) 0x42b959 VSUBPD %ZMM20,%ZMM2,%ZMM2 |
(138) 0x42b95f VSUBPD %ZMM7,%ZMM3,%ZMM3 |
(138) 0x42b965 VSUBPD %ZMM5,%ZMM7,%ZMM5 |
(138) 0x42b96b VMULPD %ZMM2,%ZMM4,%ZMM26 |
(138) 0x42b971 VANDPD %ZMM9,%ZMM2,%ZMM23 |
(138) 0x42b977 VANDPD %ZMM9,%ZMM4,%ZMM4 |
(138) 0x42b97d VMULPD %ZMM3,%ZMM5,%ZMM27 |
(138) 0x42b983 VANDPD %ZMM9,%ZMM5,%ZMM5 |
(138) 0x42b989 VCMPPD $0xe,%ZMM6,%ZMM26,%K3 |
(138) 0x42b990 VCMPPD $0xe,%ZMM6,%ZMM27,%K2 |
(138) 0x42b997 VANDPD %ZMM9,%ZMM3,%ZMM27 |
(138) 0x42b99d VCMPPD $0xe,%ZMM6,%ZMM2,%K5{%K3} |
(138) 0x42b9a4 VANDPD %ZMM9,%ZMM19,%ZMM2 |
(138) 0x42b9aa KMOVB %K3,%K6 |
(138) 0x42b9ae VCMPPD $0xe,%ZMM6,%ZMM3,%K4{%K2} |
(138) 0x42b9b5 VDIVPD %ZMM24,%ZMM2,%ZMM3 |
(138) 0x42b9bb VANDPD %ZMM9,%ZMM10,%ZMM2 |
(138) 0x42b9c1 VMOVAPD %ZMM6,%ZMM24 |
(138) 0x42b9c7 KMOVB %K2,%K7 |
(138) 0x42b9cb VDIVPD %ZMM0,%ZMM2,%ZMM2 |
(138) 0x42b9d1 VSUBPD %ZMM2,%ZMM14,%ZMM0 |
(138) 0x42b9d7 VGATHERDPD (%RSI,%YMM1,8),%ZMM24{%K6} |
(138) 0x42b9de VSHUFI32X4 $-0x12,%ZMM1,%ZMM1,%ZMM1 |
(138) 0x42b9e5 VGATHERDPD (%RSI,%YMM1,8),%ZMM28{%K7} |
(138) 0x42b9ec VBLENDMPD %ZMM8,%ZMM15,%ZMM21{%K5} |
(138) 0x42b9f2 VBLENDMPD %ZMM8,%ZMM15,%ZMM26{%K4} |
(138) 0x42b9f8 VMULPD %ZMM27,%ZMM0,%ZMM1 |
(138) 0x42b9fe VDIVPD %ZMM25,%ZMM1,%ZMM0 |
(138) 0x42ba04 VADDPD %ZMM8,%ZMM2,%ZMM1 |
(138) 0x42ba0a VMULPD %ZMM13,%ZMM25,%ZMM25 |
(138) 0x42ba10 VSUBPD %ZMM2,%ZMM8,%ZMM2 |
(138) 0x42ba16 VMULPD %ZMM5,%ZMM1,%ZMM1 |
(138) 0x42ba1c VMINPD %ZMM27,%ZMM5,%ZMM5 |
(138) 0x42ba22 VMULPD %ZMM26,%ZMM2,%ZMM2 |
(138) 0x42ba28 VDIVPD %ZMM28,%ZMM1,%ZMM1 |
(138) 0x42ba2e VADDPD %ZMM1,%ZMM0,%ZMM0 |
(138) 0x42ba34 VMULPD %ZMM25,%ZMM0,%ZMM1 |
(138) 0x42ba3a VMINPD %ZMM5,%ZMM1,%ZMM0 |
(138) 0x42ba40 VSUBPD %ZMM3,%ZMM14,%ZMM1 |
(138) 0x42ba46 VMULPD %ZMM23,%ZMM1,%ZMM5 |
(138) 0x42ba4c VFMADD231PD %ZMM2,%ZMM0,%ZMM7{%K2} |
(138) 0x42ba52 VDIVPD %ZMM22,%ZMM5,%ZMM1 |
(138) 0x42ba58 VADDPD %ZMM8,%ZMM3,%ZMM5 |
(138) 0x42ba5e VMULPD %ZMM13,%ZMM22,%ZMM22 |
(138) 0x42ba64 VSUBPD %ZMM3,%ZMM8,%ZMM3 |
(138) 0x42ba6a VMULPD %ZMM7,%ZMM10,%ZMM10 |
(138) 0x42ba70 VMULPD %ZMM4,%ZMM5,%ZMM5 |
(138) 0x42ba76 VMINPD %ZMM23,%ZMM4,%ZMM4 |
(138) 0x42ba7c VMOVUPD %ZMM10,0x40(%RCX,%RAX,1) |
(138) 0x42ba84 VDIVPD %ZMM24,%ZMM5,%ZMM5 |
(138) 0x42ba8a VADDPD %ZMM5,%ZMM1,%ZMM1 |
(138) 0x42ba90 VMULPD %ZMM22,%ZMM1,%ZMM5 |
(138) 0x42ba96 VMINPD %ZMM4,%ZMM5,%ZMM1 |
(138) 0x42ba9c VMULPD %ZMM21,%ZMM3,%ZMM5 |
(138) 0x42baa2 VMOVAPD %ZMM20,%ZMM4 |
(138) 0x42baa8 VFMADD231PD %ZMM5,%ZMM1,%ZMM4{%K3} |
(138) 0x42baae VMULPD %ZMM19,%ZMM4,%ZMM0 |
(138) 0x42bab4 VMOVUPD %ZMM0,(%RCX,%RAX,1) |
(138) 0x42babb SUB $-0x80,%RAX |
(138) 0x42babf CMP %RAX,%R15 |
(138) 0x42bac2 JNE 42b848 |
(136) 0x42bac8 MOV 0x74(%RSP),%R11D |
(136) 0x42bacd MOV %R14D,%EDX |
(136) 0x42bad0 AND $-0x10,%EDX |
(136) 0x42bad3 ADD %EDX,0x78(%RSP) |
(136) 0x42bad7 LEA (%RDX,%R11,1),%EDI |
(136) 0x42badb TEST $0xf,%R14B |
(136) 0x42badf JE 42bf48 |
(136) 0x42bae5 SUB %EDX,%R14D |
(136) 0x42bae8 MOV %R14D,%R10D |
(136) 0x42baeb LEA -0x1(%R14),%R14D |
(136) 0x42baef CMP $0x6,%R14D |
(136) 0x42baf3 JBE 42bdc1 |
(136) 0x42baf9 MOVSXD 0x74(%RSP),%RAX |
(136) 0x42bafe MOV 0x58(%RSP),%RCX |
(136) 0x42bb03 VXORPD %XMM2,%XMM2,%XMM2 |
(136) 0x42bb07 VPBROADCASTD %EDI,%YMM15 |
(136) 0x42bb0d MOV 0x60(%RSP),%R15 |
(136) 0x42bb12 VPADDD 0x36f86(%RIP),%YMM15,%YMM0 |
(136) 0x42bb1a VPADDD %YMM31,%YMM15,%YMM1 |
(136) 0x42bb20 ADD %RAX,%RCX |
(136) 0x42bb23 VPADDD 0x36dd5(%RIP),%YMM15,%YMM9 |
(136) 0x42bb2b LEA (%RBX,%R12,8),%R13 |
(136) 0x42bb2f VBROADCASTSD 0x36e47(%RIP),%YMM27 |
(136) 0x42bb39 ADD %RDX,%RCX |
(136) 0x42bb3c VEXTRACTI128 $0x1,%YMM0,%XMM7 |
(136) 0x42bb42 VMOVDQA %XMM0,%XMM12 |
(136) 0x42bb46 VBROADCASTSD 0x36df0(%RIP),%YMM17 |
(136) 0x42bb50 LEA (%R15,%RCX,8),%R14 |
(136) 0x42bb54 VPMOVSXDQ %XMM9,%YMM14 |
(136) 0x42bb59 VEXTRACTI128 $0x1,%YMM9,%XMM8 |
(136) 0x42bb5f MOV 0x48(%RSP),%R15 |
(136) 0x42bb64 VMOVUPD (%R14),%YMM5 |
(136) 0x42bb69 VMOVUPD 0x20(%R14),%YMM4 |
(136) 0x42bb6f MOV $0xf,%R14D |
(136) 0x42bb75 VPMOVSXDQ %XMM8,%YMM13 |
(136) 0x42bb7a KMOVB %R14D,%K0 |
(136) 0x42bb7f VPMOVSXDQ %XMM7,%YMM3 |
(136) 0x42bb84 VPMOVSXDQ %XMM0,%YMM11 |
(136) 0x42bb89 VMOVDQA %YMM14,%YMM6 |
(136) 0x42bb8d VCMPPD $0x1,%YMM2,%YMM5,%K3 |
(136) 0x42bb94 VCMPPD $0x1,%YMM2,%YMM4,%K2 |
(136) 0x42bb9b LEA (%R8,%R9,8),%RCX |
(136) 0x42bb9f VMOVDQA %YMM13,%YMM10 |
(136) 0x42bba4 LEA (%RAX,%RDX,1),%R11 |
(136) 0x42bba8 ADD %R15,%RAX |
(136) 0x42bbab LEA (%RSI,%R11,8),%R11 |
(136) 0x42bbaf ADD %RDX,%RAX |
(136) 0x42bbb2 MOV 0x50(%RSP),%RDX |
(136) 0x42bbb7 KANDB %K0,%K3,%K1 |
(136) 0x42bbbb KSHIFTLB $0x4,%K2,%K4 |
(136) 0x42bbc1 KORB %K4,%K1,%K5 |
(136) 0x42bbc5 VPADDD 0x36ef1(%RIP),%YMM15,%YMM1{%K5} |
(136) 0x42bbcf VPMOVSXDQ %XMM12,%YMM6{%K3} |
(136) 0x42bbd5 VMOVDQA64 %YMM14,%YMM11{%K3} |
(136) 0x42bbdb VPBLENDMQ %YMM13,%YMM3,%YMM12{%K2} |
(136) 0x42bbe1 KXNORB %K3,%K3,%K3 |
(136) 0x42bbe5 VPMOVSXDQ %XMM7,%YMM10{%K2} |
(136) 0x42bbeb KMOVB %K3,%K7 |
(136) 0x42bbef KMOVB %K3,%K1 |
(136) 0x42bbf3 VGATHERQPD (%R13,%YMM6,8),%YMM19{%K7} |
(136) 0x42bbfb VPERM2I128 $0x11,%YMM1,%YMM1,%YMM3 |
(136) 0x42bc01 VGATHERQPD (%RCX,%YMM6,8),%YMM7{%K1} |
(136) 0x42bc08 KNOTB %K5,%K6 |
(136) 0x42bc0c KMOVB %K3,%K7 |
(136) 0x42bc10 VPADDD %YMM31,%YMM15,%YMM0{%K6} |
(136) 0x42bc16 KMOVB %K3,%K5 |
(136) 0x42bc1a KMOVB %K3,%K6 |
(136) 0x42bc1e VGATHERDPD (%RCX,%XMM1,8),%YMM8{%K5} |
(136) 0x42bc25 VGATHERDPD (%RCX,%XMM3,8),%YMM1{%K6} |
(136) 0x42bc2c VGATHERQPD (%RCX,%YMM11,8),%YMM3{%K7} |
(136) 0x42bc33 KMOVB %K3,%K2 |
(136) 0x42bc37 KMOVB %K3,%K4 |
(136) 0x42bc3b VGATHERQPD (%R13,%YMM10,8),%YMM13{%K2} |
(136) 0x42bc43 VGATHERQPD (%RCX,%YMM10,8),%YMM6{%K4} |
(136) 0x42bc4a VGATHERQPD (%RCX,%YMM12,8),%YMM11{%K3} |
(136) 0x42bc51 VMOVUPD 0x20(%R11),%YMM15 |
(136) 0x42bc57 VSUBPD %YMM7,%YMM3,%YMM12 |
(136) 0x42bc5b VSUBPD %YMM8,%YMM7,%YMM10 |
(136) 0x42bc60 VMOVUPD (%R11),%YMM9 |
(136) 0x42bc65 MOV %R10D,%R13D |
(136) 0x42bc68 VSUBPD %YMM6,%YMM11,%YMM8 |
(136) 0x42bc6c VSUBPD %YMM1,%YMM6,%YMM14 |
(136) 0x42bc70 AND $-0x8,%R13D |
(136) 0x42bc74 LEA (%RDX,%RAX,8),%RAX |
(136) 0x42bc78 ADD %R13D,%EDI |
(136) 0x42bc7b VMULPD %YMM12,%YMM10,%YMM1 |
(136) 0x42bc80 VMULPD %YMM8,%YMM14,%YMM3 |
(136) 0x42bc85 VCMPPD $0xe,%YMM2,%YMM1,%K2 |
(136) 0x42bc8c VBROADCASTSD 0x36deb(%RIP),%YMM1 |
(136) 0x42bc95 VCMPPD $0xe,%YMM2,%YMM3,%K1 |
(136) 0x42bc9c VANDPD %YMM1,%YMM4,%YMM3 |
(136) 0x42bca0 VANDPD %YMM1,%YMM12,%YMM11 |
(136) 0x42bca4 VANDPD %YMM1,%YMM8,%YMM18 |
(136) 0x42bcaa VDIVPD %YMM13,%YMM3,%YMM3 |
(136) 0x42bcaf VMOVAPD %YMM2,%YMM13 |
(136) 0x42bcb3 KMOVB %K2,%K5 |
(136) 0x42bcb7 VANDPD %YMM1,%YMM10,%YMM10 |
(136) 0x42bcbb VGATHERDPD (%RSI,%XMM0,8),%YMM13{%K5} |
(136) 0x42bcc2 VPERM2I128 $0x11,%YMM0,%YMM0,%YMM0 |
(136) 0x42bcc8 KMOVB %K1,%K6 |
(136) 0x42bccc VANDPD %YMM1,%YMM14,%YMM14 |
(136) 0x42bcd0 VCMPPD $0xe,%YMM2,%YMM8,%K3{%K1} |
(136) 0x42bcd7 VCMPPD $0xe,%YMM2,%YMM12,%K4{%K2} |
(136) 0x42bcde VBROADCASTSD 0x36db1(%RIP),%YMM12 |
(136) 0x42bce7 VGATHERDPD (%RSI,%XMM0,8),%YMM2{%K6} |
(136) 0x42bcee VBROADCASTSD 0x36d99(%RIP),%YMM0 |
(136) 0x42bcf7 VANDPD %YMM1,%YMM5,%YMM8 |
(136) 0x42bcfb ADD %R13D,0x78(%RSP) |
(136) 0x42bd00 AND $0x7,%R10D |
(136) 0x42bd04 VBLENDMPD %YMM12,%YMM17,%YMM16{%K4} |
(136) 0x42bd0a VMOVAPD %YMM12,%YMM17{%K3} |
(136) 0x42bd10 VDIVPD %YMM19,%YMM8,%YMM8 |
(136) 0x42bd16 VSUBPD %YMM3,%YMM0,%YMM1 |
(136) 0x42bd1a VADDPD %YMM12,%YMM3,%YMM20 |
(136) 0x42bd20 VSUBPD %YMM3,%YMM12,%YMM3 |
(136) 0x42bd24 VMULPD %YMM14,%YMM20,%YMM23 |
(136) 0x42bd2a VMINPD %YMM18,%YMM14,%YMM14 |
(136) 0x42bd30 VMULPD %YMM18,%YMM1,%YMM1 |
(136) 0x42bd36 VDIVPD %YMM2,%YMM23,%YMM2 |
(136) 0x42bd3c VSUBPD %YMM8,%YMM0,%YMM0 |
(136) 0x42bd41 VDIVPD %YMM15,%YMM1,%YMM1 |
(136) 0x42bd46 VMULPD %YMM27,%YMM15,%YMM15 |
(136) 0x42bd4c VADDPD %YMM2,%YMM1,%YMM1 |
(136) 0x42bd50 VMULPD %YMM15,%YMM1,%YMM2 |
(136) 0x42bd55 VMULPD %YMM17,%YMM3,%YMM15 |
(136) 0x42bd5b VMULPD %YMM11,%YMM0,%YMM3 |
(136) 0x42bd60 VMINPD %YMM14,%YMM2,%YMM1 |
(136) 0x42bd65 VSUBPD %YMM8,%YMM12,%YMM2 |
(136) 0x42bd6a VADDPD %YMM12,%YMM8,%YMM12 |
(136) 0x42bd6f VDIVPD %YMM9,%YMM3,%YMM0 |
(136) 0x42bd74 VMULPD %YMM27,%YMM9,%YMM9 |
(136) 0x42bd7a VFMADD231PD %YMM1,%YMM15,%YMM6{%K1} |
(136) 0x42bd80 VMULPD %YMM10,%YMM12,%YMM8 |
(136) 0x42bd85 VMINPD %YMM11,%YMM10,%YMM10 |
(136) 0x42bd8a VMULPD %YMM16,%YMM2,%YMM14 |
(136) 0x42bd90 VMULPD %YMM4,%YMM6,%YMM4 |
(136) 0x42bd94 VMOVUPD %YMM4,0x20(%RAX) |
(136) 0x42bd99 VDIVPD %YMM13,%YMM8,%YMM13 |
(136) 0x42bd9e VADDPD %YMM13,%YMM0,%YMM2 |
(136) 0x42bda3 VMULPD %YMM9,%YMM2,%YMM3 |
(136) 0x42bda8 VMINPD %YMM10,%YMM3,%YMM11 |
(136) 0x42bdad VFMADD231PD %YMM11,%YMM14,%YMM7{%K2} |
(136) 0x42bdb3 VMULPD %YMM5,%YMM7,%YMM5 |
(136) 0x42bdb7 VMOVUPD %YMM5,(%RAX) |
(136) 0x42bdbb JE 42bf48 |
(136) 0x42bdc1 MOV 0x78(%RSP),%R13D |
(136) 0x42bdc6 MOV 0x60(%RSP),%R10 |
(136) 0x42bdcb LEA 0x1(%RDI),%EAX |
(136) 0x42bdce VXORPD %XMM8,%XMM8,%XMM8 |
(136) 0x42bdd3 VMOVDDUP 0x36ca5(%RIP),%XMM11 |
(136) 0x42bddb MOV 0x58(%RSP),%R11 |
(136) 0x42bde0 MOV 0x50(%RSP),%RCX |
(136) 0x42bde5 CLTQ |
(136) 0x42bde7 MOV 0x48(%RSP),%RDX |
(136) 0x42bdec LEA -0x1(%R13),%R13D |
(136) 0x42bdf0 VMOVAPD 0x36c98(%RIP),%XMM9 |
(136) 0x42bdf8 VMOVSD 0x36b80(%RIP),%XMM10 |
(136) 0x42be00 LEA (%R10,%R11,8),%R15 |
(136) 0x42be04 SUB %EDI,%R13D |
(136) 0x42be07 LEA (%RCX,%RDX,8),%R14 |
(136) 0x42be0b JMP 42bee4 |
(137) 0x42be10 LEA -0x2(%RAX),%EDX |
(137) 0x42be13 MOV %RAX,%RDI |
(137) 0x42be16 MOVSXD %EDX,%R10 |
(137) 0x42be19 LEA (%R12,%RCX,1),%R11 |
(137) 0x42be1d ADD %R9,%RDI |
(137) 0x42be20 ADD %R9,%RCX |
(137) 0x42be23 VMOVSD -0x8(%RSI,%RAX,8),%XMM12 |
(137) 0x42be29 VMOVSD (%R8,%RCX,8),%XMM4 |
(137) 0x42be2f VMOVSD (%R8,%RDI,8),%XMM0 |
(137) 0x42be35 MOVSXD %EDX,%RCX |
(137) 0x42be38 ADD %R9,%RCX |
(137) 0x42be3b VMOVSD (%RBX,%R11,8),%XMM14 |
(137) 0x42be41 VSUBSD (%R8,%RCX,8),%XMM4,%XMM1 |
(137) 0x42be47 VSUBSD %XMM4,%XMM0,%XMM2 |
(137) 0x42be4b VMULSD %XMM2,%XMM1,%XMM13 |
(137) 0x42be4f VCOMISD %XMM8,%XMM13 |
(137) 0x42be54 JBE 42bec7 |
(137) 0x42be56 VCOMISD %XMM2,%XMM8 |
(137) 0x42be5a JAE 42bf28 |
(137) 0x42be60 VMOVSD %XMM29,%XMM29,%XMM3 |
(137) 0x42be66 VMOVSD %XMM29,%XMM29,%XMM6 |
(137) 0x42be6c VANDPD %XMM30,%XMM15,%XMM5 |
(137) 0x42be72 VUNPCKLPD %XMM1,%XMM2,%XMM7 |
(137) 0x42be76 VDIVSD %XMM14,%XMM5,%XMM14 |
(137) 0x42be7b VANDPD %XMM11,%XMM7,%XMM0 |
(137) 0x42be80 VMOVHPD (%RSI,%R10,8),%XMM12,%XMM7 |
(137) 0x42be86 VMULSD %XMM10,%XMM12,%XMM12 |
(137) 0x42be8b VMOVDDUP %XMM14,%XMM1 |
(137) 0x42be90 VSUBSD %XMM14,%XMM3,%XMM3 |
(137) 0x42be95 VADDSUBPD %XMM1,%XMM9,%XMM2 |
(137) 0x42be99 VMULSD %XMM6,%XMM3,%XMM6 |
(137) 0x42be9d VMULPD %XMM2,%XMM0,%XMM13 |
(137) 0x42bea1 VMOVSD %XMM0,%XMM0,%XMM2 |
(137) 0x42bea5 VUNPCKHPD %XMM0,%XMM0,%XMM0 |
(137) 0x42bea9 VDIVPD %XMM7,%XMM13,%XMM1 |
(137) 0x42bead VMINSD %XMM0,%XMM2,%XMM7 |
(137) 0x42beb1 VUNPCKHPD %XMM1,%XMM1,%XMM5 |
(137) 0x42beb5 VADDPD %XMM1,%XMM5,%XMM13 |
(137) 0x42beb9 VMULSD %XMM13,%XMM12,%XMM14 |
(137) 0x42bebe VMINSD %XMM7,%XMM14,%XMM1 |
(137) 0x42bec2 VFMADD231SD %XMM1,%XMM6,%XMM4 |
(137) 0x42bec7 VMULSD %XMM4,%XMM15,%XMM15 |
(137) 0x42becb MOV 0x7c(%RSP),%R11D |
(137) 0x42bed0 VMOVSD %XMM15,-0x8(%R14,%RAX,8) |
(137) 0x42bed7 INC %RAX |
(137) 0x42beda LEA (%R13,%RAX,1),%R10D |
(137) 0x42bedf CMP %R11D,%R10D |
(137) 0x42bee2 JAE 42bf48 |
(137) 0x42bee4 VMOVSD -0x8(%R15,%RAX,8),%XMM15 |
(137) 0x42beeb LEA -0x1(%RAX),%RCX |
(137) 0x42beef VCOMISD %XMM15,%XMM8 |
(137) 0x42bef4 JBE 42be10 |
(137) 0x42befa MOV %RCX,%RDI |
(137) 0x42befd MOVSXD %EAX,%R10 |
(137) 0x42bf00 LEA 0x1(%RAX),%EDX |
(137) 0x42bf03 MOV %RAX,%RCX |
(137) 0x42bf06 JMP 42be19 |
0x42bf0b NOPL (%RAX,%RAX,1) |
0x42bf10 VZEROUPPER |
0x42bf13 LEA -0x28(%RBP),%RSP |
0x42bf17 POP %RBX |
0x42bf18 POP %R12 |
0x42bf1a POP %R13 |
0x42bf1c POP %R14 |
0x42bf1e POP %R15 |
0x42bf20 POP %RBP |
0x42bf21 RET |
0x42bf22 NOPW (%RAX,%RAX,1) |
(137) 0x42bf28 MOV 0x36a11(%RIP),%RDX |
(137) 0x42bf2f MOV 0x36b62(%RIP),%RDI |
(137) 0x42bf36 VMOVQ %RDX,%XMM6 |
(137) 0x42bf3b VMOVQ %RDI,%XMM3 |
(137) 0x42bf40 JMP 42be6c |
0x42bf45 NOPL (%RAX) |
(136) 0x42bf48 MOV 0x7c(%RSP),%ECX |
(136) 0x42bf4c MOV %ECX,0x78(%RSP) |
(136) 0x42bf50 INCQ 0x68(%RSP) |
(136) 0x42bf55 MOV 0x38(%RSP),%EAX |
(136) 0x42bf59 MOV 0x30(%RSP),%ESI |
(136) 0x42bf5d SUB %ESI,%EAX |
(136) 0x42bf5f MOV 0x68(%RSP),%RBX |
(136) 0x42bf64 ADD %EBX,%EAX |
(136) 0x42bf66 CMP %EAX,0x44(%RSP) |
(136) 0x42bf6a JLE 42bf10 |
(136) 0x42bf6c MOV 0x3c(%RSP),%R12D |
(136) 0x42bf71 MOV 0x78(%RSP),%R8D |
(136) 0x42bf76 MOV 0x40(%RSP),%R13D |
(136) 0x42bf7b MOV 0x70(%RSP),%R9D |
(136) 0x42bf80 SUB %R8D,%R12D |
(136) 0x42bf83 MOV %R13D,0x74(%RSP) |
(136) 0x42bf88 JMP 42b710 |
0x42bf8d NOPL (%RAX) |
(136) 0x42bf90 MOV 0x74(%RSP),%EDI |
(136) 0x42bf94 XOR %EDX,%EDX |
(136) 0x42bf96 JMP 42bae5 |
0x42bf9b INC %R12D |
0x42bf9e XOR %EDX,%EDX |
0x42bfa0 JMP 42b66d |
0x42bfa5 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.06 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.93 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 90 |
nb uops | 100 |
loop length | 344 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 15 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 8.00 | 7.33 | 7.33 | 11.00 | 5.60 | 5.50 | 11.00 | 11.00 | 11.00 | 5.40 | 7.33 |
cycles | 5.50 | 11.70 | 7.33 | 7.33 | 11.00 | 5.60 | 5.50 | 11.00 | 11.00 | 11.00 | 5.40 | 7.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.93-16.04 |
Stall cycles | 0.00 |
Front-end | 16.67 |
Dispatch | 11.70 |
DIV/SQRT | 12.00 |
Overall L1 | 16.67 |
all | 5% |
load | 12% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 5% |
load | 11% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 10% |
load | 14% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 14% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x34(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42bf13 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x923> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42bf13 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x923> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x70(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42bf9b <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x9ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R12D,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R12,%R8,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42bf13 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x923> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x70(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV (%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x373f1(%RIP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x373ca(%RIP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDQA32 0x36f80(%RIP),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R10D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42b66d <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x7d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 90 |
nb uops | 100 |
loop length | 344 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 15 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 8.00 | 7.33 | 7.33 | 11.00 | 5.60 | 5.50 | 11.00 | 11.00 | 11.00 | 5.40 | 7.33 |
cycles | 5.50 | 11.70 | 7.33 | 7.33 | 11.00 | 5.60 | 5.50 | 11.00 | 11.00 | 11.00 | 5.40 | 7.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.93-16.04 |
Stall cycles | 0.00 |
Front-end | 16.67 |
Dispatch | 11.70 |
DIV/SQRT | 12.00 |
Overall L1 | 16.67 |
all | 5% |
load | 12% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 5% |
load | 11% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 10% |
load | 14% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 14% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x34(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42bf13 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x923> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42bf13 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x923> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x70(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42bf9b <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x9ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R12D,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R12,%R8,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42bf13 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x923> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x70(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV (%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x373f1(%RIP),%XMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x373ca(%RIP),%XMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDQA32 0x36f80(%RIP),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV %R11,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R10D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42b66d <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.6+0x7d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 4.06 | 1.34 |
▼Loop 136 - advec_mom.cpp:114-139 - exec– | 0.01 | 0 |
○Loop 138 - advec_mom.cpp:114-139 - exec | 4.05 | 1.33 |
○Loop 137 - advec_mom.cpp:114-139 - exec | 0 | 0 |