Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:95-101 [...] | Coverage: 0.8% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:95-101 [...] | Coverage: 0.8% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 95 - 101 |
-------------------------------------------------------------------------------- |
95: #pragma omp parallel for simd collapse(2) |
96: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
97: for (int i = (x_min - 1 + 1); i < (x_max + 2 + 2); i++) { |
98: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
99: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
100: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i - 1, j + 0) + node_flux(i, j); |
101: } |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x424660 PUSH %RBP |
0x424661 MOV %RSP,%RBP |
0x424664 PUSH %R15 |
0x424666 PUSH %R14 |
0x424668 PUSH %R13 |
0x42466a PUSH %R12 |
0x42466c PUSH %RBX |
0x42466d AND $-0x20,%RSP |
0x424671 SUB $0x100,%RSP |
0x424678 MOV %RDX,%R13 |
0x42467b MOV 0x38(%RBP),%RAX |
0x42467f MOV 0x28(%RBP),%R14 |
0x424683 MOV 0x20(%RBP),%RSI |
0x424687 MOV 0x10(%RBP),%RBX |
0x42468b MOV 0x18(%RBP),%EDX |
0x42468e MOV %EDX,0x1c(%RSP) |
0x424692 MOVL $0,0x34(%RSP) |
0x42469a TEST %RAX,%RAX |
0x42469d JS 424c01 |
0x4246a3 MOV %R9,%R15 |
0x4246a6 MOV %R8,%R12 |
0x4246a9 MOV %RCX,0x20(%RSP) |
0x4246ae MOV %RSI,0x28(%RSP) |
0x4246b3 MOV (%RDI),%ESI |
0x4246b5 MOVQ $0,0x50(%RSP) |
0x4246be MOV %RAX,0x48(%RSP) |
0x4246c3 MOVQ $0x1,0x98(%RSP) |
0x4246cf SUB $0x8,%RSP |
0x4246d3 LEA 0xa0(%RSP),%RAX |
0x4246db LEA 0x3c(%RSP),%RCX |
0x4246e0 LEA 0x58(%RSP),%R8 |
0x4246e5 LEA 0x50(%RSP),%R9 |
0x4246ea MOV $0x682930,%EDI |
0x4246ef MOV %ESI,0x38(%RSP) |
0x4246f3 MOV $0x22,%EDX |
0x4246f8 PUSH $0x1 |
0x4246fa PUSH $0x1 |
0x4246fc PUSH %RAX |
0x4246fd CALL 403020 <__kmpc_for_static_init_8@plt> |
0x424702 ADD $0x20,%RSP |
0x424706 MOV 0x50(%RSP),%RSI |
0x42470b MOV 0x48(%RSP),%RAX |
0x424710 MOV %RAX,0x40(%RSP) |
0x424715 CMP %RAX,%RSI |
0x424718 JA 424be2 |
0x42471e SUB 0x28(%RSP),%R14D |
0x424723 MOV (%R13),%R8 |
0x424727 MOV 0x10(%R13),%R13 |
0x42472b MOV (%RBX),%R9 |
0x42472e MOV %R15,%RAX |
0x424731 MOV 0x10(%RBX),%R15 |
0x424735 MOV (%R12),%R10 |
0x424739 MOV 0x10(%R12),%R12 |
0x42473e MOV 0x20(%RSP),%RCX |
0x424743 MOV (%RCX),%R11 |
0x424746 MOV 0x10(%RCX),%RBX |
0x42474a MOV (%RAX),%RCX |
0x42474d MOV 0x10(%RAX),%RDI |
0x424751 LEA 0x1(%RSI),%RAX |
0x424755 MOV 0x40(%RSP),%RDX |
0x42475a INC %RDX |
0x42475d CMP %RDX,%RAX |
0x424760 CMOVG %RAX,%RDX |
0x424764 SUB %RSI,%RDX |
0x424767 MOV $-0x8,%EAX |
0x42476c AND %RDX,%RAX |
0x42476f MOV %R8,0x20(%RSP) |
0x424774 MOV %R9,0x90(%RSP) |
0x42477c MOV %R10,0x88(%RSP) |
0x424784 MOV %R12,0x80(%RSP) |
0x42478c MOV %R11,0x78(%RSP) |
0x424791 MOV %RCX,0x70(%RSP) |
0x424796 JE 424d0f |
0x42479c MOV %RDX,0x60(%RSP) |
0x4247a1 MOV %R14,0x68(%RSP) |
0x4247a6 VPBROADCASTQ %R14,%YMM8 |
0x4247ac MOV %RAX,0x38(%RSP) |
0x4247b1 MOV 0x1c(%RSP),%EAX |
0x4247b5 VPBROADCASTD %EAX,%YMM0 |
0x4247bb VMOVDQU %YMM0,0xc0(%RSP) |
0x4247c4 MOV 0x28(%RSP),%RAX |
0x4247c9 VPBROADCASTD %EAX,%YMM0 |
0x4247cf VMOVDQU %YMM0,0xa0(%RSP) |
0x4247d8 VPBROADCASTQ %R8,%YMM14 |
0x4247de VPBROADCASTQ %R9,%YMM15 |
0x4247e4 VPBROADCASTQ %R10,%YMM16 |
0x4247ea VPBROADCASTQ %R11,%YMM17 |
0x4247f0 VPBROADCASTQ %RCX,%YMM18 |
0x4247f6 MOV %RSI,0x58(%RSP) |
0x4247fb VPBROADCASTQ %RSI,%YMM0 |
0x424801 VPADDQ 0x41a77(%RIP),%YMM0,%YMM9 |
0x424809 VPADDQ 0x418ef(%RIP),%YMM0,%YMM10 |
0x424811 XOR %ESI,%ESI |
0x424813 VPBROADCASTQ 0x41a83(%RIP),%YMM21 |
0x42481d NOPL (%RAX) |
(131) 0x424820 VMOVDQA %YMM10,%YMM0 |
(131) 0x424824 VMOVDQA %YMM8,%YMM1 |
(131) 0x424828 MOV $0x454690,%R14 |
(131) 0x42482f CALL %R14 |
(131) 0x424832 VMOVDQA %YMM0,%YMM11 |
(131) 0x424836 VMOVDQA %YMM9,%YMM0 |
(131) 0x42483a VMOVDQA %YMM8,%YMM1 |
(131) 0x42483e CALL %R14 |
(131) 0x424841 VPMOVQD %YMM11,%XMM1 |
(131) 0x424847 VPMOVQD %YMM0,%XMM0 |
(131) 0x42484d VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(131) 0x424853 VPADDD 0xc0(%RSP),%YMM0,%YMM22 |
(131) 0x42485b VMOVDQA %YMM10,%YMM0 |
(131) 0x42485f VMOVDQA %YMM8,%YMM1 |
(131) 0x424863 MOV $0x454460,%R14 |
(131) 0x42486a CALL %R14 |
(131) 0x42486d VMOVDQA %YMM0,%YMM11 |
(131) 0x424871 VMOVDQA %YMM9,%YMM0 |
(131) 0x424875 VMOVDQA %YMM8,%YMM1 |
(131) 0x424879 CALL %R14 |
(131) 0x42487c VPMOVQD %YMM11,%XMM1 |
(131) 0x424882 VPMOVQD %YMM0,%XMM0 |
(131) 0x424888 VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(131) 0x42488e VPCMPEQD %YMM12,%YMM12,%YMM12 |
(131) 0x424893 VPADDD %YMM12,%YMM22,%YMM1 |
(131) 0x424899 VEXTRACTI128 $0x1,%YMM1,%XMM2 |
(131) 0x42489f VPMOVSXDQ %XMM2,%YMM5 |
(131) 0x4248a4 VPMOVSXDQ %XMM1,%YMM4 |
(131) 0x4248a9 VPADDD 0xa0(%RSP),%YMM0,%YMM2 |
(131) 0x4248b2 VPMULLQ %YMM4,%YMM14,%YMM26 |
(131) 0x4248b8 VXORPS %XMM6,%XMM6,%XMM6 |
(131) 0x4248bc VPMULLQ %YMM5,%YMM14,%YMM6 |
(131) 0x4248c2 VPMOVSXDQ %XMM2,%YMM0 |
(131) 0x4248c7 VPMOVSXDQ %XMM22,%YMM1 |
(131) 0x4248cd VPMULLQ %YMM1,%YMM14,%YMM23 |
(131) 0x4248d3 VPADDQ %YMM0,%YMM23,%YMM7 |
(131) 0x4248d9 KXNORW %K0,%K0,%K1 |
(131) 0x4248dd VXORPD %XMM3,%XMM3,%XMM3 |
(131) 0x4248e1 VPMULLQ %YMM1,%YMM15,%YMM24 |
(131) 0x4248e7 VGATHERQPD (%R13,%YMM7,8),%YMM3{%K1} |
(131) 0x4248ef VPADDQ %YMM0,%YMM26,%YMM25 |
(131) 0x4248f5 VPADDQ %YMM0,%YMM24,%YMM11 |
(131) 0x4248fb KXNORW %K0,%K0,%K1 |
(131) 0x4248ff VXORPD %XMM7,%XMM7,%XMM7 |
(131) 0x424903 VGATHERQPD (%R15,%YMM11,8),%YMM7{%K1} |
(131) 0x42490a KXNORW %K0,%K0,%K1 |
(131) 0x42490e VXORPD %XMM11,%XMM11,%XMM11 |
(131) 0x424913 VPMULLQ %YMM4,%YMM15,%YMM27 |
(131) 0x424919 VGATHERQPD (%R13,%YMM25,8),%YMM11{%K1} |
(131) 0x424921 VPADDQ %YMM0,%YMM27,%YMM4 |
(131) 0x424927 KXNORW %K0,%K0,%K1 |
(131) 0x42492b VXORPD %XMM25,%XMM25,%XMM25 |
(131) 0x424931 VGATHERQPD (%R15,%YMM4,8),%YMM25{%K1} |
(131) 0x424938 VPADDD %YMM12,%YMM2,%YMM28 |
(131) 0x42493e VPMOVSXDQ %XMM28,%YMM4 |
(131) 0x424944 VPADDQ %YMM4,%YMM26,%YMM29 |
(131) 0x42494a KXNORW %K0,%K0,%K1 |
(131) 0x42494e VPXORD %XMM26,%XMM26,%XMM26 |
(131) 0x424954 VGATHERQPD (%R13,%YMM29,8),%YMM26{%K1} |
(131) 0x42495c VEXTRACTI128 $0x1,%YMM2,%XMM2 |
(131) 0x424962 VPADDQ %YMM4,%YMM27,%YMM29 |
(131) 0x424968 KXNORW %K0,%K0,%K1 |
(131) 0x42496c VPXORD %XMM27,%XMM27,%XMM27 |
(131) 0x424972 VGATHERQPD (%R15,%YMM29,8),%YMM27{%K1} |
(131) 0x424979 VPMOVSXDQ %XMM2,%YMM2 |
(131) 0x42497e KXNORW %K0,%K0,%K1 |
(131) 0x424982 VPXORD %XMM29,%XMM29,%XMM29 |
(131) 0x424988 VPMULLQ %YMM5,%YMM15,%YMM29 |
(131) 0x42498e VPADDQ %YMM2,%YMM6,%YMM5 |
(131) 0x424992 VXORPD %XMM30,%XMM30,%XMM30 |
(131) 0x424998 VPADDQ %YMM2,%YMM29,%YMM31 |
(131) 0x42499e KXNORW %K0,%K0,%K2 |
(131) 0x4249a2 VXORPD %XMM19,%XMM19,%XMM19 |
(131) 0x4249a8 VEXTRACTI32X4 $0x1,%YMM22,%XMM22 |
(131) 0x4249af VGATHERQPD (%R13,%YMM5,8),%YMM30{%K1} |
(131) 0x4249b7 VPMOVSXDQ %XMM22,%YMM5 |
(131) 0x4249bd VPMULLQ %YMM5,%YMM14,%YMM13 |
(131) 0x4249c3 VPADDQ %YMM2,%YMM13,%YMM22 |
(131) 0x4249c9 VGATHERQPD (%R15,%YMM31,8),%YMM19{%K2} |
(131) 0x4249d0 KXNORW %K0,%K0,%K1 |
(131) 0x4249d4 VXORPD %XMM31,%XMM31,%XMM31 |
(131) 0x4249da VXORPS %XMM12,%XMM12,%XMM12 |
(131) 0x4249df VPMULLQ %YMM5,%YMM15,%YMM12 |
(131) 0x4249e5 VGATHERQPD (%R13,%YMM22,8),%YMM31{%K1} |
(131) 0x4249ed VPADDQ %YMM2,%YMM12,%YMM22 |
(131) 0x4249f3 KXNORW %K0,%K0,%K1 |
(131) 0x4249f7 VXORPD %XMM20,%XMM20,%XMM20 |
(131) 0x4249fd VGATHERQPD (%R15,%YMM22,8),%YMM20{%K1} |
(131) 0x424a04 VEXTRACTI32X4 $0x1,%YMM28,%XMM22 |
(131) 0x424a0b VPMOVSXDQ %XMM22,%YMM22 |
(131) 0x424a11 VPADDQ %YMM22,%YMM6,%YMM6 |
(131) 0x424a17 KXNORW %K0,%K0,%K1 |
(131) 0x424a1b VPXORD %XMM28,%XMM28,%XMM28 |
(131) 0x424a21 VGATHERQPD (%R13,%YMM6,8),%YMM28{%K1} |
(131) 0x424a29 VPADDQ %YMM22,%YMM29,%YMM6 |
(131) 0x424a2f KXNORW %K0,%K0,%K1 |
(131) 0x424a33 VPXORD %XMM29,%XMM29,%XMM29 |
(131) 0x424a39 VGATHERQPD (%R15,%YMM6,8),%YMM29{%K1} |
(131) 0x424a40 VPADDQ %YMM4,%YMM23,%YMM6 |
(131) 0x424a46 KXNORW %K0,%K0,%K1 |
(131) 0x424a4a VPXORD %XMM23,%XMM23,%XMM23 |
(131) 0x424a50 VGATHERQPD (%R13,%YMM6,8),%YMM23{%K1} |
(131) 0x424a58 VPADDQ %YMM4,%YMM24,%YMM6 |
(131) 0x424a5e KXNORW %K0,%K0,%K1 |
(131) 0x424a62 VPXORD %XMM24,%XMM24,%XMM24 |
(131) 0x424a68 VGATHERQPD (%R15,%YMM6,8),%YMM24{%K1} |
(131) 0x424a6f VPADDQ %YMM22,%YMM13,%YMM6 |
(131) 0x424a75 KXNORW %K0,%K0,%K1 |
(131) 0x424a79 VPXOR %XMM13,%XMM13,%XMM13 |
(131) 0x424a7e VGATHERQPD (%R13,%YMM6,8),%YMM13{%K1} |
(131) 0x424a86 VPADDQ %YMM22,%YMM12,%YMM6 |
(131) 0x424a8c KXNORW %K0,%K0,%K1 |
(131) 0x424a90 VPXOR %XMM12,%XMM12,%XMM12 |
(131) 0x424a95 VGATHERQPD (%R15,%YMM6,8),%YMM12{%K1} |
(131) 0x424a9c VMULPD %YMM11,%YMM25,%YMM6 |
(131) 0x424aa2 VMULPD %YMM30,%YMM19,%YMM11 |
(131) 0x424aa8 VPXORD %XMM19,%XMM19,%XMM19 |
(131) 0x424aae VPMULLQ %YMM1,%YMM16,%YMM19 |
(131) 0x424ab4 VFMADD213PD %YMM11,%YMM31,%YMM20 |
(131) 0x424aba VFMADD213PD %YMM6,%YMM3,%YMM7 |
(131) 0x424abf VFMADD213PD %YMM7,%YMM26,%YMM27 |
(131) 0x424ac5 VPMULLQ %YMM5,%YMM16,%YMM3 |
(131) 0x424acb VFMADD213PD %YMM20,%YMM28,%YMM29 |
(131) 0x424ad1 VFMADD213PD %YMM29,%YMM13,%YMM12 |
(131) 0x424ad7 VFMADD213PD %YMM27,%YMM23,%YMM24 |
(131) 0x424add VXORPS %XMM6,%XMM6,%XMM6 |
(131) 0x424ae1 VPMULLQ %YMM5,%YMM17,%YMM6 |
(131) 0x424ae7 VBROADCASTSD 0x41600(%RIP),%YMM11 |
(131) 0x424af0 VMULPD %YMM11,%YMM12,%YMM7 |
(131) 0x424af5 VMULPD %YMM11,%YMM24,%YMM11 |
(131) 0x424afb VPADDQ %YMM0,%YMM19,%YMM12 |
(131) 0x424b01 VXORPS %XMM13,%XMM13,%XMM13 |
(131) 0x424b06 VPMULLQ %YMM1,%YMM17,%YMM13 |
(131) 0x424b0c VPADDQ %YMM2,%YMM3,%YMM3 |
(131) 0x424b10 KXNORW %K0,%K0,%K1 |
(131) 0x424b14 VSCATTERQPD %YMM11,(%R12,%YMM12,8){%K1} |
(131) 0x424b1b KXNORW %K0,%K0,%K1 |
(131) 0x424b1f VSCATTERQPD %YMM7,(%R12,%YMM3,8){%K1} |
(131) 0x424b26 VPADDQ %YMM22,%YMM6,%YMM3 |
(131) 0x424b2c KXNORW %K0,%K0,%K1 |
(131) 0x424b30 VXORPD %XMM12,%XMM12,%XMM12 |
(131) 0x424b35 VGATHERQPD (%RBX,%YMM3,8),%YMM12{%K1} |
(131) 0x424b3c VPADDQ %YMM4,%YMM13,%YMM3 |
(131) 0x424b40 KXNORW %K0,%K0,%K1 |
(131) 0x424b44 VPXOR %XMM4,%XMM4,%XMM4 |
(131) 0x424b48 VGATHERQPD (%RBX,%YMM3,8),%YMM4{%K1} |
(131) 0x424b4f VPADDQ %YMM0,%YMM13,%YMM3 |
(131) 0x424b53 KXNORW %K0,%K0,%K1 |
(131) 0x424b57 VPXOR %XMM13,%XMM13,%XMM13 |
(131) 0x424b5c VGATHERQPD (%RBX,%YMM3,8),%YMM13{%K1} |
(131) 0x424b63 VPADDQ %YMM2,%YMM6,%YMM3 |
(131) 0x424b67 KXNORW %K0,%K0,%K1 |
(131) 0x424b6b VPXOR %XMM6,%XMM6,%XMM6 |
(131) 0x424b6f VGATHERQPD (%RBX,%YMM3,8),%YMM6{%K1} |
(131) 0x424b76 VSUBPD %YMM4,%YMM11,%YMM3 |
(131) 0x424b7a VSUBPD %YMM12,%YMM7,%YMM4 |
(131) 0x424b7f VPMULLQ %YMM1,%YMM18,%YMM1 |
(131) 0x424b85 VADDPD %YMM3,%YMM13,%YMM3 |
(131) 0x424b89 VADDPD %YMM6,%YMM4,%YMM4 |
(131) 0x424b8d VPMULLQ %YMM5,%YMM18,%YMM5 |
(131) 0x424b93 VPADDQ %YMM0,%YMM1,%YMM0 |
(131) 0x424b97 VPADDQ %YMM2,%YMM5,%YMM1 |
(131) 0x424b9b KXNORW %K0,%K0,%K1 |
(131) 0x424b9f VSCATTERQPD %YMM3,(%RDI,%YMM0,8){%K1} |
(131) 0x424ba6 KXNORW %K0,%K0,%K1 |
(131) 0x424baa VSCATTERQPD %YMM4,(%RDI,%YMM1,8){%K1} |
(131) 0x424bb1 VPADDQ %YMM21,%YMM10,%YMM10 |
(131) 0x424bb7 VPADDQ %YMM21,%YMM9,%YMM9 |
(131) 0x424bbd ADD $0x8,%RSI |
(131) 0x424bc1 CMP 0x38(%RSP),%RSI |
(131) 0x424bc6 JB 424820 |
0x424bcc MOV 0x38(%RSP),%RAX |
0x424bd1 CMP %RAX,0x60(%RSP) |
0x424bd6 MOV 0x68(%RSP),%R14 |
0x424bdb MOV 0x58(%RSP),%RSI |
0x424be0 JNE 424c10 |
0x424be2 MOV $0x682950,%EDI |
0x424be7 MOV 0x30(%RSP),%ESI |
0x424beb LEA -0x28(%RBP),%RSP |
0x424bef POP %RBX |
0x424bf0 POP %R12 |
0x424bf2 POP %R13 |
0x424bf4 POP %R14 |
0x424bf6 POP %R15 |
0x424bf8 POP %RBP |
0x424bf9 VZEROUPPER |
0x424bfc JMP 402e90 |
0x424c01 LEA -0x28(%RBP),%RSP |
0x424c05 POP %RBX |
0x424c06 POP %R12 |
0x424c08 POP %R13 |
0x424c0a POP %R14 |
0x424c0c POP %R15 |
0x424c0e POP %RBP |
0x424c0f RET |
0x424c10 ADD %RAX,%RSI |
0x424c13 JMP 424d0f |
0x424c18 NOPL (%RAX,%RAX,1) |
(130) 0x424c20 MOV %RSI,%RAX |
(130) 0x424c23 CQTO |
(130) 0x424c25 IDIV %R14 |
(130) 0x424c28 ADD 0x1c(%RSP),%ECX |
(130) 0x424c2c ADD 0x28(%RSP),%EDX |
(130) 0x424c30 MOVSXD %EDX,%RAX |
(130) 0x424c33 LEA -0x1(%RCX),%EDX |
(130) 0x424c36 MOVSXD %EDX,%RDX |
(130) 0x424c39 MOV %R11,%R8 |
(130) 0x424c3c IMUL %RDX,%R8 |
(130) 0x424c40 LEA (%R8,%RAX,1),%R9 |
(130) 0x424c44 MOV 0x90(%RSP),%R12 |
(130) 0x424c4c IMUL %R12,%RDX |
(130) 0x424c50 LEA (%RDX,%RAX,1),%R10 |
(130) 0x424c54 VMOVSD (%R15,%R10,8),%XMM0 |
(130) 0x424c5a VMULSD (%R13,%R9,8),%XMM0,%XMM0 |
(130) 0x424c61 MOVSXD %ECX,%RCX |
(130) 0x424c64 MOV %R11,%R9 |
(130) 0x424c67 IMUL %RCX,%R9 |
(130) 0x424c6b IMUL %RCX,%R12 |
(130) 0x424c6f LEA (%R12,%RAX,1),%R11 |
(130) 0x424c73 VMOVSD (%R15,%R11,8),%XMM1 |
(130) 0x424c79 LEA (%R9,%RAX,1),%R11 |
(130) 0x424c7d VFMADD132SD (%R13,%R11,8),%XMM0,%XMM1 |
(130) 0x424c84 LEA -0x1(%RDX,%RAX,1),%RDX |
(130) 0x424c89 VMOVSD (%R15,%RDX,8),%XMM0 |
(130) 0x424c8f LEA -0x1(%R8,%RAX,1),%RDX |
(130) 0x424c94 VFMADD132SD (%R13,%RDX,8),%XMM1,%XMM0 |
(130) 0x424c9b LEA -0x1(%R9,%RAX,1),%RDX |
(130) 0x424ca0 LEA -0x1(%R12,%RAX,1),%R8 |
(130) 0x424ca5 VMOVSD (%R15,%R8,8),%XMM1 |
(130) 0x424cab VFMADD132SD (%R13,%RDX,8),%XMM0,%XMM1 |
(130) 0x424cb2 VMULSD 0x41436(%RIP),%XMM1,%XMM0 |
(130) 0x424cba MOV 0x88(%RSP),%RDX |
(130) 0x424cc2 IMUL %RCX,%RDX |
(130) 0x424cc6 ADD %RAX,%RDX |
(130) 0x424cc9 MOV 0x80(%RSP),%R8 |
(130) 0x424cd1 VMOVSD %XMM0,(%R8,%RDX,8) |
(130) 0x424cd7 MOV 0x78(%RSP),%RDX |
(130) 0x424cdc IMUL %RCX,%RDX |
(130) 0x424ce0 LEA -0x1(%RDX,%RAX,1),%R8 |
(130) 0x424ce5 VSUBSD (%RBX,%R8,8),%XMM0,%XMM0 |
(130) 0x424ceb ADD %RAX,%RDX |
(130) 0x424cee VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(130) 0x424cf3 IMUL 0x70(%RSP),%RCX |
(130) 0x424cf9 ADD %RAX,%RCX |
(130) 0x424cfc VMOVSD %XMM0,(%RDI,%RCX,8) |
(130) 0x424d01 INC %RSI |
(130) 0x424d04 CMP 0x40(%RSP),%RSI |
(130) 0x424d09 JG 424be2 |
(130) 0x424d0f MOV %RSI,%R8 |
(130) 0x424d12 SHR $0x20,%R8 |
(130) 0x424d16 JE 424d40 |
(130) 0x424d18 MOV %RSI,%RAX |
(130) 0x424d1b XOR %EDX,%EDX |
(130) 0x424d1d DIV %R14 |
(130) 0x424d20 MOV %RAX,%RCX |
(130) 0x424d23 MOV 0x20(%RSP),%R11 |
(130) 0x424d28 TEST %R8,%R8 |
(130) 0x424d2b JNE 424c20 |
(130) 0x424d31 JMP 424d57 |
0x424d33 NOPW %CS:(%RAX,%RAX,1) |
(130) 0x424d40 MOV %ESI,%EAX |
(130) 0x424d42 XOR %EDX,%EDX |
(130) 0x424d44 DIV %R14D |
(130) 0x424d47 MOV %EAX,%ECX |
(130) 0x424d49 MOV 0x20(%RSP),%R11 |
(130) 0x424d4e TEST %R8,%R8 |
(130) 0x424d51 JNE 424c20 |
(130) 0x424d57 MOV %ESI,%EAX |
(130) 0x424d59 XOR %EDX,%EDX |
(130) 0x424d5b DIV %R14D |
(130) 0x424d5e JMP 424c28 |
0x424d63 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | advec_mom.cpp:95-101 |
Module | exec |
nb instructions | 124 |
nb uops | 126 |
loop length | 558 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 10 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 14.67 | 14.67 | 15.50 | 9.00 | 5.40 | 15.50 | 15.50 | 15.50 | 5.40 | 14.67 |
cycles | 5.60 | 5.60 | 14.67 | 14.67 | 15.50 | 9.00 | 5.40 | 15.50 | 15.50 | 15.50 | 5.40 | 14.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.80 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 15.50 |
Overall L1 | 21.00 |
all | 9% |
load | 14% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 14% |
load | 16% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 424c01 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x5a1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x58(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x50(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x682930,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 424be2 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x582> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB 0x28(%RSP),%R14D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 424d0f <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x6af> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R14,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RCX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x41a77(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x418ef(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x41a83(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,0x60(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 424c10 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x5b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x682950,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 424d0f <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x6af> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:95-101 |
Module | exec |
nb instructions | 124 |
nb uops | 126 |
loop length | 558 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 10 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 14.67 | 14.67 | 15.50 | 9.00 | 5.40 | 15.50 | 15.50 | 15.50 | 5.40 | 14.67 |
cycles | 5.60 | 5.60 | 14.67 | 14.67 | 15.50 | 9.00 | 5.40 | 15.50 | 15.50 | 15.50 | 5.40 | 14.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.80 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 15.50 |
Overall L1 | 21.00 |
all | 9% |
load | 14% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 14% |
load | 16% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 424c01 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x5a1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x58(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x50(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x682930,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 424be2 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x582> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB 0x28(%RSP),%R14D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 424d0f <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x6af> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R14,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RCX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x41a77(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x418ef(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x41a83(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,0x60(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 424c10 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x5b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x682950,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 424d0f <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x6af> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 0.8 | 0.98 |
○Loop 131 - advec_mom.cpp:96-101 - exec | 0.8 | 0.97 |
○Loop 130 - advec_mom.cpp:96-101 - exec | 0 | 0.01 |