Loop Id: 14921 | Module: libgromacs_mpi.so.9.0.0 | Source: lincs.cpp:966-992 [...] | Coverage: 0.15% |
---|
Loop Id: 14921 | Module: libgromacs_mpi.so.9.0.0 | Source: lincs.cpp:966-992 [...] | Coverage: 0.15% |
---|
0xb03620 VPBROADCASTQ %R14,%ZMM10 |
0xb03626 VPADDQ %YMM18,%YMM10,%YMM11 |
0xb0362c VPEXTRQ $0x1,%XMM11,%RDX |
0xb03632 VEXTRACTI128 $0x1,%YMM11,%XMM9 |
0xb03638 VPEXTRQ $0x1,%XMM9,%RSI |
0xb0363e VPADDQ %ZMM20,%ZMM10,%ZMM12 |
0xb03644 VEXTRACTI32X4 $0x2,%ZMM12,%XMM13 |
0xb0364b VMOVQ %XMM13,%R11 |
0xb03650 VPEXTRQ $0x1,%XMM13,%R8 |
0xb03656 VMOVD 0x4(%R15,%R11,8),%XMM13 [8] |
0xb0365d VEXTRACTI32X4 $0x3,%ZMM12,%XMM14 |
0xb03664 VMOVQ %XMM14,%R11 |
0xb03669 VPINSRD $0x1,0x4(%R15,%R8,8),%XMM13,%XMM13 [8] |
0xb03671 VPINSRD $0x2,0x4(%R15,%R11,8),%XMM13,%XMM13 [8] |
0xb03679 VMOVQ %XMM11,%R11 |
0xb0367e VPEXTRQ $0x1,%XMM14,%R8 |
0xb03684 VPINSRD $0x3,0x4(%R15,%R8,8),%XMM13,%XMM13 [8] |
0xb0368c VMOVQ %XMM12,%R8 |
0xb03691 VMOVD 0x4(%R15,%R8,8),%XMM14 [8] |
0xb03698 VPEXTRQ $0x1,%XMM12,%R8 |
0xb0369e VPINSRD $0x1,0x4(%R15,%R8,8),%XMM14,%XMM14 [8] |
0xb036a6 VEXTRACTI128 $0x1,%YMM12,%XMM15 |
0xb036ac VMOVQ %XMM15,%R8 |
0xb036b1 VPINSRD $0x2,0x4(%R15,%R8,8),%XMM14,%XMM14 [8] |
0xb036b9 VPEXTRQ $0x1,%XMM15,%R8 |
0xb036bf VPADDQ %XMM19,%XMM10,%XMM15 |
0xb036c5 VPINSRD $0x3,0x4(%R15,%R8,8),%XMM14,%XMM14 [8] |
0xb036cd LEA 0x7(%R14),%R8 |
0xb036d1 VPERMT2Q %ZMM10,%ZMM21,%ZMM11 |
0xb036d7 VPERMT2Q %ZMM15,%ZMM22,%ZMM11 |
0xb036dd KMOVD %EDI,%K1 |
0xb036e1 VPBROADCASTQ %R8,%ZMM11{%K1} |
0xb036e7 MOV 0x60(%RSP),%R8 [9] |
0xb036ec MOV 0x68(%RSP),%RCX [9] |
0xb036f1 VMOVD 0x4(%R15,%RSI,8),%XMM10 [8] |
0xb036f8 VMOVQ %XMM15,%RSI |
0xb036fd KXNORW %K0,%K0,%K1 |
0xb03701 VXORPS %XMM16,%XMM16,%XMM16 |
0xb03707 VPGATHERQD (%R15,%ZMM12,8),%YMM16{%K1} [8] |
0xb0370e KXNORW %K0,%K0,%K1 |
0xb03712 VPXOR %XMM12,%XMM12,%XMM12 |
0xb03717 VPGATHERQD (%R15,%ZMM11,8),%YMM12{%K1} [8] |
0xb0371e VINSERTI64X4 $0x1,%YMM16,%ZMM12,%ZMM11 |
0xb03725 VPINSRD $0x1,0x4(%R15,%RSI,8),%XMM10,%XMM10 [8] |
0xb0372d VPMULLD %ZMM23,%ZMM11,%ZMM11 |
0xb03733 KXNORW %K0,%K0,%K1 |
0xb03737 VPXOR %XMM12,%XMM12,%XMM12 |
0xb0373c VGATHERDPS (%R12,%ZMM11,4),%ZMM12{%K1} [6] |
0xb03743 VPEXTRQ $0x1,%XMM15,%RSI |
0xb03749 VPINSRD $0x2,0x4(%R15,%RSI,8),%XMM10,%XMM10 [8] |
0xb03751 MOV 0xa0(%RSP),%RSI [9] |
0xb03759 KXNORW %K0,%K0,%K1 |
0xb0375d VPXOR %XMM15,%XMM15,%XMM15 |
0xb03762 VGATHERDPS (%RSI,%ZMM11,4),%ZMM15{%K1} [3] |
0xb03769 KXNORW %K0,%K0,%K1 |
0xb0376d VPXORD %XMM16,%XMM16,%XMM16 |
0xb03773 VGATHERDPS (%R8,%ZMM11,4),%ZMM16{%K1} [5] |
0xb0377a VMOVD 0x4(%R15,%R14,8),%XMM11 [8] |
0xb03781 VPINSRD $0x1,0x4(%R15,%R11,8),%XMM11,%XMM11 [8] |
0xb03789 VPINSRD $0x2,0x4(%R15,%RDX,8),%XMM11,%XMM11 [8] |
0xb03791 VMOVQ %XMM9,%RDX |
0xb03796 VPINSRD $0x3,0x3c(%R15,%R14,8),%XMM10,%XMM9 [8] |
0xb0379e VPINSRD $0x3,0x4(%R15,%RDX,8),%XMM11,%XMM10 [8] |
0xb037a6 VINSERTI128 $0x1,%XMM13,%YMM14,%YMM11 |
0xb037ac VINSERTI128 $0x1,%XMM9,%YMM10,%YMM9 |
0xb037b2 VINSERTI64X4 $0x1,%YMM11,%ZMM9,%ZMM9 |
0xb037b9 VPMULLD %ZMM23,%ZMM9,%ZMM9 |
0xb037bf KXNORW %K0,%K0,%K1 |
0xb037c3 VPXOR %XMM10,%XMM10,%XMM10 |
0xb037c8 VGATHERDPS (%R12,%ZMM9,4),%ZMM10{%K1} [6] |
0xb037cf KXNORW %K0,%K0,%K1 |
0xb037d3 VPXOR %XMM11,%XMM11,%XMM11 |
0xb037d8 VGATHERDPS (%RSI,%ZMM9,4),%ZMM11{%K1} [3] |
0xb037df KXNORW %K0,%K0,%K1 |
0xb037e3 VPXOR %XMM13,%XMM13,%XMM13 |
0xb037e8 VGATHERDPS (%R8,%ZMM9,4),%ZMM13{%K1} [5] |
0xb037ef VSUBPS %ZMM10,%ZMM12,%ZMM9 |
0xb037f5 VSUBPS %ZMM13,%ZMM16,%ZMM10 |
0xb037fb VMULPS %ZMM10,%ZMM0,%ZMM12 |
0xb03801 VRNDSCALEPS $0,%ZMM12,%ZMM12 |
0xb03808 VSUBPS %ZMM11,%ZMM15,%ZMM11 |
0xb0380e VMULPS %ZMM12,%ZMM1,%ZMM13 |
0xb03814 VSUBPS %ZMM13,%ZMM9,%ZMM9 |
0xb0381a VMULPS %ZMM12,%ZMM2,%ZMM13 |
0xb03820 VSUBPS %ZMM13,%ZMM11,%ZMM11 |
0xb03826 VMULPS %ZMM12,%ZMM3,%ZMM12 |
0xb0382c VSUBPS %ZMM12,%ZMM10,%ZMM10 |
0xb03832 VMULPS %ZMM11,%ZMM4,%ZMM12 |
0xb03838 VRNDSCALEPS $0,%ZMM12,%ZMM12 |
0xb0383f VMULPS %ZMM12,%ZMM5,%ZMM13 |
0xb03845 VSUBPS %ZMM13,%ZMM9,%ZMM9 |
0xb0384b VMULPS %ZMM12,%ZMM6,%ZMM12 |
0xb03851 VSUBPS %ZMM12,%ZMM11,%ZMM11 |
0xb03857 VMULPS %ZMM9,%ZMM7,%ZMM12 |
0xb0385d VRNDSCALEPS $0,%ZMM12,%ZMM12 |
0xb03864 VMULPS %ZMM12,%ZMM8,%ZMM12 |
0xb0386a VSUBPS %ZMM12,%ZMM9,%ZMM9 |
0xb03870 VMULPS %ZMM9,%ZMM9,%ZMM9 |
0xb03876 VMULPS %ZMM11,%ZMM11,%ZMM11 |
0xb0387c VADDPS %ZMM9,%ZMM11,%ZMM9 |
0xb03882 VMULPS %ZMM10,%ZMM10,%ZMM10 |
0xb03888 VADDPS %ZMM9,%ZMM10,%ZMM9 |
0xb0388e VMOVAPS (%R10,%R14,4),%ZMM10 [1] |
0xb03895 VMULPS %ZMM10,%ZMM10,%ZMM11 |
0xb0389b VFMSUB231PS %ZMM24,%ZMM11,%ZMM9 |
0xb038a1 VMULPS %ZMM11,%ZMM17,%ZMM11 |
0xb038a7 VCMPPS $0x1,%ZMM11,%ZMM9,%K0 |
0xb038ae KMOVD %K0,%EDX |
0xb038b2 OR %EDX,%EAX |
0xb038b4 VMAXPS %ZMM25,%ZMM9,%ZMM9 |
0xb038ba VRSQRT14PS %ZMM9,%ZMM11 |
0xb038c0 VMULPS %ZMM11,%ZMM9,%ZMM12 |
0xb038c6 VFMADD213PS %ZMM26,%ZMM11,%ZMM12 |
0xb038cc VMULPS %ZMM27,%ZMM11,%ZMM11 |
0xb038d2 VMULPS %ZMM12,%ZMM11,%ZMM11 |
0xb038d8 VFMADD213PS %ZMM10,%ZMM9,%ZMM11 |
0xb038de VMULPS (%R13,%R14,4),%ZMM11,%ZMM9 [7] |
0xb038e6 VMOVAPS %ZMM9,(%RCX,%R14,4) [2] |
0xb038ed VMOVAPS %ZMM9,(%RBX,%R14,4) [4] |
0xb038f4 ADD $0x10,%R14 |
0xb038f8 CMP %R9,%R14 |
0xb038fb JL b03620 |
/home/eoseret/gromacs-2024.2/src/gromacs/mdlib/lincs.cpp: 966 - 992 |
-------------------------------------------------------------------------------- |
966: for (int bs = b0; bs < b1; bs += GMX_SIMD_REAL_WIDTH) |
[...] |
977: offset0[i] = atoms[bs + i].index1; |
978: offset1[i] = atoms[bs + i].index2; |
[...] |
992: len_S = load<SimdReal>(bllen + bs); |
/home/eoseret/gromacs-2024.2/api/legacy/include/gromacs/utility/arrayref.h: 82 - 82 |
-------------------------------------------------------------------------------- |
82: it_ += i; |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h: 113 - 113 |
-------------------------------------------------------------------------------- |
113: v->simdInternal_ = _mm512_i32gather_ps(offset.simdInternal_, base, sizeof(float) * align_); |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h: 109 - 451 |
-------------------------------------------------------------------------------- |
109: _mm512_store_ps(m, a.simdInternal_); |
[...] |
181: return { _mm512_add_ps(a.simdInternal_, b.simdInternal_) }; |
182: } |
183: |
184: static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b) |
185: { |
186: return { _mm512_sub_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
197: return { _mm512_mul_ps(a.simdInternal_, b.simdInternal_) }; |
198: } |
199: |
200: static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c) |
201: { |
202: return { _mm512_fmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
203: } |
204: |
205: static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c) |
206: { |
207: return { _mm512_fmsub_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
208: } |
209: |
210: static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c) |
211: { |
212: return { _mm512_fnmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
[...] |
224: return { _mm512_rsqrt14_ps(x.simdInternal_) }; |
[...] |
269: return { _mm512_max_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
279: return { _mm512_roundscale_ps(x.simdInternal_, 0) }; |
[...] |
367: return { _mm512_cmp_ps_mask(a.simdInternal_, b.simdInternal_, _CMP_LT_OQ) }; |
[...] |
388: return { _mm512_kor(a.simdInternal_, b.simdInternal_) }; |
[...] |
451: return { _mm512_mullo_epi32(a.simdInternal_, b.simdInternal_) }; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►67.28+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►16.54+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►16.18+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►80.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►17.66+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►2.34+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►81.82+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►17.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►1.19+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►96.75+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►3.08+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►80.92+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►11.45+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►7.63+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx::constrain_lincs(bool, t_i[...] | lincs.cpp:2534 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Constraints::Impl::apply([...] | constr.cpp:512 | libgromacs_mpi.so.9.0.0 |
○ | gmx::constrain_coordinates(gmx[...] | constr.cpp:373 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1660 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.35 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.48 |
Bottlenecks | micro-operation queue |
Function | .omp_outlined..37 |
Source | lincs.cpp:966-966,lincs.cpp:977-978,lincs.cpp:992-992,arrayref.h:82-82,impl_x86_avx_512_util_float.h:113-113,impl_x86_avx_512_simd_float.h:109-109,impl_x86_avx_512_simd_float.h:181-186,impl_x86_avx_512_simd_float.h:197-212,impl_x86_avx_512_simd_float.h:224-224,impl_x86_avx_512_simd_float.h:269-269,impl_x86_avx_512_simd_float.h:279-279,impl_x86_avx_512_simd_float.h:367-367,impl_x86_avx_512_simd_float.h:388-388,impl_x86_avx_512_simd_float.h:451-451 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 117.17 |
CQA cycles if no scalar integer | 114.50 |
CQA cycles if FP arith vectorized | 117.17 |
CQA cycles if fully vectorized | 86.85 |
Front-end cycles | 117.17 |
P0 cycles | 1.00 |
P1 cycles | 1.00 |
P2 cycles | 0.75 |
P3 cycles | 0.75 |
P4 cycles | 0.50 |
P5 cycles | 8.33 |
P6 cycles | 8.33 |
P7 cycles | 8.33 |
P8 cycles | 79.00 |
P9 cycles | 64.08 |
P10 cycles | 64.92 |
P11 cycles | 68.00 |
P12 cycles | 73.50 |
P13 cycles | 73.50 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 122.00 |
Nb uops | 703.00 |
Nb loads | 29.00 |
Nb stores | 2.00 |
Nb stack references | 3.00 |
FLOP/cycle | 4.92 |
Nb FLOP add-sub | 176.00 |
Nb FLOP mul | 288.00 |
Nb FLOP fma | 48.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 16.00 |
Bytes/cycle | 6.76 |
Bytes prefetched | 0.00 |
Bytes loaded | 664.00 |
Bytes stored | 128.00 |
Stride 0 | 1.00 |
Stride 1 | 4.00 |
Stride n | 4.00 |
Stride unknown | 0.00 |
Stride indirect | 9.00 |
Vectorization ratio all | 68.57 |
Vectorization ratio load | 38.46 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 51.67 |
Vector-efficiency ratio all | 58.39 |
Vector-efficiency ratio load | 38.46 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 91.07 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 35.52 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.35 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.48 |
Bottlenecks | micro-operation queue |
Function | .omp_outlined..37 |
Source | lincs.cpp:966-966,lincs.cpp:977-978,lincs.cpp:992-992,arrayref.h:82-82,impl_x86_avx_512_util_float.h:113-113,impl_x86_avx_512_simd_float.h:109-109,impl_x86_avx_512_simd_float.h:181-186,impl_x86_avx_512_simd_float.h:197-212,impl_x86_avx_512_simd_float.h:224-224,impl_x86_avx_512_simd_float.h:269-269,impl_x86_avx_512_simd_float.h:279-279,impl_x86_avx_512_simd_float.h:367-367,impl_x86_avx_512_simd_float.h:388-388,impl_x86_avx_512_simd_float.h:451-451 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 117.17 |
CQA cycles if no scalar integer | 114.50 |
CQA cycles if FP arith vectorized | 117.17 |
CQA cycles if fully vectorized | 86.85 |
Front-end cycles | 117.17 |
P0 cycles | 1.00 |
P1 cycles | 1.00 |
P2 cycles | 0.75 |
P3 cycles | 0.75 |
P4 cycles | 0.50 |
P5 cycles | 8.33 |
P6 cycles | 8.33 |
P7 cycles | 8.33 |
P8 cycles | 79.00 |
P9 cycles | 64.08 |
P10 cycles | 64.92 |
P11 cycles | 68.00 |
P12 cycles | 73.50 |
P13 cycles | 73.50 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 122.00 |
Nb uops | 703.00 |
Nb loads | 29.00 |
Nb stores | 2.00 |
Nb stack references | 3.00 |
FLOP/cycle | 4.92 |
Nb FLOP add-sub | 176.00 |
Nb FLOP mul | 288.00 |
Nb FLOP fma | 48.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 16.00 |
Bytes/cycle | 6.76 |
Bytes prefetched | 0.00 |
Bytes loaded | 664.00 |
Bytes stored | 128.00 |
Stride 0 | 1.00 |
Stride 1 | 4.00 |
Stride n | 4.00 |
Stride unknown | 0.00 |
Stride indirect | 9.00 |
Vectorization ratio all | 68.57 |
Vectorization ratio load | 38.46 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 51.67 |
Vector-efficiency ratio all | 58.39 |
Vector-efficiency ratio load | 38.46 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 91.07 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 35.52 |
Path / |
Function | .omp_outlined..37 |
Source file and lines | lincs.cpp:966-992 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 122 |
nb uops | 703 |
loop length | 737 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 7 |
used zmm registers | 25 |
nb stack references | 3 |
ADD-SUB / MUL ratio | 0.61 |
micro-operation queue | 117.17 cycles |
front end | 117.17 cycles |
Resource | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 1.00 | 0.75 | 0.75 | 0.50 | 7.67 | 7.67 | 7.67 | 61.00 | 61.08 | 60.92 | 61.00 | 73.50 | 73.50 |
cycles | 1.00 | 1.00 | 0.75 | 0.75 | 0.50 | 8.33 | 8.33 | 8.33 | 79.00 | 64.08 | 64.92 | 68.00 | 73.50 | 73.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 117.17 |
Dispatch | 79.00 |
Data deps. | 1.00 |
Overall L1 | 117.17 |
Vectorization ratios: INT |
all | 42% |
load | 11% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 39% |
Vectorization ratios: FP |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
Vectorization ratios: INT+FP |
all | 68% |
load | 38% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 51% |
Vector-efficiency ratios: INT |
all | 24% |
load | 11% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 58% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
Vector-efficiency ratios: FP |
all | 98% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 93% |
Vector-efficiency ratios: INT+FP |
all | 58% |
load | 38% |
store | 100% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 35% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VPBROADCASTQ %R14,%ZMM10 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPADDQ %YMM18,%YMM10,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
VPEXTRQ $0x1,%XMM11,%RDX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VEXTRACTI128 $0x1,%YMM11,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPEXTRQ $0x1,%XMM9,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPADDQ %ZMM20,%ZMM10,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VEXTRACTI32X4 $0x2,%ZMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM13,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPEXTRQ $0x1,%XMM13,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVD 0x4(%R15,%R11,8),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 | scal (6.3%) |
VEXTRACTI32X4 $0x3,%ZMM12,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM14,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPINSRD $0x1,0x4(%R15,%R8,8),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPINSRD $0x2,0x4(%R15,%R11,8),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVQ %XMM11,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPEXTRQ $0x1,%XMM14,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPINSRD $0x3,0x4(%R15,%R8,8),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVQ %XMM12,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVD 0x4(%R15,%R8,8),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 | scal (6.3%) |
VPEXTRQ $0x1,%XMM12,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPINSRD $0x1,0x4(%R15,%R8,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VEXTRACTI128 $0x1,%YMM12,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VMOVQ %XMM15,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPINSRD $0x2,0x4(%R15,%R8,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPEXTRQ $0x1,%XMM15,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPADDQ %XMM19,%XMM10,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPINSRD $0x3,0x4(%R15,%R8,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
LEA 0x7(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPERMT2Q %ZMM10,%ZMM21,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VPERMT2Q %ZMM15,%ZMM22,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
KMOVD %EDI,%K1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 5 | 1 | N/A |
VPBROADCASTQ %R8,%ZMM11{%K1} | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x60(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x68(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVD 0x4(%R15,%RSI,8),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 | scal (6.3%) |
VMOVQ %XMM15,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VPGATHERQD (%R15,%ZMM12,8),%YMM16{%K1} | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 3.92 | 3.92 | 2.92 | 5 | 5 | 0-17 | 8.50 | vect (50.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPGATHERQD (%R15,%ZMM11,8),%YMM12{%K1} | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 3.92 | 3.92 | 2.92 | 5 | 5 | 0-17 | 8.50 | vect (50.0%) |
VINSERTI64X4 $0x1,%YMM16,%ZMM12,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VPINSRD $0x1,0x4(%R15,%RSI,8),%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPMULLD %ZMM23,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%R12,%ZMM11,4),%ZMM12{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VPEXTRQ $0x1,%XMM15,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPINSRD $0x2,0x4(%R15,%RSI,8),%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
MOV 0xa0(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%RSI,%ZMM11,4),%ZMM15{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXORD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VGATHERDPS (%R8,%ZMM11,4),%ZMM16{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VMOVD 0x4(%R15,%R14,8),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 | scal (6.3%) |
VPINSRD $0x1,0x4(%R15,%R11,8),%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPINSRD $0x2,0x4(%R15,%RDX,8),%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVQ %XMM9,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPINSRD $0x3,0x3c(%R15,%R14,8),%XMM10,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPINSRD $0x3,0x4(%R15,%RDX,8),%XMM11,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VINSERTI128 $0x1,%XMM13,%YMM14,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
VINSERTI128 $0x1,%XMM9,%YMM10,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
VINSERTI64X4 $0x1,%YMM11,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VPMULLD %ZMM23,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%R12,%ZMM9,4),%ZMM10{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%RSI,%ZMM9,4),%ZMM11{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%R8,%ZMM9,4),%ZMM13{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VSUBPS %ZMM10,%ZMM12,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM13,%ZMM16,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM0,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM11,%ZMM15,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM1,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM13,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM2,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM13,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM3,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM12,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM4,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM5,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM13,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM6,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM12,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM7,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM8,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM12,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM11,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM10,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS (%R10,%R14,4),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM10,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMSUB231PS %ZMM24,%ZMM11,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM17,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM11,%ZMM9,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
KMOVD %K0,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | N/A |
OR %EDX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
VMAXPS %ZMM25,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM9,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM26,%ZMM11,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM27,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM10,%ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS (%R13,%R14,4),%ZMM11,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM9,(%RCX,%R14,4) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM9,(%RBX,%R14,4) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
ADD $0x10,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R9,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JL b03620 <.omp_outlined..37+0xe80> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
Function | .omp_outlined..37 |
Source file and lines | lincs.cpp:966-992 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 122 |
nb uops | 703 |
loop length | 737 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 7 |
used zmm registers | 25 |
nb stack references | 3 |
ADD-SUB / MUL ratio | 0.61 |
micro-operation queue | 117.17 cycles |
front end | 117.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 1.00 | 0.75 | 0.75 | 0.50 | 7.67 | 7.67 | 7.67 | 61.00 | 61.08 | 60.92 | 61.00 | 73.50 | 73.50 |
cycles | 1.00 | 1.00 | 0.75 | 0.75 | 0.50 | 8.33 | 8.33 | 8.33 | 79.00 | 64.08 | 64.92 | 68.00 | 73.50 | 73.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 117.17 |
Dispatch | 79.00 |
Data deps. | 1.00 |
Overall L1 | 117.17 |
all | 42% |
load | 11% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 39% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 68% |
load | 38% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 51% |
all | 24% |
load | 11% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 58% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 98% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 93% |
all | 58% |
load | 38% |
store | 100% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 35% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VPBROADCASTQ %R14,%ZMM10 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPADDQ %YMM18,%YMM10,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
VPEXTRQ $0x1,%XMM11,%RDX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VEXTRACTI128 $0x1,%YMM11,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPEXTRQ $0x1,%XMM9,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPADDQ %ZMM20,%ZMM10,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VEXTRACTI32X4 $0x2,%ZMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM13,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPEXTRQ $0x1,%XMM13,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVD 0x4(%R15,%R11,8),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 | scal (6.3%) |
VEXTRACTI32X4 $0x3,%ZMM12,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM14,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPINSRD $0x1,0x4(%R15,%R8,8),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPINSRD $0x2,0x4(%R15,%R11,8),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVQ %XMM11,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPEXTRQ $0x1,%XMM14,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPINSRD $0x3,0x4(%R15,%R8,8),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVQ %XMM12,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVD 0x4(%R15,%R8,8),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 | scal (6.3%) |
VPEXTRQ $0x1,%XMM12,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPINSRD $0x1,0x4(%R15,%R8,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VEXTRACTI128 $0x1,%YMM12,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VMOVQ %XMM15,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPINSRD $0x2,0x4(%R15,%R8,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPEXTRQ $0x1,%XMM15,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPADDQ %XMM19,%XMM10,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPINSRD $0x3,0x4(%R15,%R8,8),%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
LEA 0x7(%R14),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPERMT2Q %ZMM10,%ZMM21,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VPERMT2Q %ZMM15,%ZMM22,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
KMOVD %EDI,%K1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 5 | 1 | N/A |
VPBROADCASTQ %R8,%ZMM11{%K1} | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x60(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x68(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVD 0x4(%R15,%RSI,8),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 | scal (6.3%) |
VMOVQ %XMM15,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VPGATHERQD (%R15,%ZMM12,8),%YMM16{%K1} | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 3.92 | 3.92 | 2.92 | 5 | 5 | 0-17 | 8.50 | vect (50.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPGATHERQD (%R15,%ZMM11,8),%YMM12{%K1} | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 3.92 | 3.92 | 2.92 | 5 | 5 | 0-17 | 8.50 | vect (50.0%) |
VINSERTI64X4 $0x1,%YMM16,%ZMM12,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VPINSRD $0x1,0x4(%R15,%RSI,8),%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPMULLD %ZMM23,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%R12,%ZMM11,4),%ZMM12{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VPEXTRQ $0x1,%XMM15,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VPINSRD $0x2,0x4(%R15,%RSI,8),%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
MOV 0xa0(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%RSI,%ZMM11,4),%ZMM15{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXORD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VGATHERDPS (%R8,%ZMM11,4),%ZMM16{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VMOVD 0x4(%R15,%R14,8),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0.50 | scal (6.3%) |
VPINSRD $0x1,0x4(%R15,%R11,8),%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPINSRD $0x2,0x4(%R15,%RDX,8),%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVQ %XMM9,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VPINSRD $0x3,0x3c(%R15,%R14,8),%XMM10,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VPINSRD $0x3,0x4(%R15,%RDX,8),%XMM11,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VINSERTI128 $0x1,%XMM13,%YMM14,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
VINSERTI128 $0x1,%XMM9,%YMM10,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
VINSERTI64X4 $0x1,%YMM11,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VPMULLD %ZMM23,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%R12,%ZMM9,4),%ZMM10{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%RSI,%ZMM9,4),%ZMM11{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VPXOR %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERDPS (%R8,%ZMM9,4),%ZMM13{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VSUBPS %ZMM10,%ZMM12,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM13,%ZMM16,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM0,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM11,%ZMM15,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM1,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM13,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM2,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM13,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM3,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM12,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM4,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM5,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM13,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM6,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM12,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM7,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM8,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM12,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM11,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM10,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS (%R10,%R14,4),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM10,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMSUB231PS %ZMM24,%ZMM11,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM17,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM11,%ZMM9,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
KMOVD %K0,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | N/A |
OR %EDX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
VMAXPS %ZMM25,%ZMM9,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM9,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM26,%ZMM11,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM27,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM10,%ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS (%R13,%R14,4),%ZMM11,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM9,(%RCX,%R14,4) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM9,(%RBX,%R14,4) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
ADD $0x10,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R9,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JL b03620 <.omp_outlined..37+0xe80> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1.03 | 0 | 1.01 | -0 | 0.74 | 0.06 | 0.86 | 0.03 | 0.8 | 0.03 | 0.69 | 0.04 | 0.63 | 0.05 | 0.54 | 0.07 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 2.0249993801117 | 0.24292086064816 |
2x1 | 2 | 1.03 | 2.06 | 2 | 1.1200000047684 | 0.2173098474741 |
4x1 | 4 | 1.01 | 4.03 | 4 | 0.61499977111816 | 0.20182256400585 |
8x1 | 8 | 0.74 | 5.95 | 8 | 0.45999994874001 | 0.21982951462269 |
16x1 | 16 | 0.86 | 13.73 | 16 | 0.28999999165535 | 0.17838218808174 |
32x1 | 20 | 0.8 | 25.58 | 32 | 0.28000000119209 | 0.15401721000671 |
64x1 | 36 | 0.69 | 44.16 | 64 | 0.16499996185303 | 0.11538498848677 |
128x1 | 72 | 0.63 | 81.25 | 128 | 0.14000000059605 | 0.14665095508099 |
192x1 | 110 | 0.54 | 103.33 | 192 | 0.11000001430511 | 0.14549696445465 |