Loop Id: 2671 | Module: libgromacs_mpi.so.9.0.0 | Source: simd_prune_kernel.cpp:106-213 [...] | Coverage: 0.02% |
---|
Loop Id: 2671 | Module: libgromacs_mpi.so.9.0.0 | Source: simd_prune_kernel.cpp:106-213 [...] | Coverage: 0.02% |
---|
0x66f090 INC %RBX |
0x66f093 CMP %RDI,%RBX |
0x66f096 JE 66f296 |
0x66f09c MOV %RBX,%R10 |
0x66f09f SAL $0x4,%R10 |
0x66f0a3 MOVSXD %R9D,%R11 |
0x66f0a6 SAL $0x4,%R11 |
0x66f0aa MOV (%RSI,%R10,1),%R14 |
0x66f0ae MOV %R14,(%RCX,%R11,1) |
0x66f0b2 MOV %R13D,0x8(%RCX,%R11,1) |
0x66f0b7 MOVSXD 0x8(%RSI,%R10,1),%R15 |
0x66f0bc CMP 0xc(%RSI,%R10,1),%R15D |
0x66f0c1 JGE 66f090 |
0x66f0c3 MOV (%RSI,%R10,1),%R14D |
0x66f0c7 MOV 0x4(%RSI,%R10,1),%R12D |
0x66f0cc AND $0x7f,%R12D |
0x66f0d0 LEA (%R12,%R12,2),%R12 |
0x66f0d4 MOV -0x50(%RBP),%RDI |
0x66f0d8 VBROADCASTSS (%RDI,%R12,4),%ZMM4 |
0x66f0df VBROADCASTSS 0x4(%RDI,%R12,4),%ZMM5 |
0x66f0e7 VBROADCASTSS 0x8(%RDI,%R12,4),%ZMM6 |
0x66f0ef SAL $0x2,%R14D |
0x66f0f3 MOV %R14D,%R12D |
0x66f0f6 AND $-0x8,%R12D |
0x66f0fa LEA (%R12,%R12,2),%R12D |
0x66f0fe AND $0x4,%R14D |
0x66f102 OR %R12D,%R14D |
0x66f105 MOVSXD %R14D,%R12 |
0x66f108 VBROADCASTSS (%R8,%R12,4),%YMM1 |
0x66f10e LEA (%RCX,%R11,1),%R14 |
0x66f112 ADD $0x8,%R14 |
0x66f116 VBROADCASTSS 0x4(%R8,%R12,4),%YMM2 |
0x66f11d VINSERTF64X4 $0x1,%YMM2,%ZMM1,%ZMM1 |
0x66f124 VBROADCASTSS 0x20(%R8,%R12,4),%YMM2 |
0x66f12b VADDPS %ZMM1,%ZMM4,%ZMM1 |
0x66f131 VBROADCASTSS 0x24(%R8,%R12,4),%YMM3 |
0x66f138 VINSERTF64X4 $0x1,%YMM3,%ZMM2,%ZMM2 |
0x66f13f VBROADCASTSS 0x40(%R8,%R12,4),%YMM3 |
0x66f146 VADDPS %ZMM2,%ZMM5,%ZMM2 |
0x66f14c VBROADCASTSS 0x44(%R8,%R12,4),%YMM7 |
0x66f153 VINSERTF64X4 $0x1,%YMM7,%ZMM3,%ZMM3 |
0x66f15a VBROADCASTSS 0x8(%R8,%R12,4),%YMM7 |
0x66f161 VADDPS %ZMM3,%ZMM6,%ZMM3 |
0x66f167 VBROADCASTSS 0xc(%R8,%R12,4),%YMM8 |
0x66f16e VINSERTF64X4 $0x1,%YMM8,%ZMM7,%ZMM7 |
0x66f175 VBROADCASTSS 0x28(%R8,%R12,4),%YMM8 |
0x66f17c VADDPS %ZMM7,%ZMM4,%ZMM4 |
0x66f182 VBROADCASTSS 0x2c(%R8,%R12,4),%YMM7 |
0x66f189 VINSERTF64X4 $0x1,%YMM7,%ZMM8,%ZMM7 |
0x66f190 VBROADCASTSS 0x48(%R8,%R12,4),%YMM8 |
0x66f197 VADDPS %ZMM7,%ZMM5,%ZMM5 |
0x66f19d VBROADCASTSS 0x4c(%R8,%R12,4),%YMM7 |
0x66f1a4 VINSERTF64X4 $0x1,%YMM7,%ZMM8,%ZMM7 |
0x66f1ab VADDPS %ZMM7,%ZMM6,%ZMM6 |
0x66f1b1 ADD %RSI,%R10 |
0x66f1b4 ADD $0xc,%R10 |
0x66f1b8 NOPL (%RAX,%RAX,1) |
(2672) 0x66f1c0 MOVSXD (%RAX,%R15,8),%R12 |
(2672) 0x66f1c4 SAL $0x3,%R12 |
(2672) 0x66f1c8 LEA (%R12,%R12,2),%R12 |
(2672) 0x66f1cc VBROADCASTF64X4 (%R8,%R12,4),%ZMM7 |
(2672) 0x66f1d3 MOVSXD %R12D,%R12 |
(2672) 0x66f1d6 VBROADCASTF64X4 0x20(%R8,%R12,4),%ZMM8 |
(2672) 0x66f1de VBROADCASTF64X4 0x40(%R8,%R12,4),%ZMM9 |
(2672) 0x66f1e6 VSUBPS %ZMM7,%ZMM1,%ZMM10 |
(2672) 0x66f1ec VSUBPS %ZMM8,%ZMM2,%ZMM11 |
(2672) 0x66f1f2 VSUBPS %ZMM9,%ZMM3,%ZMM12 |
(2672) 0x66f1f8 VSUBPS %ZMM7,%ZMM4,%ZMM7 |
(2672) 0x66f1fe VSUBPS %ZMM8,%ZMM5,%ZMM8 |
(2672) 0x66f204 VSUBPS %ZMM9,%ZMM6,%ZMM9 |
(2672) 0x66f20a VMULPS %ZMM10,%ZMM10,%ZMM10 |
(2672) 0x66f210 VMULPS %ZMM11,%ZMM11,%ZMM11 |
(2672) 0x66f216 VADDPS %ZMM11,%ZMM10,%ZMM10 |
(2672) 0x66f21c VMULPS %ZMM12,%ZMM12,%ZMM11 |
(2672) 0x66f222 VADDPS %ZMM10,%ZMM11,%ZMM10 |
(2672) 0x66f228 VMULPS %ZMM7,%ZMM7,%ZMM7 |
(2672) 0x66f22e VMULPS %ZMM8,%ZMM8,%ZMM8 |
(2672) 0x66f234 VADDPS %ZMM8,%ZMM7,%ZMM7 |
(2672) 0x66f23a VMULPS %ZMM9,%ZMM9,%ZMM8 |
(2672) 0x66f240 VADDPS %ZMM7,%ZMM8,%ZMM7 |
(2672) 0x66f246 VCMPPS $0x1,%ZMM0,%ZMM10,%K0 |
(2672) 0x66f24d VCMPPS $0x1,%ZMM0,%ZMM7,%K1 |
(2672) 0x66f254 MOVSXD %R13D,%R13 |
(2672) 0x66f257 MOV (%RAX,%R15,8),%R12 |
(2672) 0x66f25b MOV %R12,(%RDX,%R13,8) |
(2672) 0x66f25f XOR %R12D,%R12D |
(2672) 0x66f262 KORTESTW %K0,%K1 |
(2672) 0x66f266 SETNE %R12B |
(2672) 0x66f26a ADD %R12D,%R13D |
(2672) 0x66f26d INC %R15 |
(2672) 0x66f270 MOVSXD (%R10),%R12 |
(2672) 0x66f273 CMP %R12,%R15 |
(2672) 0x66f276 JL 66f1c0 |
0x66f27c CMP (%R14),%R13D |
0x66f27f MOV -0x30(%RBP),%RDI |
0x66f283 JLE 66f090 |
0x66f289 MOV %R13D,0xc(%RCX,%R11,1) |
0x66f28e INC %R9D |
0x66f291 JMP 66f090 |
/home/eoseret/gromacs-2024.2/src/gromacs/nbnxm/simd_load_store_functions.h: 109 - 109 |
-------------------------------------------------------------------------------- |
109: return loadDuplicateHsimd(ptr + offset); |
/home/eoseret/gromacs-2024.2/src/gromacs/nbnxm/simd_prune_kernel.cpp: 106 - 213 |
-------------------------------------------------------------------------------- |
106: for (int ciIndex = 0; ciIndex < nciOuter; ciIndex++) |
107: { |
108: const nbnxn_ci_t* gmx_restrict ciEntry = &ciOuter[ciIndex]; |
109: |
110: /* Copy the original list entry to the pruned entry */ |
111: ciInner[nciInner].ci = ciEntry->ci; |
112: ciInner[nciInner].shift = ciEntry->shift; |
113: ciInner[nciInner].cj_ind_start = ncjInner; |
[...] |
144: for (int cjind = ciEntry->cj_ind_start; cjind < ciEntry->cj_ind_end; cjind++) |
145: { |
146: /* j-cluster index */ |
147: int cj = cjOuter[cjind].cj; |
[...] |
154: ajx = aj * DIM; |
[...] |
203: cjInner[ncjInner] = cjOuter[cjind]; |
204: if (anyTrue(wco[0])) |
[...] |
210: if (ncjInner > ciInner[nciInner].cj_ind_start) |
211: { |
212: ciInner[nciInner].cj_ind_end = ncjInner; |
213: nciInner++; |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h: 370 - 370 |
-------------------------------------------------------------------------------- |
370: return { _mm512_castpd_ps(_mm512_broadcast_f64x4(_mm256_load_pd(reinterpret_cast<const double*>(m)))) }; |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h: 181 - 393 |
-------------------------------------------------------------------------------- |
181: return { _mm512_add_ps(a.simdInternal_, b.simdInternal_) }; |
182: } |
183: |
184: static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b) |
185: { |
186: return { _mm512_sub_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
197: return { _mm512_mul_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
367: return { _mm512_cmp_ps_mask(a.simdInternal_, b.simdInternal_, _CMP_LT_OQ) }; |
[...] |
393: return (avx512Mask2Int(a.simdInternal_) != 0); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►64.52+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►35.48+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►59.18+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►40.82+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►35.29+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►31.37+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►11.76+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►9.80+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►7.84+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►3.92+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►47.37+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►35.09+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►10.53+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►3.51+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►1.75+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►1.75+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►64.18+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►19.40+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►8.96+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►5.97+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►1.49+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►70.59+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►29.41+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►61.84+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►19.74+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►9.21+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►5.26+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►3.95+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►82.65+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►17.35+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.01 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.71 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.03 |
Bottlenecks | |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source | simd_prune_kernel.cpp:106-108,simd_prune_kernel.cpp:111-113,simd_prune_kernel.cpp:144-144,simd_prune_kernel.cpp:210-213 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 7.39 |
CQA cycles if no scalar integer | 7.33 |
CQA cycles if FP arith vectorized | 7.39 |
CQA cycles if fully vectorized | 4.32 |
Front-end cycles | 7.33 |
P0 cycles | 4.25 |
P1 cycles | 4.25 |
P2 cycles | 4.17 |
P3 cycles | 4.17 |
P4 cycles | 1.50 |
P5 cycles | 6.22 |
P6 cycles | 6.22 |
P7 cycles | 6.22 |
P8 cycles | 0.00 |
P9 cycles | 3.33 |
P10 cycles | 4.00 |
P11 cycles | 6.67 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 45.33 |
Nb uops | 44.00 |
Nb loads | 16.33 |
Nb stores | 2.33 |
Nb stack references | 1.33 |
FLOP/cycle | 8.66 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 12.47 |
Bytes prefetched | 0.00 |
Bytes loaded | 74.67 |
Bytes stored | 13.33 |
Stride 0 | 0.67 |
Stride 1 | 0.67 |
Stride n | 1.67 |
Stride unknown | 2.67 |
Stride indirect | 1.00 |
Vectorization ratio all | 23.55 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 57.14 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 17.03 |
Vector-efficiency ratio all | 24.60 |
Vector-efficiency ratio load | 8.57 |
Vector-efficiency ratio store | 9.03 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 62.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.96 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 7.38 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.14 |
Bottlenecks | micro-operation queue, |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source | simd_prune_kernel.cpp:106-108,simd_prune_kernel.cpp:111-113,simd_prune_kernel.cpp:144-144,simd_prune_kernel.cpp:210-213 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 2.00 |
CQA cycles if no scalar integer | 2.00 |
CQA cycles if FP arith vectorized | 2.00 |
CQA cycles if fully vectorized | 0.27 |
Front-end cycles | 2.00 |
P0 cycles | 1.75 |
P1 cycles | 1.75 |
P2 cycles | 1.75 |
P3 cycles | 1.75 |
P4 cycles | 1.00 |
P5 cycles | 1.67 |
P6 cycles | 1.67 |
P7 cycles | 1.67 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 13.00 |
Nb uops | 12.00 |
Nb loads | 3.00 |
Nb stores | 2.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 14.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 16.00 |
Bytes stored | 12.00 |
Stride 0 | 0.00 |
Stride 1 | 0.00 |
Stride n | 1.00 |
Stride unknown | 0.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.25 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 9.38 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.60 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.02 |
Bottlenecks | micro-operation queue, |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source | simd_prune_kernel.cpp:106-108,simd_prune_kernel.cpp:111-113,simd_prune_kernel.cpp:144-144,simd_prune_kernel.cpp:210-213 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.17 |
CQA cycles if no scalar integer | 10.00 |
CQA cycles if FP arith vectorized | 10.17 |
CQA cycles if fully vectorized | 6.34 |
Front-end cycles | 10.17 |
P0 cycles | 5.50 |
P1 cycles | 5.50 |
P2 cycles | 5.50 |
P3 cycles | 5.50 |
P4 cycles | 2.00 |
P5 cycles | 8.67 |
P6 cycles | 8.67 |
P7 cycles | 8.67 |
P8 cycles | 0.00 |
P9 cycles | 5.00 |
P10 cycles | 6.00 |
P11 cycles | 10.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 63.00 |
Nb uops | 61.00 |
Nb loads | 23.00 |
Nb stores | 3.00 |
Nb stack references | 2.00 |
FLOP/cycle | 9.44 |
Nb FLOP add-sub | 96.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 11.80 |
Bytes prefetched | 0.00 |
Bytes loaded | 104.00 |
Bytes stored | 16.00 |
Stride 0 | 1.00 |
Stride 1 | 1.00 |
Stride n | 2.00 |
Stride unknown | 4.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 34.29 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 85.71 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 25.00 |
Vector-efficiency ratio all | 30.54 |
Vector-efficiency ratio load | 6.60 |
Vector-efficiency ratio store | 8.33 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 87.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 17.45 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.58 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.02 |
Bottlenecks | P11, |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source | simd_prune_kernel.cpp:106-108,simd_prune_kernel.cpp:111-113,simd_prune_kernel.cpp:144-144,simd_prune_kernel.cpp:210-213 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.00 |
CQA cycles if no scalar integer | 10.00 |
CQA cycles if FP arith vectorized | 10.00 |
CQA cycles if fully vectorized | 6.34 |
Front-end cycles | 9.83 |
P0 cycles | 5.50 |
P1 cycles | 5.50 |
P2 cycles | 5.25 |
P3 cycles | 5.25 |
P4 cycles | 1.50 |
P5 cycles | 8.33 |
P6 cycles | 8.33 |
P7 cycles | 8.33 |
P8 cycles | 0.00 |
P9 cycles | 5.00 |
P10 cycles | 6.00 |
P11 cycles | 10.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 60.00 |
Nb uops | 59.00 |
Nb loads | 23.00 |
Nb stores | 2.00 |
Nb stack references | 2.00 |
FLOP/cycle | 9.60 |
Nb FLOP add-sub | 96.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 11.60 |
Bytes prefetched | 0.00 |
Bytes loaded | 104.00 |
Bytes stored | 12.00 |
Stride 0 | 1.00 |
Stride 1 | 1.00 |
Stride n | 2.00 |
Stride unknown | 4.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 36.36 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 85.71 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 26.09 |
Vector-efficiency ratio all | 32.01 |
Vector-efficiency ratio load | 6.62 |
Vector-efficiency ratio store | 9.38 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 87.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 17.93 |
Path / |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source file and lines | simd_prune_kernel.cpp:106-213 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 45.33 |
nb uops | 44 |
loop length | 232.67 |
used x86 registers | 12.67 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 3.33 |
used zmm registers | 5.33 |
nb stack references | 1.33 |
micro-operation queue | 7.33 cycles |
front end | 7.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.25 | 4.25 | 4.17 | 4.17 | 1.50 | 6.22 | 6.22 | 6.22 | 0.00 | 3.33 | 3.33 | 3.33 | 0.00 | 0.00 |
cycles | 4.25 | 4.25 | 4.17 | 4.17 | 1.50 | 6.22 | 6.22 | 6.22 | 0.00 | 3.33 | 4.00 | 6.67 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 7.33 |
Dispatch | 7.25 |
Data deps. | 2.00 |
Overall L1 | 7.39 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 44% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 23% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 57% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
all | 10% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 36% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 24% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 62% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source file and lines | simd_prune_kernel.cpp:106-213 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 13 |
nb uops | 12 |
loop length | 51 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 2.00 cycles |
front end | 2.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.75 | 1.75 | 1.75 | 1.75 | 1.00 | 1.67 | 1.67 | 1.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 1.75 | 1.75 | 1.75 | 1.75 | 1.00 | 1.67 | 1.67 | 1.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 2.00 |
Dispatch | 1.75 |
Data deps. | 2.00 |
Overall L1 | 2.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
CMP %RDI,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 66f296 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x326> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SAL $0x4,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOVSXD %R9D,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SAL $0x4,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV (%RSI,%R10,1),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %R14,(%RCX,%R11,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13D,0x8(%RCX,%R11,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD 0x8(%RSI,%R10,1),%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
CMP 0xc(%RSI,%R10,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
JGE 66f090 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source file and lines | simd_prune_kernel.cpp:106-213 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 63 |
nb uops | 61 |
loop length | 330 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 5 |
used zmm registers | 8 |
nb stack references | 2 |
micro-operation queue | 10.17 cycles |
front end | 10.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.50 | 5.50 | 5.50 | 2.00 | 8.67 | 8.67 | 8.67 | 0.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 |
cycles | 5.50 | 5.50 | 5.50 | 5.50 | 2.00 | 8.67 | 8.67 | 8.67 | 0.00 | 5.00 | 6.00 | 10.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 10.17 |
Dispatch | 10.00 |
Data deps. | 2.00 |
Overall L1 | 10.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 44% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 34% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 85% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 8% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 36% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 30% |
load | 6% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 87% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
CMP %RDI,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 66f296 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x326> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SAL $0x4,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOVSXD %R9D,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SAL $0x4,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV (%RSI,%R10,1),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R14,(%RCX,%R11,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13D,0x8(%RCX,%R11,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD 0x8(%RSI,%R10,1),%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
CMP 0xc(%RSI,%R10,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
JGE 66f090 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV (%RSI,%R10,1),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x4(%RSI,%R10,1),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
AND $0x7f,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R12,%R12,2),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VBROADCASTSS (%RDI,%R12,4),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS 0x4(%RDI,%R12,4),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS 0x8(%RDI,%R12,4),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
SAL $0x2,%R14D | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R12,%R12,2),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $0x4,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
OR %R12D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOVSXD %R14D,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTSS (%R8,%R12,4),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
LEA (%RCX,%R11,1),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTSS 0x4(%R8,%R12,4),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM2,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x20(%R8,%R12,4),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM1,%ZMM4,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0x24(%R8,%R12,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM3,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x40(%R8,%R12,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM2,%ZMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0x44(%R8,%R12,4),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM7,%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x8(%R8,%R12,4),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM3,%ZMM6,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0xc(%R8,%R12,4),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x28(%R8,%R12,4),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM7,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0x2c(%R8,%R12,4),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM7,%ZMM8,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x48(%R8,%R12,4),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM7,%ZMM5,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0x4c(%R8,%R12,4),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM7,%ZMM8,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VADDPS %ZMM7,%ZMM6,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
ADD %RSI,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0xc,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
CMP (%R14),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JLE 66f090 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13D,0xc(%RCX,%R11,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
INC %R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 66f090 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source file and lines | simd_prune_kernel.cpp:106-213 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 60 |
nb uops | 59 |
loop length | 317 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 5 |
used zmm registers | 8 |
nb stack references | 2 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.50 | 5.25 | 5.25 | 1.50 | 8.33 | 8.33 | 8.33 | 0.00 | 5.00 | 5.00 | 5.00 | 0.00 | 0.00 |
cycles | 5.50 | 5.50 | 5.25 | 5.25 | 1.50 | 8.33 | 8.33 | 8.33 | 0.00 | 5.00 | 6.00 | 10.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 9.83 |
Dispatch | 10.00 |
Data deps. | 2.00 |
Overall L1 | 10.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 44% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 36% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 85% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 26% |
all | 10% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 36% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 32% |
load | 6% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 87% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
CMP %RDI,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 66f296 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x326> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SAL $0x4,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOVSXD %R9D,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SAL $0x4,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV (%RSI,%R10,1),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R14,(%RCX,%R11,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13D,0x8(%RCX,%R11,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD 0x8(%RSI,%R10,1),%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
CMP 0xc(%RSI,%R10,1),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
JGE 66f090 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV (%RSI,%R10,1),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x4(%RSI,%R10,1),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
AND $0x7f,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R12,%R12,2),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VBROADCASTSS (%RDI,%R12,4),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS 0x4(%RDI,%R12,4),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS 0x8(%RDI,%R12,4),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
SAL $0x2,%R14D | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R12,%R12,2),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $0x4,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
OR %R12D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOVSXD %R14D,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTSS (%R8,%R12,4),%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
LEA (%RCX,%R11,1),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTSS 0x4(%R8,%R12,4),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM2,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x20(%R8,%R12,4),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM1,%ZMM4,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0x24(%R8,%R12,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM3,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x40(%R8,%R12,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM2,%ZMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0x44(%R8,%R12,4),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM7,%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x8(%R8,%R12,4),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM3,%ZMM6,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0xc(%R8,%R12,4),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x28(%R8,%R12,4),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM7,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0x2c(%R8,%R12,4),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM7,%ZMM8,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VBROADCASTSS 0x48(%R8,%R12,4),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VADDPS %ZMM7,%ZMM5,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS 0x4c(%R8,%R12,4),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
VINSERTF64X4 $0x1,%YMM7,%ZMM8,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | vect (50.0%) |
VADDPS %ZMM7,%ZMM6,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
ADD %RSI,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0xc,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
CMP (%R14),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JLE 66f090 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: (empty); Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 0.58 | 0.01 | 0.37 | 0.02 | 0.35 | 0.01 | 0.31 | 0.01 | 0.27 | 0.01 | 0.21 | 0.01 | 0.23 | 0.01 | 0.18 | 0.02 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 0.089999996125698 | 0.010796484537423 |
2x1 | 2 | 0.58 | 1.16 | 2 | 0.079999998211861 | 0.017098043113947 |
4x1 | 4 | 0.37 | 1.47 | 4 | 0.080000005662441 | 0.024600489065051 |
8x1 | 8 | 0.35 | 2.82 | 8 | 0.054999995976686 | 0.02060885168612 |
16x1 | 16 | 0.31 | 5.03 | 16 | 0.035000000149012 | 0.021633701398969 |
32x1 | 20 | 0.27 | 8.59 | 32 | 0.034999992698431 | 0.020393488928676 |
64x1 | 35 | 0.21 | 13.51 | 64 | 0.024999998509884 | 0.016765404492617 |
128x1 | 47 | 0.23 | 29.89 | 128 | 0.019999999552965 | 0.017719486728311 |
192x1 | 66 | 0.18 | 34.91 | 192 | 0.01999999769032 | 0.019139062613249 |