Loop Id: 3353 | Module: libgromacs_mpi.so.9.0.0 | Source: localtopology.cpp:506-531 [...] | Coverage: 0.05% |
---|
Loop Id: 3353 | Module: libgromacs_mpi.so.9.0.0 | Source: localtopology.cpp:506-531 [...] | Coverage: 0.05% |
---|
0x6aedcf MOV %RDI,-0xd0(%RBP) |
0x6aedd6 MOV (%RDX,%RDI,4),%EDI |
0x6aedd9 ADD -0x58(%RBP),%EDI |
0x6aeddc SUB -0x90(%RBP),%EDI |
0x6aede2 AND %EDI,%ESI |
0x6aede4 MOV -0x54(%RBP),%R12D |
0x6aede8 NOPL (%RAX,%RAX,1) |
(3354) 0x6aedf0 MOVSXD %ESI,%RSI |
(3354) 0x6aedf3 SAL $0x4,%RSI |
(3354) 0x6aedf7 CMP %EDI,(%R8,%RSI,1) |
(3354) 0x6aedfb JE 6aee10 |
(3354) 0x6aedfd MOV 0xc(%R8,%RSI,1),%ESI |
(3354) 0x6aee02 TEST %ESI,%ESI |
(3354) 0x6aee04 JNS 6aedf0 |
0x6aee10 MOVSXD 0x8(%R8,%RSI,1),%RDI |
0x6aee15 CMP -0x8c(%RBP),%EDI |
0x6aee1b JGE 6aeeae |
0x6aee21 MOV 0x4(%R8,%RSI,1),%ESI |
0x6aee26 MOV -0xd0(%RBP),%R12 |
0x6aee2d MOV %ESI,-0xb0(%RBP,%R12,4) |
0x6aee35 LEA (%RDI,%RDI,2),%RSI |
0x6aee39 MOV -0xf8(%RBP),%RDI |
0x6aee40 VPSHUFD $-0x5c,(%RDI,%RSI,4),%XMM0 |
0x6aee46 VPTESTNMD %XMM0,%XMM0,%K0 |
0x6aee4c KSHIFTRB $0x2,%K0,%K1 |
0x6aee52 KSHIFTRB $0x1,%K0,%K2 |
0x6aee58 KMOVD %K0,%ESI |
0x6aee5c TEST $0x1,%SIL |
0x6aee60 KMOVD %K2,%ESI |
0x6aee64 CMOVNE %R12D,%EAX |
0x6aee68 CMOVE %R12D,%ECX |
0x6aee6c CMOVNE %R12D,%R9D |
0x6aee70 TEST $0x1,%SIL |
0x6aee74 KMOVD %K1,%ESI |
0x6aee78 CMOVNE %R12D,%EBX |
0x6aee7c CMOVE %R12D,%R15D |
0x6aee80 CMOVNE %R12D,%R11D |
0x6aee84 TEST $0x1,%SIL |
0x6aee88 CMOVNE %R12D,%R14D |
0x6aee8c CMOVE %R12D,%R13D |
0x6aee90 CMOVNE %R12D,%R10D |
0x6aee94 CMP -0x50(%RBP),%R12 |
0x6aee98 LEA 0x1(%R12),%RDI |
0x6aee9d MOV -0xd4(%RBP),%ESI |
0x6aeea3 JNE 6aedcf |
/home/eoseret/gromacs-2024.2/src/gromacs/domdec/hashedmap.h: 274 - 282 |
-------------------------------------------------------------------------------- |
274: int ind = (key & bitMask_); |
275: do |
276: { |
277: if (table_[ind].key == key) |
278: { |
279: return &table_[ind].value; |
280: } |
281: ind = table_[ind].next; |
282: } while (ind >= 0); |
/home/eoseret/gromacs-2024.2/src/gromacs/domdec/localtopology.cpp: 506 - 531 |
-------------------------------------------------------------------------------- |
506: for (int k = 1; k <= nral && bUse; k++) |
507: { |
508: /* Get the global index using the offset in the molecule */ |
509: const int k_gl = atomIndexSet.global + iatoms[k] - atomIndexSet.withinMolecule; |
[...] |
518: if (entry == nullptr || entry->cell >= zones.n) |
[...] |
528: tiatoms[k] = entry->la; |
529: for (int d = 0; d < DIM; d++) |
530: { |
531: if (zones.shift[entry->cell][d] == 0) |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1064 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►84.21+ | int make_bondeds_zone<false>(g[...] | localtopology.cpp:666 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0x6ad440 | iterator_interface.hpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | dd_make_local_top(gmx_domdec_t[...] | localtopology.cpp:846 | libgromacs_mpi.so.9.0.0 |
○ | gmx::dd_partition_system(_IO_F[...] | partition.cpp:3199 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1001 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►15.04+ | int make_bondeds_zone<false>(g[...] | localtopology.cpp:666 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0x6ad440 | iterator_interface.hpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | dd_make_local_top(gmx_domdec_t[...] | localtopology.cpp:846 | libgromacs_mpi.so.9.0.0 |
○ | gmx::dd_partition_system(_IO_F[...] | partition.cpp:3199 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1001 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►95.81+ | int make_bondeds_zone<false>(g[...] | localtopology.cpp:666 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0x6ad440 | iterator_interface.hpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | dd_make_local_top(gmx_domdec_t[...] | localtopology.cpp:846 | libgromacs_mpi.so.9.0.0 |
○ | gmx::dd_partition_system(_IO_F[...] | partition.cpp:3199 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1001 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►3.59+ | int make_bondeds_zone<false>(g[...] | localtopology.cpp:666 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0x6ad440 | iterator_interface.hpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | dd_make_local_top(gmx_domdec_t[...] | localtopology.cpp:846 | libgromacs_mpi.so.9.0.0 |
○ | gmx::dd_partition_system(_IO_F[...] | partition.cpp:3199 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1001 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►80.58+ | int make_bondeds_zone<false>(g[...] | localtopology.cpp:666 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0x6ad440 | iterator_interface.hpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | dd_make_local_top(gmx_domdec_t[...] | localtopology.cpp:846 | libgromacs_mpi.so.9.0.0 |
○ | gmx::dd_partition_system(_IO_F[...] | partition.cpp:3199 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1001 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►10.19+ | int make_bondeds_zone<false>(g[...] | localtopology.cpp:666 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0x6ad440 | iterator_interface.hpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | dd_make_local_top(gmx_domdec_t[...] | localtopology.cpp:846 | libgromacs_mpi.so.9.0.0 |
○ | gmx::dd_partition_system(_IO_F[...] | partition.cpp:3199 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1001 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►8.25+ | int make_bondeds_zone<false>(g[...] | localtopology.cpp:666 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0x6ad440 | iterator_interface.hpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | dd_make_local_top(gmx_domdec_t[...] | localtopology.cpp:846 | libgromacs_mpi.so.9.0.0 |
○ | gmx::dd_partition_system(_IO_F[...] | partition.cpp:3199 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1001 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►99.60+ | int make_bondeds_zone<false>(g[...] | localtopology.cpp:666 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0x6ad440 | iterator_interface.hpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | dd_make_local_top(gmx_domdec_t[...] | localtopology.cpp:846 | libgromacs_mpi.so.9.0.0 |
○ | gmx::dd_partition_system(_IO_F[...] | partition.cpp:3199 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1001 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | NA |
CQA speedup if FP arith vectorized | NA |
CQA speedup if fully vectorized | NA |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | NA |
Bottlenecks | NA |
Function | int assignInteractionsForAtom |
Source | hashedmap.h:274-274,hashedmap.h:282-282,localtopology.cpp:506-506,localtopology.cpp:509-509,localtopology.cpp:518-518,localtopology.cpp:528-528,localtopology.cpp:531-531 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | NA |
CQA cycles if no scalar integer | NA |
CQA cycles if FP arith vectorized | NA |
CQA cycles if fully vectorized | NA |
Front-end cycles | NA |
P0 cycles | NA |
P1 cycles | NA |
P2 cycles | NA |
P3 cycles | NA |
P4 cycles | NA |
P5 cycles | NA |
P6 cycles | NA |
P7 cycles | NA |
P8 cycles | NA |
P9 cycles | NA |
P10 cycles | NA |
P11 cycles | NA |
P12 cycles | NA |
P13 cycles | NA |
DIV/SQRT cycles | NA |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | NA |
Nb uops | NA |
Nb loads | NA |
Nb stores | NA |
Nb stack references | NA |
FLOP/cycle | NA |
Nb FLOP add-sub | NA |
Nb FLOP mul | NA |
Nb FLOP fma | NA |
Nb FLOP div | NA |
Nb FLOP rcp | NA |
Nb FLOP sqrt | NA |
Nb FLOP rsqrt | NA |
Bytes/cycle | NA |
Bytes prefetched | NA |
Bytes loaded | NA |
Bytes stored | NA |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | NA |
Vectorization ratio load | NA |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | NA |
Vector-efficiency ratio all | NA |
Vector-efficiency ratio load | NA |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | NA |
Path / |
Function | int assignInteractionsForAtom |
Source file and lines | hashedmap.h:274-274,hashedmap.h:282-282,localtopology.cpp:506-506,localtopology.cpp:509-509,localtopology.cpp:518-518,localtopology.cpp:528-528,localtopology.cpp:531-531 |
Module | libgromacs_mpi.so.9.0.0 |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | ||||||
2x1 | ||||||
4x1 | ||||||
8x1 | ||||||
16x1 | ||||||
32x1 | 19 | 1 | 1 | 32 | 0.07499998062849 | 0.04048253595829 |
64x1 | 34 | 1 | 1 | 64 | 0.074999995529652 | 0.03293875977397 |
128x1 | 61 | 1 | 1 | 128 | 0.049999993294477 | 0.048029039055109 |
192x1 | 82 | 1 | 1 | 192 | 0.049999989569187 | 0.048433911055326 |