Function: .omp_outlined.#0xc0af60 | Module: libgromacs_mpi.so.9.0.0 | Source: pme_redistribute.cpp:81-138 [...] | Coverage (incl. loops): 0.24% | (excl. loops): 0.00% |
---|
Function: .omp_outlined.#0xc0af60 | Module: libgromacs_mpi.so.9.0.0 | Source: pme_redistribute.cpp:81-138 [...] | Coverage (incl. loops): 0.24% | (excl. loops): 0.00% |
---|
/home/eoseret/gromacs-2024.2/src/gromacs/ewald/pme_redistribute.cpp: 81 - 138 |
-------------------------------------------------------------------------------- |
81: nslab = atc->nslab; |
82: pd = atc->pd.data(); |
83: |
84: /* Reset the count */ |
85: for (i = 0; i < nslab; i++) |
86: { |
87: count[i] = 0; |
88: } |
89: |
90: if (atc->dimind == 0) |
[...] |
96: for (i = start; i < end; i++) |
97: { |
98: xptr = x[i]; |
99: /* Fractional coordinates along box vectors */ |
100: s = nslab * (xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx); |
101: si = static_cast<int>(s + 2 * nslab) % nslab; |
102: pd[i] = si; |
103: count[si]++; |
[...] |
111: for (i = start; i < end; i++) |
112: { |
113: xptr = x[i]; |
114: /* Fractional coordinates along box vectors */ |
115: s = nslab * (xptr[YY] * ryy + xptr[ZZ] * rzy); |
116: si = static_cast<int>(s + 2 * nslab) % nslab; |
117: pd[i] = si; |
118: count[si]++; |
[...] |
128: #pragma omp parallel for num_threads(nthread) schedule(static) |
129: for (int thread = 0; thread < nthread; thread++) |
130: { |
131: try |
132: { |
133: const int natoms = x.ssize(); |
134: pme_calc_pidx(natoms * thread / nthread, |
135: natoms * (thread + 1) / nthread, |
136: recipbox, |
137: x, |
138: atc, |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1046 - 1169 |
-------------------------------------------------------------------------------- |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/home/eoseret/gromacs-2024.2/api/legacy/include/gromacs/utility/arrayref.h: 82 - 254 |
-------------------------------------------------------------------------------- |
82: it_ += i; |
83: return *this; |
84: } |
85: constexpr auto operator-(ArrayRefIter other) const noexcept { return it_ - other.it_; } |
[...] |
254: size_type size() const { return end_ - begin_; } |
0xc0af60 PUSH %RBP |
0xc0af61 MOV %RSP,%RBP |
0xc0af64 PUSH %R15 |
0xc0af66 PUSH %R14 |
0xc0af68 PUSH %R13 |
0xc0af6a PUSH %R12 |
0xc0af6c PUSH %RBX |
0xc0af6d SUB $0x58,%RSP |
0xc0af71 MOV %R9,-0x68(%RBP) |
0xc0af75 MOV %R8,-0x60(%RBP) |
0xc0af79 MOV %RCX,-0x58(%RBP) |
0xc0af7d MOV %RDX,-0x50(%RBP) |
0xc0af81 MOV (%RDX),%EBX |
0xc0af83 TEST %EBX,%EBX |
0xc0af85 JLE c0aff7 |
0xc0af87 DEC %EBX |
0xc0af89 MOVL $0,-0x30(%RBP) |
0xc0af90 MOV %EBX,-0x2c(%RBP) |
0xc0af93 MOVL $0x1,-0x44(%RBP) |
0xc0af9a MOVL $0,-0x40(%RBP) |
0xc0afa1 MOV (%RDI),%ESI |
0xc0afa3 SUB $0x8,%RSP |
0xc0afa7 LEA -0x44(%RBP),%RAX |
0xc0afab LEA 0x3d3a6e(%RIP),%RDI |
0xc0afb2 LEA -0x40(%RBP),%RCX |
0xc0afb6 LEA -0x30(%RBP),%R8 |
0xc0afba LEA -0x2c(%RBP),%R9 |
0xc0afbe MOV %ESI,-0x34(%RBP) |
0xc0afc1 MOV $0x22,%EDX |
0xc0afc6 PUSH $0x1 |
0xc0afc8 PUSH $0x1 |
0xc0afca PUSH %RAX |
0xc0afcb CALL fa67e0 <@plt_start@+0x3330> |
0xc0afd0 ADD $0x20,%RSP |
0xc0afd4 MOV -0x2c(%RBP),%EAX |
0xc0afd7 CMP %EBX,%EAX |
0xc0afd9 CMOVL %EAX,%EBX |
0xc0afdc MOV %EBX,-0x2c(%RBP) |
0xc0afdf MOVSXD -0x30(%RBP),%R8 |
0xc0afe3 CMP %EBX,%R8D |
0xc0afe6 JLE c0b019 |
0xc0afe8 LEA 0x3d3a49(%RIP),%RDI |
0xc0afef MOV -0x34(%RBP),%ESI |
0xc0aff2 CALL fa67f0 <@plt_start@+0x3340> |
0xc0aff7 ADD $0x58,%RSP |
0xc0affb POP %RBX |
0xc0affc POP %R12 |
0xc0affe POP %R13 |
0xc0b000 POP %R14 |
0xc0b002 POP %R15 |
0xc0b004 POP %RBP |
0xc0b005 RET |
(1401) 0xc0b010 MOVSXD -0x2c(%RBP),%RAX |
(1401) 0xc0b014 CMP %RAX,%R14 |
(1401) 0xc0b017 JGE c0afe8 |
(1401) 0xc0b019 MOV %R8,%R14 |
(1401) 0xc0b01c MOV -0x58(%RBP),%RAX |
(1401) 0xc0b020 MOV (%RAX),%R10 |
(1401) 0xc0b023 MOV 0x8(%RAX),%RAX |
(1401) 0xc0b027 SUB %R10,%RAX |
(1401) 0xc0b02a SHR $0x2,%RAX |
(1401) 0xc0b02e IMUL $-0x55555555,%EAX,%ECX |
(1401) 0xc0b034 MOV -0x50(%RBP),%RAX |
(1401) 0xc0b038 MOV (%RAX),%ESI |
(1401) 0xc0b03a INC %R8 |
(1401) 0xc0b03d MOV -0x60(%RBP),%RAX |
(1401) 0xc0b041 MOV (%RAX),%RDI |
(1401) 0xc0b044 MOV %R8D,%EAX |
(1401) 0xc0b047 IMUL %ECX,%EAX |
(1401) 0xc0b04a CLTD |
(1401) 0xc0b04b IDIV %ESI |
(1401) 0xc0b04d MOV %EAX,%R11D |
(1401) 0xc0b050 MOV %R14D,%EAX |
(1401) 0xc0b053 IMUL %ECX,%EAX |
(1401) 0xc0b056 CLTD |
(1401) 0xc0b057 IDIV %ESI |
(1401) 0xc0b059 MOV %EAX,%EDX |
(1401) 0xc0b05b MOV -0x68(%RBP),%RAX |
(1401) 0xc0b05f MOV (%RAX),%RBX |
(1401) 0xc0b062 MOV 0x60(%RBX),%RAX |
(1401) 0xc0b066 LEA (%R14,%R14,2),%RCX |
(1401) 0xc0b06a MOV (%RAX,%RCX,8),%R13 |
(1401) 0xc0b06e MOV 0x4(%RBX),%R15D |
(1401) 0xc0b072 MOV 0x48(%RBX),%R12 |
(1401) 0xc0b076 TEST %R15D,%R15D |
(1401) 0xc0b079 JLE c0b0b3 |
(1401) 0xc0b07b MOV %EDX,-0x38(%RBP) |
(1401) 0xc0b07e LEA (,%R15,4),%RDX |
(1401) 0xc0b086 MOV %RDI,-0x70(%RBP) |
(1401) 0xc0b08a MOV %R13,%RDI |
(1401) 0xc0b08d XOR %ESI,%ESI |
(1401) 0xc0b08f MOV %R8,-0x80(%RBP) |
(1401) 0xc0b093 MOV %R10,-0x78(%RBP) |
(1401) 0xc0b097 MOV %R11D,-0x3c(%RBP) |
(1401) 0xc0b09b CALL fa3cb0 <@plt_start@+0x800> |
(1401) 0xc0b0a0 MOV -0x70(%RBP),%RDI |
(1401) 0xc0b0a4 MOV -0x38(%RBP),%EDX |
(1401) 0xc0b0a7 MOV -0x3c(%RBP),%R11D |
(1401) 0xc0b0ab MOV -0x78(%RBP),%R10 |
(1401) 0xc0b0af MOV -0x80(%RBP),%R8 |
(1401) 0xc0b0b3 CMPL $0,(%RBX) |
(1401) 0xc0b0b6 JE c0b1a0 |
(1401) 0xc0b0bc CMP %R11D,%EDX |
(1401) 0xc0b0bf JGE c0b010 |
(1401) 0xc0b0c5 VMOVSS 0x10(%RDI),%XMM0 |
(1401) 0xc0b0ca VMOVSS 0x1c(%RDI),%XMM1 |
(1401) 0xc0b0cf VCVTSI2SS %R15D,%XMM6,%XMM2 |
(1401) 0xc0b0d4 LEA (%R15,%R15,1),%EAX |
(1401) 0xc0b0d8 VCVTSI2SS %EAX,%XMM6,%XMM3 |
(1401) 0xc0b0dc MOVSXD %EDX,%RDI |
(1401) 0xc0b0df MOV %R11D,%EAX |
(1401) 0xc0b0e2 SUB %EDX,%EAX |
(1401) 0xc0b0e4 MOV %RDI,%RCX |
(1401) 0xc0b0e7 TEST $0x1,%AL |
(1401) 0xc0b0e9 JE c0b11d |
(1401) 0xc0b0eb LEA (%RDI,%RDI,2),%RAX |
(1401) 0xc0b0ef VMULSS 0x8(%R10,%RAX,4),%XMM1,%XMM4 |
(1401) 0xc0b0f6 VFMADD231SS 0x4(%R10,%RAX,4),%XMM0,%XMM4 |
(1401) 0xc0b0fd VMULSS %XMM2,%XMM4,%XMM4 |
(1401) 0xc0b101 VADDSS %XMM3,%XMM4,%XMM4 |
(1401) 0xc0b105 VCVTTSS2SI %XMM4,%EAX |
(1401) 0xc0b109 CLTD |
(1401) 0xc0b10a IDIV %R15D |
(1401) 0xc0b10d MOV %EDX,(%R12,%RDI,4) |
(1401) 0xc0b111 MOVSXD %EDX,%RAX |
(1401) 0xc0b114 INCL (%R13,%RAX,4) |
(1401) 0xc0b119 LEA 0x1(%RDI),%RCX |
(1401) 0xc0b11d MOVSXD %R11D,%RSI |
(1401) 0xc0b120 NOT %RDI |
(1401) 0xc0b123 ADD %RSI,%RDI |
(1401) 0xc0b126 JE c0b010 |
(1401) 0xc0b12c LEA (%RCX,%RCX,2),%RAX |
(1401) 0xc0b130 LEA (%R10,%RAX,4),%RDI |
(1401) 0xc0b134 ADD $0x14,%RDI |
(1401) 0xc0b138 NOPL (%RAX,%RAX,1) |
(1403) 0xc0b140 VMULSS -0xc(%RDI),%XMM1,%XMM4 |
(1403) 0xc0b145 VFMADD231SS -0x10(%RDI),%XMM0,%XMM4 |
(1403) 0xc0b14b VMULSS %XMM2,%XMM4,%XMM4 |
(1403) 0xc0b14f VADDSS %XMM3,%XMM4,%XMM4 |
(1403) 0xc0b153 VCVTTSS2SI %XMM4,%EAX |
(1403) 0xc0b157 CLTD |
(1403) 0xc0b158 IDIV %R15D |
(1403) 0xc0b15b MOV %EDX,(%R12,%RCX,4) |
(1403) 0xc0b15f MOVSXD %EDX,%RAX |
(1403) 0xc0b162 INCL (%R13,%RAX,4) |
(1403) 0xc0b167 VMULSS (%RDI),%XMM1,%XMM4 |
(1403) 0xc0b16b VFMADD231SS -0x4(%RDI),%XMM0,%XMM4 |
(1403) 0xc0b171 VMULSS %XMM2,%XMM4,%XMM4 |
(1403) 0xc0b175 VADDSS %XMM3,%XMM4,%XMM4 |
(1403) 0xc0b179 VCVTTSS2SI %XMM4,%EAX |
(1403) 0xc0b17d CLTD |
(1403) 0xc0b17e IDIV %R15D |
(1403) 0xc0b181 MOVSXD %EDX,%RAX |
(1403) 0xc0b184 MOV %EDX,0x4(%R12,%RCX,4) |
(1403) 0xc0b189 INCL (%R13,%RAX,4) |
(1403) 0xc0b18e ADD $0x2,%RCX |
(1403) 0xc0b192 ADD $0x18,%RDI |
(1403) 0xc0b196 CMP %RCX,%RSI |
(1403) 0xc0b199 JNE c0b140 |
(1401) 0xc0b19b JMP c0b010 |
(1401) 0xc0b1a0 CMP %R11D,%EDX |
(1401) 0xc0b1a3 JGE c0b010 |
(1401) 0xc0b1a9 VMOVSS (%RDI),%XMM0 |
(1401) 0xc0b1ad VMOVSS 0xc(%RDI),%XMM1 |
(1401) 0xc0b1b2 VMOVSS 0x18(%RDI),%XMM2 |
(1401) 0xc0b1b7 VCVTSI2SS %R15D,%XMM6,%XMM3 |
(1401) 0xc0b1bc LEA (%R15,%R15,1),%EAX |
(1401) 0xc0b1c0 VCVTSI2SS %EAX,%XMM6,%XMM4 |
(1401) 0xc0b1c4 MOVSXD %EDX,%RDI |
(1401) 0xc0b1c7 MOV %R11D,%EAX |
(1401) 0xc0b1ca SUB %EDX,%EAX |
(1401) 0xc0b1cc MOV %RDI,%RCX |
(1401) 0xc0b1cf TEST $0x1,%AL |
(1401) 0xc0b1d1 JE c0b20b |
(1401) 0xc0b1d3 LEA (%RDI,%RDI,2),%RAX |
(1401) 0xc0b1d7 VMULSS 0x4(%R10,%RAX,4),%XMM1,%XMM5 |
(1401) 0xc0b1de VFMADD231SS (%R10,%RAX,4),%XMM0,%XMM5 |
(1401) 0xc0b1e4 VFMADD231SS 0x8(%R10,%RAX,4),%XMM2,%XMM5 |
(1401) 0xc0b1eb VMULSS %XMM3,%XMM5,%XMM5 |
(1401) 0xc0b1ef VADDSS %XMM4,%XMM5,%XMM5 |
(1401) 0xc0b1f3 VCVTTSS2SI %XMM5,%EAX |
(1401) 0xc0b1f7 CLTD |
(1401) 0xc0b1f8 IDIV %R15D |
(1401) 0xc0b1fb MOV %EDX,(%R12,%RDI,4) |
(1401) 0xc0b1ff MOVSXD %EDX,%RAX |
(1401) 0xc0b202 INCL (%R13,%RAX,4) |
(1401) 0xc0b207 LEA 0x1(%RDI),%RCX |
(1401) 0xc0b20b MOVSXD %R11D,%RSI |
(1401) 0xc0b20e NOT %RDI |
(1401) 0xc0b211 ADD %RSI,%RDI |
(1401) 0xc0b214 JE c0b010 |
(1401) 0xc0b21a LEA (%RCX,%RCX,2),%RAX |
(1401) 0xc0b21e LEA (%R10,%RAX,4),%RDI |
(1401) 0xc0b222 ADD $0x14,%RDI |
(1401) 0xc0b226 NOPW %CS:(%RAX,%RAX,1) |
(1402) 0xc0b230 VMULSS -0x10(%RDI),%XMM1,%XMM5 |
(1402) 0xc0b235 VFMADD231SS -0x14(%RDI),%XMM0,%XMM5 |
(1402) 0xc0b23b VFMADD231SS -0xc(%RDI),%XMM2,%XMM5 |
(1402) 0xc0b241 VMULSS %XMM3,%XMM5,%XMM5 |
(1402) 0xc0b245 VADDSS %XMM4,%XMM5,%XMM5 |
(1402) 0xc0b249 VCVTTSS2SI %XMM5,%EAX |
(1402) 0xc0b24d CLTD |
(1402) 0xc0b24e IDIV %R15D |
(1402) 0xc0b251 MOV %EDX,(%R12,%RCX,4) |
(1402) 0xc0b255 MOVSXD %EDX,%RAX |
(1402) 0xc0b258 INCL (%R13,%RAX,4) |
(1402) 0xc0b25d VMULSS -0x4(%RDI),%XMM1,%XMM5 |
(1402) 0xc0b262 VFMADD231SS -0x8(%RDI),%XMM0,%XMM5 |
(1402) 0xc0b268 VFMADD231SS (%RDI),%XMM2,%XMM5 |
(1402) 0xc0b26d VMULSS %XMM3,%XMM5,%XMM5 |
(1402) 0xc0b271 VADDSS %XMM4,%XMM5,%XMM5 |
(1402) 0xc0b275 VCVTTSS2SI %XMM5,%EAX |
(1402) 0xc0b279 CLTD |
(1402) 0xc0b27a IDIV %R15D |
(1402) 0xc0b27d MOVSXD %EDX,%RAX |
(1402) 0xc0b280 MOV %EDX,0x4(%R12,%RCX,4) |
(1402) 0xc0b285 INCL (%R13,%RAX,4) |
(1402) 0xc0b28a ADD $0x2,%RCX |
(1402) 0xc0b28e ADD $0x18,%RDI |
(1402) 0xc0b292 CMP %RCX,%RSI |
(1402) 0xc0b295 JNE c0b230 |
(1401) 0xc0b297 JMP c0b010 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►78.60+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►12.26+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►9.14+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►86.30+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►8.52+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►5.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►55.82+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►28.77+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
►15.41+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►70.64+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►18.48+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►10.68+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►44.16+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►36.36+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
►19.30+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►99.92+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | do_redist_pos_coeffs(gmx_pme_t[...] | pme_redistribute.cpp:128 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1230 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | pme_redistribute.cpp:81-138 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 52 |
nb uops | 54 |
loop length | 166 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 9.00 cycles |
front end | 9.00 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 5.00 | 4.75 | 4.75 | 2.50 | 5.67 | 5.67 | 5.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.00 | 5.00 | 4.75 | 4.75 | 2.50 | 5.67 | 5.67 | 5.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 9.00 |
Dispatch | 5.67 |
Overall L1 | 9.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 7% |
load | 7% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%RDX),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE c0aff7 <.omp_outlined.+0x97> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
DEC %EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV %EBX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVL $0x1,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x44(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x3d3a6e(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x40(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x30(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x2c(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
CALL fa67e0 <@plt_start@+0x3330> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x2c(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
CMP %EBX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
CMOVL %EAX,%EBX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
MOV %EBX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD -0x30(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
CMP %EBX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE c0b019 <.omp_outlined.+0xb9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA 0x3d3a49(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
CALL fa67f0 <@plt_start@+0x3340> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | pme_redistribute.cpp:81-138 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 52 |
nb uops | 54 |
loop length | 166 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 9.00 cycles |
front end | 9.00 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 5.00 | 4.75 | 4.75 | 2.50 | 5.67 | 5.67 | 5.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.00 | 5.00 | 4.75 | 4.75 | 2.50 | 5.67 | 5.67 | 5.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 9.00 |
Dispatch | 5.67 |
Overall L1 | 9.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 7% |
load | 7% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%RDX),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE c0aff7 <.omp_outlined.+0x97> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
DEC %EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV %EBX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVL $0x1,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x44(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x3d3a6e(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x40(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x30(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x2c(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
CALL fa67e0 <@plt_start@+0x3330> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x2c(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
CMP %EBX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
CMOVL %EAX,%EBX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
MOV %EBX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD -0x30(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
CMP %EBX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE c0b019 <.omp_outlined.+0xb9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA 0x3d3a49(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
CALL fa67f0 <@plt_start@+0x3340> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | ||||||
2x1 | 2 | 1 | 1 | 2 | 1.2849998474121 | 0.27467241883278 |
4x1 | 4 | 1 | 1 | 4 | 0.83000004291534 | 0.24902287125587 |
8x1 | 8 | 1 | 1 | 8 | 0.35999998450279 | 0.20770587027073 |
16x1 | 16 | 1 | 1 | 16 | 0.23499996960163 | 0.20495128631592 |
32x1 | 12 | 1 | 1 | 32 | 0.3999999165535 | 0.17775659263134 |
64x1 | 24 | 1 | 1 | 64 | 0.19999997317791 | 0.096059374511242 |
128x1 | 44 | 1 | 1 | 128 | 0.18999996781349 | 0.25973334908485 |
192x1 | 64 | 1 | 1 | 192 | 0.15999998152256 | 0.24060502648354 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined.#0xc0af60– | 0.24 | 0.03 |
▼Loop 1401 - pme_redistribute.cpp:81-138 - libgromacs_mpi.so.9.0.0– | 0.00 | 0.01 |
○Loop 1402 - pme_redistribute.cpp:96-103 - libgromacs_mpi.so.9.0.0 | 0.12 | 0.05 |
○Loop 1403 - pme_redistribute.cpp:111-118 - libgromacs_mpi.so.9.0.0 | 0.12 | 0.05 |