Function: int make_bondeds_zone<true>(gmx_reverse_top_t const&, gmx::ArrayRef<int const>, gmx_ga2la_ ... | Module: libgromacs_mpi.so.9.0.0 | Source: localtopology.cpp:104-716 [...] | Coverage (incl. loops): NA% | (excl. loops): NA% |
---|
Function: int make_bondeds_zone<true>(gmx_reverse_top_t const&, gmx::ArrayRef<int const>, gmx_ga2la_ ... | Module: libgromacs_mpi.so.9.0.0 | Source: localtopology.cpp:104-716 [...] | Coverage (incl. loops): NA% | (excl. loops): NA% |
---|
/home/eoseret/gromacs-2024.2/src/gromacs/domdec/localtopology.cpp: 104 - 716 |
-------------------------------------------------------------------------------- |
104: [globalAtomIndex](const MolblockIndices& mbi) { return mbi.a_end <= globalAtomIndex; }); |
105: |
106: AtomInMolblock aim; |
107: |
108: aim.molblockIndex = std::distance(molblockIndices.begin(), molblockIt); |
109: aim.moleculeType = molblockIt->type; |
110: aim.moleculeIndex = (globalAtomIndex - molblockIt->a_start) / molblockIt->natoms_mol; |
111: aim.atomIndexInMolecule = |
112: (globalAtomIndex - molblockIt->a_start) - (aim.moleculeIndex) * molblockIt->natoms_mol; |
[...] |
653: { |
654: const auto ddBondedChecking = rt.options().ddBondedChecking_; |
655: |
656: int numBondedInteractions = 0; |
657: |
658: for (int atomIndexLocal : atomRange) |
659: { |
660: /* Get the global atom number */ |
661: const int atomIndexGlobal = globalAtomIndices[atomIndexLocal]; |
662: const auto aim = atomInMolblockFromGlobalAtomnr(rt.molblockIndices(), atomIndexGlobal); |
663: |
664: const AtomIndexSet atomIndexMol = { atomIndexLocal, atomIndexGlobal, aim.atomIndexInMolecule }; |
665: const auto& ilistMol = rt.interactionListForMoleculeType(aim.moleculeType); |
666: numBondedInteractions += assignInteractionsForAtom<haveSingleDomain>(atomIndexMol, |
[...] |
681: if (izone == 0 && rt.hasPositionRestraints()) |
682: { |
683: numBondedInteractions += |
684: assignPositionRestraintsForAtom(atomIndexMol, |
685: aim.moleculeIndex, |
686: ilistMol.numAtomsInMolecule, |
687: rt.interactionListForMoleculeType(aim.moleculeType), |
688: molb[aim.molblockIndex], |
689: ip_in, |
690: idef); |
691: } |
692: |
693: if (rt.hasIntermolecularInteractions()) |
694: { |
695: /* Check all intermolecular interactions assigned to this atom. |
696: * Note that we will index the intermolecular reverse ilist with atomIndexGlobal. |
697: */ |
698: const AtomIndexSet atomIndexIntermol = { atomIndexLocal, atomIndexGlobal, atomIndexGlobal }; |
699: numBondedInteractions += assignInteractionsForAtom<haveSingleDomain>( |
700: atomIndexIntermol, |
701: rt.interactionListForIntermolecularInteractions(), |
[...] |
716: return numBondedInteractions; |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1064 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
/home/eoseret/gromacs-2024.2/api/legacy/include/gromacs/utility/arrayref.h: 82 - 85 |
-------------------------------------------------------------------------------- |
82: it_ += i; |
83: return *this; |
84: } |
85: constexpr auto operator-(ArrayRefIter other) const noexcept { return it_ - other.it_; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algo.h: 568 - 573 |
-------------------------------------------------------------------------------- |
568: while (__len > 0) |
569: { |
570: _DistanceType __half = __len >> 1; |
571: _ForwardIterator __middle = __first; |
572: std::advance(__middle, __half); |
573: if (__pred(*__middle)) |
/home/eoseret/gromacs-2024.2/api/legacy/include/gromacs/utility/range.h: 84 - 114 |
-------------------------------------------------------------------------------- |
84: bool operator!=(const iterator other) { return value_ != other.value_; } |
85: //! Increment operator |
86: iterator& operator++() |
87: { |
88: ++value_; |
[...] |
112: iterator begin() const { return begin_; } |
113: //! End iterator/value |
114: iterator end() const { return end_; } |
0x6ac3a0 PUSH %RBP |
0x6ac3a1 MOV %RSP,%RBP |
0x6ac3a4 PUSH %R15 |
0x6ac3a6 PUSH %R14 |
0x6ac3a8 PUSH %R13 |
0x6ac3aa PUSH %R12 |
0x6ac3ac PUSH %RBX |
0x6ac3ad SUB $0x78,%RSP |
0x6ac3b1 MOV %R9,-0x80(%RBP) |
0x6ac3b5 MOV %RCX,-0x68(%RBP) |
0x6ac3b9 MOV %RSI,-0x88(%RBP) |
0x6ac3c0 MOV %RDI,%R13 |
0x6ac3c3 MOV 0x58(%RBP),%RBX |
0x6ac3c7 CALL fa9260 <@plt_start@+0x5db0> |
0x6ac3cc MOVSXD (%RBX),%R14 |
0x6ac3cf MOV 0x4(%RBX),%ECX |
0x6ac3d2 XOR %EBX,%EBX |
0x6ac3d4 MOV %ECX,-0x44(%RBP) |
0x6ac3d7 CMP %ECX,%R14D |
0x6ac3da JNE 6ac3ed |
0x6ac3dc MOV %EBX,%EAX |
0x6ac3de ADD $0x78,%RSP |
0x6ac3e2 POP %RBX |
0x6ac3e3 POP %R12 |
0x6ac3e5 POP %R13 |
0x6ac3e7 POP %R14 |
0x6ac3e9 POP %R15 |
0x6ac3eb POP %RBP |
0x6ac3ec RET |
0x6ac3ed MOVZX (%RAX),%EAX |
0x6ac3f0 MOV %AL,-0x29(%RBP) |
0x6ac3f3 XOR %R12D,%R12D |
0x6ac3f6 MOV %R13,-0x38(%RBP) |
0x6ac3fa JMP 6ac40c |
0x6ac3fc NOPL (%RAX) |
(3315) 0x6ac400 INC %R14 |
(3315) 0x6ac403 MOV %EBX,%R12D |
(3315) 0x6ac406 CMP %R14D,-0x44(%RBP) |
(3315) 0x6ac40a JE 6ac3dc |
(3315) 0x6ac40c MOV -0x88(%RBP),%RAX |
(3315) 0x6ac413 MOV (%RAX,%R14,4),%R15D |
(3315) 0x6ac417 MOV %R13,%RDI |
(3315) 0x6ac41a CALL fa9270 <@plt_start@+0x5dc0> |
(3315) 0x6ac41f SUB %RAX,%RDX |
(3315) 0x6ac422 TEST %RDX,%RDX |
(3315) 0x6ac425 MOV %R12D,-0x30(%RBP) |
(3315) 0x6ac429 MOV %RAX,-0x90(%RBP) |
(3315) 0x6ac430 JLE 6ac470 |
(3315) 0x6ac432 SHR $0x4,%RDX |
(3315) 0x6ac436 MOV %RAX,%R12 |
(3315) 0x6ac439 MOV %RDX,%RAX |
(3315) 0x6ac43c JMP 6ac448 |
0x6ac43e XCHG %AX,%AX |
(3316) 0x6ac440 MOV %RAX,%RDX |
(3316) 0x6ac443 TEST %RAX,%RAX |
(3316) 0x6ac446 JLE 6ac473 |
(3316) 0x6ac448 SHR $0x1,%RAX |
(3316) 0x6ac44b MOV %RAX,%RCX |
(3316) 0x6ac44e SAL $0x4,%RCX |
(3316) 0x6ac452 CMP %R15D,0x4(%R12,%RCX,1) |
(3316) 0x6ac457 JG 6ac440 |
(3316) 0x6ac459 NOT %RAX |
(3316) 0x6ac45c ADD %RDX,%RAX |
(3316) 0x6ac45f ADD %RCX,%R12 |
(3316) 0x6ac462 ADD $0x10,%R12 |
(3316) 0x6ac466 JMP 6ac440 |
0x6ac468 NOPL (%RAX,%RAX,1) |
(3315) 0x6ac470 MOV %RAX,%R12 |
(3315) 0x6ac473 MOV 0xc(%R12),%ESI |
(3315) 0x6ac478 MOV (%R12),%ECX |
(3315) 0x6ac47c MOV 0x8(%R12),%EDI |
(3315) 0x6ac481 MOV %R15D,%EAX |
(3315) 0x6ac484 SUB %ECX,%EAX |
(3315) 0x6ac486 CLTD |
(3315) 0x6ac487 IDIV %EDI |
(3315) 0x6ac489 MOV %EAX,-0x48(%RBP) |
(3315) 0x6ac48c IMUL %EAX,%EDI |
(3315) 0x6ac48f ADD %ECX,%EDI |
(3315) 0x6ac491 MOV %R15D,%EAX |
(3315) 0x6ac494 SUB %EDI,%EAX |
(3315) 0x6ac496 MOV %R14,-0x40(%RBP) |
(3315) 0x6ac49a MOV %R14D,-0x60(%RBP) |
(3315) 0x6ac49e MOV %R15D,-0x5c(%RBP) |
(3315) 0x6ac4a2 MOV %EAX,-0x58(%RBP) |
(3315) 0x6ac4a5 MOV %R13,%RDI |
(3315) 0x6ac4a8 MOV %ESI,-0x4c(%RBP) |
(3315) 0x6ac4ab CALL fa9280 <@plt_start@+0x5dd0> |
(3315) 0x6ac4b0 MOV %RAX,%R13 |
(3315) 0x6ac4b3 MOVZX -0x29(%RBP),%R9D |
(3315) 0x6ac4b8 LEA -0x60(%RBP),%RDI |
(3315) 0x6ac4bc MOV %RAX,%RSI |
(3315) 0x6ac4bf MOV -0x68(%RBP),%RDX |
(3315) 0x6ac4c3 MOV 0x48(%RBP),%RCX |
(3315) 0x6ac4c7 MOV 0x50(%RBP),%R14D |
(3315) 0x6ac4cb MOV %R14D,%R8D |
(3315) 0x6ac4ce MOV %R9D,-0x50(%RBP) |
(3315) 0x6ac4d2 CALL 6ad8b0 <_ZL25assignInteractionsForAtomILb1EEiRK12AtomIndexSetRK15reverse_ilist_tRK11gmx_ga2la_tRK18gmx_domdec_zones_tbPKibfPK5t_pbcN3gmx8ArrayRefIKNSH_11BasicVectorIfEEEEP22InteractionDefinitionsiNSH_16DDBondedCheckingE> |
(3315) 0x6ac4d7 MOV %EAX,%EBX |
(3315) 0x6ac4d9 ADD -0x30(%RBP),%EBX |
(3315) 0x6ac4dc TEST %R14D,%R14D |
(3315) 0x6ac4df JNE 6ac550 |
(3315) 0x6ac4e1 MOV -0x4c(%RBP),%R14D |
(3315) 0x6ac4e5 MOV -0x38(%RBP),%RDI |
(3315) 0x6ac4e9 CALL fa9290 <@plt_start@+0x5de0> |
(3315) 0x6ac4ee TEST %AL,%AL |
(3315) 0x6ac4f0 JE 6ac550 |
(3315) 0x6ac4f2 SUB -0x90(%RBP),%R12 |
(3315) 0x6ac4f9 MOV 0x30(%R13),%EAX |
(3315) 0x6ac4fd MOV %EAX,-0x30(%RBP) |
(3315) 0x6ac500 MOV -0x38(%RBP),%R13 |
(3315) 0x6ac504 MOV %R13,%RDI |
(3315) 0x6ac507 MOV %R14D,%ESI |
(3315) 0x6ac50a CALL fa9280 <@plt_start@+0x5dd0> |
(3315) 0x6ac50f SHR $0x4,%R12 |
(3315) 0x6ac513 MOVSXD %R12D,%RCX |
(3315) 0x6ac516 IMUL $0x38,%RCX,%R8 |
(3315) 0x6ac51a MOV -0x80(%RBP),%RCX |
(3315) 0x6ac51e ADD (%RCX),%R8 |
(3315) 0x6ac521 MOV 0x48(%RBP),%RCX |
(3315) 0x6ac525 MOV %RCX,(%RSP) |
(3315) 0x6ac529 LEA -0x60(%RBP),%RDI |
(3315) 0x6ac52d MOV -0x48(%RBP),%ESI |
(3315) 0x6ac530 MOV -0x30(%RBP),%EDX |
(3315) 0x6ac533 MOV %RAX,%RCX |
(3315) 0x6ac536 MOV 0x40(%RBP),%R9 |
(3315) 0x6ac53a CALL 6ade70 <_ZL31assignPositionRestraintsForAtomRK12AtomIndexSetiiRK15reverse_ilist_tRK14gmx_molblock_tPK9t_iparamsP22InteractionDefinitions> |
(3315) 0x6ac53f ADD %EAX,%EBX |
(3315) 0x6ac541 JMP 6ac554 |
0x6ac543 NOPW %CS:(%RAX,%RAX,1) |
(3315) 0x6ac550 MOV -0x38(%RBP),%R13 |
(3315) 0x6ac554 MOV -0x40(%RBP),%R14 |
(3315) 0x6ac558 MOV %R13,%RDI |
(3315) 0x6ac55b CALL fa92a0 <@plt_start@+0x5df0> |
(3315) 0x6ac560 TEST %AL,%AL |
(3315) 0x6ac562 JE 6ac400 |
(3315) 0x6ac568 MOV %R14D,-0x78(%RBP) |
(3315) 0x6ac56c MOV %R15D,-0x74(%RBP) |
(3315) 0x6ac570 MOV %R15D,-0x70(%RBP) |
(3315) 0x6ac574 MOV %R13,%RDI |
(3315) 0x6ac577 CALL fa92b0 <@plt_start@+0x5e00> |
(3315) 0x6ac57c LEA -0x78(%RBP),%RDI |
(3315) 0x6ac580 MOV %RAX,%RSI |
(3315) 0x6ac583 MOV -0x68(%RBP),%RDX |
(3315) 0x6ac587 MOV 0x48(%RBP),%RCX |
(3315) 0x6ac58b MOV 0x50(%RBP),%R8D |
(3315) 0x6ac58f MOV -0x50(%RBP),%R9D |
(3315) 0x6ac593 CALL 6ad8b0 <_ZL25assignInteractionsForAtomILb1EEiRK12AtomIndexSetRK15reverse_ilist_tRK11gmx_ga2la_tRK18gmx_domdec_zones_tbPKibfPK5t_pbcN3gmx8ArrayRefIKNSH_11BasicVectorIfEEEEP22InteractionDefinitionsiNSH_16DDBondedCheckingE> |
(3315) 0x6ac598 ADD %EAX,%EBX |
(3315) 0x6ac59a JMP 6ac400 |
0x6ac59f NOP |
Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | localtopology.cpp:104-716 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 39 |
nb uops | 35 |
loop length | 120 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 5.83 cycles |
front end | 5.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 1.75 | 1.75 | 1.50 | 2.00 | 3.67 | 3.67 | 3.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 2.00 | 1.75 | 1.75 | 1.50 | 2.00 | 3.67 | 3.67 | 3.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 5.83 |
Dispatch | 3.67 |
Overall L1 | 5.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | 9% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RSI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV 0x58(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CALL fa9260 <@plt_start@+0x5db0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOVSXD (%RBX),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV 0x4(%RBX),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV %ECX,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
CMP %ECX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JNE 6ac3ed <_ZL17make_bondeds_zoneILb1EEiRK17gmx_reverse_top_tN3gmx8ArrayRefIKiEERK11gmx_ga2la_tRK18gmx_domdec_zones_tRKSt6vectorI14gmx_molblock_tSaISE_EEbPS5_bfPK5t_pbcNS4_IKNS3_11BasicVectorIfEEEEPK9t_iparamsP22InteractionDefinitionsiRKNS3_5RangeIiEE+0x4d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
ADD $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOVZX (%RAX),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV %AL,-0x29(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | N/A |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
MOV %R13,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP 6ac40c <_ZL17make_bondeds_zoneILb1EEiRK17gmx_reverse_top_tN3gmx8ArrayRefIKiEERK11gmx_ga2la_tRK18gmx_domdec_zones_tRKSt6vectorI14gmx_molblock_tSaISE_EEbPS5_bfPK5t_pbcNS4_IKNS3_11BasicVectorIfEEEEPK9t_iparamsP22InteractionDefinitionsiRKNS3_5RangeIiEE+0x6c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | localtopology.cpp:104-716 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 39 |
nb uops | 35 |
loop length | 120 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 5.83 cycles |
front end | 5.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 1.75 | 1.75 | 1.50 | 2.00 | 3.67 | 3.67 | 3.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 2.00 | 1.75 | 1.75 | 1.50 | 2.00 | 3.67 | 3.67 | 3.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 5.83 |
Dispatch | 3.67 |
Overall L1 | 5.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | 9% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RSI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV 0x58(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CALL fa9260 <@plt_start@+0x5db0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOVSXD (%RBX),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV 0x4(%RBX),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV %ECX,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
CMP %ECX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JNE 6ac3ed <_ZL17make_bondeds_zoneILb1EEiRK17gmx_reverse_top_tN3gmx8ArrayRefIKiEERK11gmx_ga2la_tRK18gmx_domdec_zones_tRKSt6vectorI14gmx_molblock_tSaISE_EEbPS5_bfPK5t_pbcNS4_IKNS3_11BasicVectorIfEEEEPK9t_iparamsP22InteractionDefinitionsiRKNS3_5RangeIiEE+0x4d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
ADD $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOVZX (%RAX),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV %AL,-0x29(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | N/A |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
MOV %R13,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP 6ac40c <_ZL17make_bondeds_zoneILb1EEiRK17gmx_reverse_top_tN3gmx8ArrayRefIKiEERK11gmx_ga2la_tRK18gmx_domdec_zones_tRKSt6vectorI14gmx_molblock_tSaISE_EEbPS5_bfPK5t_pbcNS4_IKNS3_11BasicVectorIfEEEEPK9t_iparamsP22InteractionDefinitionsiRKNS3_5RangeIiEE+0x6c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 0.099999986588955 | 0.011996093206108 |
2x1 | ||||||
4x1 | ||||||
8x1 | ||||||
16x1 | ||||||
32x1 | ||||||
64x1 | ||||||
128x1 | ||||||
192x1 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼int make_bondeds_zone | 0.00 | 0.00 |
▼Loop 3315 - localtopology.cpp:104-701 - libgromacs_mpi.so.9.0.0– | 0.00 | 0.00 |
○Loop 3316 - stl_algo.h:568-573 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |