Function: .omp_outlined..4 | Module: libgromacs_mpi.so.9.0.0 | Source: md_support.cpp:147-219 [...] | Coverage (incl. loops): 0.04% | (excl. loops): 0.00% |
---|
Function: .omp_outlined..4 | Module: libgromacs_mpi.so.9.0.0 | Source: md_support.cpp:147-219 [...] | Coverage (incl. loops): 0.04% | (excl. loops): 0.00% |
---|
/home/eoseret/gromacs-2024.2/src/gromacs/mdlib/md_support.cpp: 147 - 219 |
-------------------------------------------------------------------------------- |
147: #pragma omp parallel for num_threads(nthread) schedule(static) |
148: for (int thread = 0; thread < nthread; thread++) |
[...] |
160: start_t = ((thread + 0) * md->homenr) / nthread; |
161: end_t = ((thread + 1) * md->homenr) / nthread; |
162: |
163: ekin_sum = ekind->ekin_work[thread]; |
164: dekindl_sum = ekind->dekindl_work[thread]; |
165: |
166: for (gt = 0; gt < opts->ngtc; gt++) |
167: { |
168: clear_mat(ekin_sum[gt]); |
169: } |
170: *dekindl_sum = 0.0; |
[...] |
181: for (n = start_t; n < end_t; n++) |
182: { |
183: if (!md->cTC.empty()) |
184: { |
185: gt = md->cTC[n]; |
186: } |
187: hm = 0.5 * md->massT[n]; |
188: |
189: gmx::RVec vn = v[n]; |
[...] |
209: ekin_sum[gt][m][d] += hm * vn[m] * vn[d]; |
[...] |
217: if (md->nMassPerturbed && md->bPerturbed[n]) |
218: { |
219: *dekindl_sum += 0.5 * (md->massB[n] - md->massA[n]) * iprod(vn, vn); |
/home/eoseret/gromacs-2024.2/src/gromacs/ewald/pme_spread.cpp: 538 - 1023 |
-------------------------------------------------------------------------------- |
538: for (sx = 0; sx >= -pmegrids->nthread_comm[XX]; sx--) |
539: { |
540: fx = pmegrid->ci[XX] + sx; |
541: ox = 0; |
542: bCommX = FALSE; |
543: if (fx < 0) |
[...] |
554: tx1 = std::min(ox + pmegrid_g->n[XX], !bCommX ? localcopy_end[XX] : commcopy_end[XX]); |
[...] |
561: if (fy < 0) |
[...] |
633: for (x = offx; x < tx1; x++) |
[...] |
663: commbuf += buf_my * fft_nx * fft_nz; |
[...] |
683: for (x = offx; x < tx1; x++) |
[...] |
693: for (z = offz; z < tz1; z++) |
[...] |
700: for (z = offz; z < tz1; z++) |
[...] |
1022: } |
1023: GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR |
/home/eoseret/gromacs-2024.2/src/gromacs/utility/include/gromacs/utility/booltype.h: 70 - 70 |
-------------------------------------------------------------------------------- |
70: constexpr operator bool() const { return value_; } |
/home/eoseret/gromacs-2024.2/src/gromacs/mdlib/vsite.cpp: 1193 - 1214 |
-------------------------------------------------------------------------------- |
1193: #pragma omp parallel num_threads(threadingInfo->numThreads()) |
[...] |
1213: } |
1214: GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1064 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
/home/eoseret/gromacs-2024.2/api/legacy/include/gromacs/utility/arrayref.h: 82 - 82 |
-------------------------------------------------------------------------------- |
82: it_ += i; |
/home/eoseret/gromacs-2024.2/src/external/boost/stl_interfaces/iterator_interface.hpp: 305 - 305 |
-------------------------------------------------------------------------------- |
305: D retval = derived(); |
/home/eoseret/gromacs-2024.2/api/legacy/include/gromacs/math/vec.h: 328 - 335 |
-------------------------------------------------------------------------------- |
328: a[XX][XX] = a[XX][YY] = a[XX][ZZ] = nul; |
[...] |
335: return (a[XX] * b[XX] + a[YY] * b[YY] + a[ZZ] * b[ZZ]); |
0xb08370 PUSH %RBP |
0xb08371 MOV %RSP,%RBP |
0xb08374 PUSH %R15 |
0xb08376 PUSH %R14 |
0xb08378 PUSH %R13 |
0xb0837a PUSH %R12 |
0xb0837c PUSH %RBX |
0xb0837d SUB $0x68,%RSP |
0xb08381 MOV (%RDX),%EBX |
0xb08383 TEST %EBX,%EBX |
0xb08385 JLE b08405 |
0xb08387 MOV %R9,%R14 |
0xb0838a MOV %R8,%R15 |
0xb0838d MOV %RCX,%R13 |
0xb08390 MOV %RDX,%R12 |
0xb08393 DEC %EBX |
0xb08395 MOVL $0,-0x30(%RBP) |
0xb0839c MOV %EBX,-0x2c(%RBP) |
0xb0839f MOVL $0x1,-0x44(%RBP) |
0xb083a6 MOVL $0,-0x40(%RBP) |
0xb083ad MOV (%RDI),%ESI |
0xb083af SUB $0x8,%RSP |
0xb083b3 LEA -0x44(%RBP),%RAX |
0xb083b7 LEA 0x4d23b2(%RIP),%RDI |
0xb083be LEA -0x40(%RBP),%RCX |
0xb083c2 LEA -0x30(%RBP),%R8 |
0xb083c6 LEA -0x2c(%RBP),%R9 |
0xb083ca MOV %ESI,-0x34(%RBP) |
0xb083cd MOV $0x22,%EDX |
0xb083d2 PUSH $0x1 |
0xb083d4 PUSH $0x1 |
0xb083d6 PUSH %RAX |
0xb083d7 CALL fa67e0 <@plt_start@+0x3330> |
0xb083dc ADD $0x20,%RSP |
0xb083e0 MOV -0x2c(%RBP),%EAX |
0xb083e3 CMP %EBX,%EAX |
0xb083e5 CMOVL %EAX,%EBX |
0xb083e8 MOV %EBX,-0x2c(%RBP) |
0xb083eb MOVSXD -0x30(%RBP),%RCX |
0xb083ef CMP %EBX,%ECX |
0xb083f1 JLE b08414 |
0xb083f3 LEA 0x4d238e(%RIP),%RDI |
0xb083fa MOV -0x34(%RBP),%ESI |
0xb083fd VZEROUPPER |
0xb08400 CALL fa67f0 <@plt_start@+0x3340> |
0xb08405 ADD $0x68,%RSP |
0xb08409 POP %RBX |
0xb0840a POP %R12 |
0xb0840c POP %R13 |
0xb0840e POP %R14 |
0xb08410 POP %R15 |
0xb08412 POP %RBP |
0xb08413 RET |
0xb08414 MOV 0x10(%RBP),%R11 |
0xb08418 MOV %R14,%RSI |
0xb0841b MOV (%R13),%R14 |
0xb0841f MOV 0x280(%R14),%EAX |
0xb08426 MOV %EAX,-0x3c(%RBP) |
0xb08429 MOV (%R12),%EAX |
0xb0842d MOV %EAX,-0x38(%RBP) |
0xb08430 MOV (%R15),%RAX |
0xb08433 MOV 0x40(%RAX),%RDX |
0xb08437 MOV %RDX,-0x78(%RBP) |
0xb0843b MOV 0x48(%RAX),%RAX |
0xb0843f MOV %RAX,-0x70(%RBP) |
0xb08443 MOV (%RSI),%RAX |
0xb08446 MOV (%RAX),%EAX |
0xb08448 MOVSXD %EBX,%RDX |
0xb0844b MOV %RDX,-0x60(%RBP) |
0xb0844f MOV %RAX,-0x68(%RBP) |
0xb08453 SAL $0x2,%RAX |
0xb08457 LEA (%RAX,%RAX,8),%RAX |
0xb0845b MOV %RAX,-0x58(%RBP) |
0xb0845f VMOVSS -0x7abfb7(%RIP),%XMM8 |
0xb08467 VMOVAPS -0x7a7ecf(%RIP),%YMM10 |
0xb0846f VMOVAPS -0x79edb7(%RIP),%YMM11 |
0xb08477 VMOVSD -0x7ab31f(%RIP),%XMM12 |
0xb0847f MOV %R14,-0x50(%RBP) |
0xb08483 JMP b084a1 |
0xb08485 NOPW %CS:(%RAX,%RAX,1) |
(14999) 0xb08490 CMP -0x60(%RBP),%R12 |
(14999) 0xb08494 MOV -0x88(%RBP),%RCX |
(14999) 0xb0849b JGE b083f3 |
(14999) 0xb084a1 MOV %RCX,%R12 |
(14999) 0xb084a4 MOV -0x3c(%RBP),%ECX |
(14999) 0xb084a7 MOV %ECX,%EAX |
(14999) 0xb084a9 IMUL %R12D,%EAX |
(14999) 0xb084ad CLTD |
(14999) 0xb084ae MOV -0x38(%RBP),%ESI |
(14999) 0xb084b1 IDIV %ESI |
(14999) 0xb084b3 MOV %EAX,%R8D |
(14999) 0xb084b6 LEA 0x1(%R12),%RAX |
(14999) 0xb084bb MOV %RAX,-0x88(%RBP) |
(14999) 0xb084c2 IMUL %ECX,%EAX |
(14999) 0xb084c5 CLTD |
(14999) 0xb084c6 IDIV %ESI |
(14999) 0xb084c8 MOV %EAX,%R13D |
(14999) 0xb084cb MOV -0x78(%RBP),%RAX |
(14999) 0xb084cf MOV (%RAX,%R12,8),%R15 |
(14999) 0xb084d3 MOV -0x70(%RBP),%RAX |
(14999) 0xb084d7 MOV (%RAX,%R12,8),%RBX |
(14999) 0xb084db CMPL $0,-0x68(%RBP) |
(14999) 0xb084df JLE b0852a |
(14999) 0xb084e1 MOV %R15,%RDI |
(14999) 0xb084e4 XOR %ESI,%ESI |
(14999) 0xb084e6 MOV -0x58(%RBP),%RDX |
(14999) 0xb084ea MOV %R12,-0x80(%RBP) |
(14999) 0xb084ee MOV %R11,%R12 |
(14999) 0xb084f1 MOV %R8D,%R14D |
(14999) 0xb084f4 VZEROUPPER |
(14999) 0xb084f7 CALL fa3cb0 <@plt_start@+0x800> |
(14999) 0xb084fc MOV %R14D,%R8D |
(14999) 0xb084ff VMOVSD -0x7ab3a7(%RIP),%XMM12 |
(14999) 0xb08507 VMOVAPS -0x79ee4f(%RIP),%YMM11 |
(14999) 0xb0850f VMOVAPS -0x7a7f77(%RIP),%YMM10 |
(14999) 0xb08517 VMOVSS -0x7ac06f(%RIP),%XMM8 |
(14999) 0xb0851f MOV %R12,%R11 |
(14999) 0xb08522 MOV -0x80(%RBP),%R12 |
(14999) 0xb08526 MOV -0x50(%RBP),%R14 |
(14999) 0xb0852a MOVL $0,(%RBX) |
(14999) 0xb08530 CMP %R13D,%R8D |
(14999) 0xb08533 JGE b08490 |
(14999) 0xb08539 MOV 0x1c0(%R14),%RSI |
(14999) 0xb08540 MOV 0x60(%R14),%RAX |
(14999) 0xb08544 MOV 0x1c(%R14),%EDI |
(14999) 0xb08548 MOVSXD %R8D,%RDX |
(14999) 0xb0854b MOVSXD %R13D,%RCX |
(14999) 0xb0854e CMP 0x1c8(%R14),%RSI |
(14999) 0xb08555 JE b0863c |
(14999) 0xb0855b TEST %EDI,%EDI |
(14999) 0xb0855d JE b0870f |
(14999) 0xb08563 MOV 0x160(%R14),%RDI |
(14999) 0xb0856a LEA (%RDX,%RDX,2),%R8 |
(14999) 0xb0856e ADD %R8,%R8 |
(14999) 0xb08571 ADD $0x4,%R8 |
(14999) 0xb08575 JMP b08590 |
0xb08577 NOPW (%RAX,%RAX,1) |
(15003) 0xb08580 INC %RDX |
(15003) 0xb08583 ADD $0x6,%R8 |
(15003) 0xb08587 CMP %RDX,%RCX |
(15003) 0xb0858a JE b08490 |
(15003) 0xb08590 VMULSS (%RAX,%RDX,4),%XMM8,%XMM2 |
(15003) 0xb08595 MOV (%R11),%R9 |
(15003) 0xb08598 VMOVSD -0x8(%R9,%R8,2),%XMM1 |
(15003) 0xb0859f VMOVSS (%R9,%R8,2),%XMM0 |
(15003) 0xb085a5 MOVZX (%RSI,%RDX,2),%R9D |
(15003) 0xb085aa LEA (%R9,%R9,8),%R9 |
(15003) 0xb085ae VBROADCASTSS %XMM2,%XMM3 |
(15003) 0xb085b3 VMULPS %XMM1,%XMM3,%XMM3 |
(15003) 0xb085b7 VMULSS %XMM0,%XMM2,%XMM2 |
(15003) 0xb085bb VBROADCASTSS %XMM2,%YMM4 |
(15003) 0xb085c0 VPERMT2PS %YMM4,%YMM11,%YMM3 |
(15003) 0xb085c6 VMOVAPS %YMM1,%YMM4 |
(15003) 0xb085ca VPERMT2PS %YMM0,%YMM10,%YMM4 |
(15003) 0xb085d0 VFMADD213PS (%R15,%R9,4),%YMM3,%YMM4 |
(15003) 0xb085d6 VMOVUPS %YMM4,(%R15,%R9,4) |
(15003) 0xb085dc VFMADD213SS 0x20(%R15,%R9,4),%XMM0,%XMM2 |
(15003) 0xb085e3 VMOVSS %XMM2,0x20(%R15,%R9,4) |
(15003) 0xb085ea CMPB $0,(%RDI,%RDX,1) |
(15003) 0xb085ee JE b08580 |
(15003) 0xb085f0 MOV 0x30(%R14),%R9 |
(15003) 0xb085f4 MOV 0x48(%R14),%R10 |
(15003) 0xb085f8 VMOVSS (%R10,%RDX,4),%XMM2 |
(15003) 0xb085fe VSUBSS (%R9,%RDX,4),%XMM2,%XMM2 |
(15003) 0xb08604 VCVTSS2SD %XMM2,%XMM2,%XMM2 |
(15003) 0xb08608 VMULPS %XMM1,%XMM1,%XMM3 |
(15003) 0xb0860c VMOVSHDUP %XMM3,%XMM3 |
(15003) 0xb08610 VFMADD231SS %XMM1,%XMM1,%XMM3 |
(15003) 0xb08615 VFMADD213SS %XMM3,%XMM0,%XMM0 |
(15003) 0xb0861a VCVTSS2SD %XMM0,%XMM0,%XMM0 |
(15003) 0xb0861e VMOVSS (%RBX),%XMM1 |
(15003) 0xb08622 VCVTSS2SD %XMM1,%XMM1,%XMM1 |
(15003) 0xb08626 VMULSD %XMM2,%XMM12,%XMM2 |
(15003) 0xb0862a VFMADD231SD %XMM0,%XMM2,%XMM1 |
(15003) 0xb0862f VCVTSD2SS %XMM1,%XMM1,%XMM0 |
(15003) 0xb08633 VMOVSS %XMM0,(%RBX) |
(15003) 0xb08637 JMP b08580 |
(14999) 0xb0863c TEST %EDI,%EDI |
(14999) 0xb0863e JE b08787 |
(14999) 0xb08644 MOV 0x160(%R14),%RSI |
(14999) 0xb0864b LEA (%RDX,%RDX,2),%RDI |
(14999) 0xb0864f ADD $0x2,%RDI |
(14999) 0xb08653 JMP b08670 |
0xb08655 NOPW %CS:(%RAX,%RAX,1) |
(15001) 0xb08660 INC %RDX |
(15001) 0xb08663 ADD $0x3,%RDI |
(15001) 0xb08667 CMP %RDX,%RCX |
(15001) 0xb0866a JE b08490 |
(15001) 0xb08670 VMULSS (%RAX,%RDX,4),%XMM8,%XMM2 |
(15001) 0xb08675 MOV (%R11),%R8 |
(15001) 0xb08678 VMOVSD -0x8(%R8,%RDI,4),%XMM1 |
(15001) 0xb0867f VMOVSS (%R8,%RDI,4),%XMM0 |
(15001) 0xb08685 VBROADCASTSS %XMM2,%XMM3 |
(15001) 0xb0868a VMULPS %XMM1,%XMM3,%XMM3 |
(15001) 0xb0868e VMULSS %XMM0,%XMM2,%XMM2 |
(15001) 0xb08692 VBROADCASTSS %XMM2,%YMM4 |
(15001) 0xb08697 VPERMT2PS %YMM4,%YMM11,%YMM3 |
(15001) 0xb0869d VMOVAPS %YMM1,%YMM4 |
(15001) 0xb086a1 VPERMT2PS %YMM0,%YMM10,%YMM4 |
(15001) 0xb086a7 VFMADD213PS (%R15),%YMM3,%YMM4 |
(15001) 0xb086ac VMOVUPS %YMM4,(%R15) |
(15001) 0xb086b1 VFMADD213SS 0x20(%R15),%XMM0,%XMM2 |
(15001) 0xb086b7 VMOVSS %XMM2,0x20(%R15) |
(15001) 0xb086bd CMPB $0,(%RSI,%RDX,1) |
(15001) 0xb086c1 JE b08660 |
(15001) 0xb086c3 MOV 0x30(%R14),%R8 |
(15001) 0xb086c7 MOV 0x48(%R14),%R9 |
(15001) 0xb086cb VMOVSS (%R9,%RDX,4),%XMM2 |
(15001) 0xb086d1 VSUBSS (%R8,%RDX,4),%XMM2,%XMM2 |
(15001) 0xb086d7 VCVTSS2SD %XMM2,%XMM2,%XMM2 |
(15001) 0xb086db VMULPS %XMM1,%XMM1,%XMM3 |
(15001) 0xb086df VMOVSHDUP %XMM3,%XMM3 |
(15001) 0xb086e3 VFMADD231SS %XMM1,%XMM1,%XMM3 |
(15001) 0xb086e8 VFMADD213SS %XMM3,%XMM0,%XMM0 |
(15001) 0xb086ed VCVTSS2SD %XMM0,%XMM0,%XMM0 |
(15001) 0xb086f1 VMOVSS (%RBX),%XMM1 |
(15001) 0xb086f5 VCVTSS2SD %XMM1,%XMM1,%XMM1 |
(15001) 0xb086f9 VMULSD %XMM2,%XMM12,%XMM2 |
(15001) 0xb086fd VFMADD231SD %XMM0,%XMM2,%XMM1 |
(15001) 0xb08702 VCVTSD2SS %XMM1,%XMM1,%XMM0 |
(15001) 0xb08706 VMOVSS %XMM0,(%RBX) |
(15001) 0xb0870a JMP b08660 |
(14999) 0xb0870f LEA (%RDX,%RDX,2),%RDI |
(14999) 0xb08713 ADD %RDI,%RDI |
(14999) 0xb08716 ADD $0x4,%RDI |
(14999) 0xb0871a NOPW (%RAX,%RAX,1) |
(15002) 0xb08720 MOVZX (%RSI,%RDX,2),%R8D |
(15002) 0xb08725 VMULSS (%RAX,%RDX,4),%XMM8,%XMM0 |
(15002) 0xb0872a MOV (%R11),%R9 |
(15002) 0xb0872d VMOVSD -0x8(%R9,%RDI,2),%XMM1 |
(15002) 0xb08734 VMOVSS (%R9,%RDI,2),%XMM2 |
(15002) 0xb0873a LEA (%R8,%R8,8),%R8 |
(15002) 0xb0873e VBROADCASTSS %XMM0,%XMM3 |
(15002) 0xb08743 VMULPS %XMM1,%XMM3,%XMM3 |
(15002) 0xb08747 VMULSS %XMM2,%XMM0,%XMM0 |
(15002) 0xb0874b VBROADCASTSS %XMM0,%YMM4 |
(15002) 0xb08750 VPERMT2PS %YMM4,%YMM11,%YMM3 |
(15002) 0xb08756 VPERMT2PS %YMM2,%YMM10,%YMM1 |
(15002) 0xb0875c VFMADD213PS (%R15,%R8,4),%YMM3,%YMM1 |
(15002) 0xb08762 VMOVUPS %YMM1,(%R15,%R8,4) |
(15002) 0xb08768 VFMADD213SS 0x20(%R15,%R8,4),%XMM0,%XMM2 |
(15002) 0xb0876f VMOVSS %XMM2,0x20(%R15,%R8,4) |
(15002) 0xb08776 INC %RDX |
(15002) 0xb08779 ADD $0x6,%RDI |
(15002) 0xb0877d CMP %RDX,%RCX |
(15002) 0xb08780 JNE b08720 |
(14999) 0xb08782 JMP b08490 |
(14999) 0xb08787 VMOVSS (%R15),%XMM0 |
(14999) 0xb0878c SUB %R8D,%R13D |
(14999) 0xb0878f TEST $0x1,%R13B |
(14999) 0xb08793 JNE b087a6 |
(14999) 0xb08795 MOV %RDX,%RSI |
(14999) 0xb08798 NOT %RDX |
(14999) 0xb0879b ADD %RCX,%RDX |
(14999) 0xb0879e JE b08490 |
(14999) 0xb087a4 JMP b08817 |
(14999) 0xb087a6 VMULSS (%RAX,%RDX,4),%XMM8,%XMM1 |
(14999) 0xb087ab MOV (%R11),%RSI |
(14999) 0xb087ae LEA (%RDX,%RDX,2),%RDI |
(14999) 0xb087b2 VMOVSS (%RSI,%RDI,4),%XMM2 |
(14999) 0xb087b7 VMOVSD 0x4(%RSI,%RDI,4),%XMM3 |
(14999) 0xb087bd VMULSS %XMM2,%XMM1,%XMM4 |
(14999) 0xb087c1 VFMADD231SS %XMM2,%XMM4,%XMM0 |
(14999) 0xb087c6 VMOVSS %XMM0,(%R15) |
(14999) 0xb087cb VMULSS %XMM3,%XMM1,%XMM5 |
(14999) 0xb087cf VMOVSHDUP %XMM3,%XMM6 |
(14999) 0xb087d3 VMULSS %XMM6,%XMM1,%XMM1 |
(14999) 0xb087d7 VINSERTPS $0x20,%XMM5,%XMM4,%XMM4 |
(14999) 0xb087dd VBROADCASTSS %XMM1,%XMM1 |
(14999) 0xb087e2 VINSERTF128 $0x1,%XMM1,%YMM4,%YMM1 |
(14999) 0xb087e8 VMOVAPS -0x7a8030(%RIP),%YMM4 |
(14999) 0xb087f0 VPERMPS %YMM1,%YMM4,%YMM1 |
(14999) 0xb087f5 VPERMT2PS %YMM2,%YMM10,%YMM3 |
(14999) 0xb087fb VFMADD213PS 0x4(%R15),%YMM1,%YMM3 |
(14999) 0xb08801 VMOVUPS %YMM3,0x4(%R15) |
(14999) 0xb08807 LEA 0x1(%RDX),%RSI |
(14999) 0xb0880b NOT %RDX |
(14999) 0xb0880e ADD %RCX,%RDX |
(14999) 0xb08811 JE b08490 |
(14999) 0xb08817 LEA (,%RSI,4),%RDX |
(14999) 0xb0881f LEA (%RDX,%RDX,2),%RDX |
(14999) 0xb08823 NOPW %CS:(%RAX,%RAX,1) |
(15000) 0xb08830 VMULSS (%RAX,%RSI,4),%XMM8,%XMM1 |
(15000) 0xb08835 MOV (%R11),%RDI |
(15000) 0xb08838 VMOVSS (%RDI,%RDX,1),%XMM2 |
(15000) 0xb0883d VMOVSD 0x4(%RDI,%RDX,1),%XMM3 |
(15000) 0xb08843 VMULSS %XMM2,%XMM1,%XMM4 |
(15000) 0xb08847 VFMADD231SS %XMM2,%XMM4,%XMM0 |
(15000) 0xb0884c VMOVSS %XMM0,(%R15) |
(15000) 0xb08851 VMULSS %XMM3,%XMM1,%XMM5 |
(15000) 0xb08855 VMOVSHDUP %XMM3,%XMM6 |
(15000) 0xb08859 VMULSS %XMM6,%XMM1,%XMM1 |
(15000) 0xb0885d VINSERTPS $0x20,%XMM5,%XMM4,%XMM4 |
(15000) 0xb08863 VBROADCASTSS %XMM1,%XMM1 |
(15000) 0xb08868 VINSERTF128 $0x1,%XMM1,%YMM4,%YMM1 |
(15000) 0xb0886e VMOVAPS -0x7a80b6(%RIP),%YMM9 |
(15000) 0xb08876 VPERMPS %YMM1,%YMM9,%YMM1 |
(15000) 0xb0887b VPERMT2PS %YMM2,%YMM10,%YMM3 |
(15000) 0xb08881 VFMADD213PS 0x4(%R15),%YMM1,%YMM3 |
(15000) 0xb08887 VMOVUPS %YMM3,0x4(%R15) |
(15000) 0xb0888d VMULSS 0x4(%RAX,%RSI,4),%XMM8,%XMM1 |
(15000) 0xb08893 MOV (%R11),%RDI |
(15000) 0xb08896 VMOVSS 0xc(%RDI,%RDX,1),%XMM2 |
(15000) 0xb0889c VMOVSD 0x10(%RDI,%RDX,1),%XMM4 |
(15000) 0xb088a2 VMULSS %XMM2,%XMM1,%XMM5 |
(15000) 0xb088a6 VFMADD231SS %XMM2,%XMM5,%XMM0 |
(15000) 0xb088ab VMOVSS %XMM0,(%R15) |
(15000) 0xb088b0 VMULSS %XMM4,%XMM1,%XMM6 |
(15000) 0xb088b4 VMOVSHDUP %XMM4,%XMM7 |
(15000) 0xb088b8 VMULSS %XMM7,%XMM1,%XMM1 |
(15000) 0xb088bc VINSERTPS $0x20,%XMM6,%XMM5,%XMM5 |
(15000) 0xb088c2 VBROADCASTSS %XMM1,%XMM1 |
(15000) 0xb088c7 VINSERTF128 $0x1,%XMM1,%YMM5,%YMM1 |
(15000) 0xb088cd VPERMPS %YMM1,%YMM9,%YMM1 |
(15000) 0xb088d2 VPERMT2PS %YMM2,%YMM10,%YMM4 |
(15000) 0xb088d8 VFMADD213PS %YMM3,%YMM1,%YMM4 |
(15000) 0xb088dd VMOVUPS %YMM4,0x4(%R15) |
(15000) 0xb088e3 ADD $0x2,%RSI |
(15000) 0xb088e7 ADD $0x18,%RDX |
(15000) 0xb088eb CMP %RSI,%RCX |
(15000) 0xb088ee JNE b08830 |
(14999) 0xb088f4 JMP b08490 |
0xb088f9 NOPL (%RAX) |
0xb4b0f7 JMP b4b0f9 |
0xb4b0f9 CMP $0x2,%EDX |
0xb4b0fc JNE b4b10e |
0xb4b0fe MOV %RAX,%RDI |
0xb4b101 CALL fa3780 <@plt_start@+0x2d0> |
0xb4b106 MOV %RAX,%RDI |
0xb4b109 CALL fa3950 <@plt_start@+0x4a0> |
0xb4b10e MOV %RAX,%RDI |
0xb4b111 CALL 4abc30 <__clang_call_terminate> |
0xb4b116 NOPW %CS:(%RAX,%RAX,1) |
0xc16647 NOPW (%RAX,%RAX,1) |
0xc1669a NOPW (%RAX,%RAX,1) |
0xc1674a NOPW (%RAX,%RAX,1) |
0xc16794 NOPW %CS:(%RAX,%RAX,1) |
0xc1696f NOP |
0xc16b3c NOPL (%RAX) |
0xc16f2d NOPL (%RAX) |
0xc17018 NOPL (%RAX,%RAX,1) |
0xc17123 NOPW %CS:(%RAX,%RAX,1) |
0xc1747d NOPL (%RAX) |
0xc17556 NOPW %CS:(%RAX,%RAX,1) |
0xc17651 NOPW %CS:(%RAX,%RAX,1) |
0xc177e0 CMP $0x2,%EDX |
0xc177e3 JNE c177f5 |
0xc177e5 MOV %RAX,%RDI |
0xc177e8 CALL fa3780 <@plt_start@+0x2d0> |
0xc177ed MOV %RAX,%RDI |
0xc177f0 CALL fa3950 <@plt_start@+0x4a0> |
0xc177f5 MOV %RAX,%RDI |
0xc177f8 CALL 4abc30 <__clang_call_terminate> |
0xc177fd MOV %RAX,%RDI |
0xc17800 CALL 4abc30 <__clang_call_terminate> |
0xc17805 INT $0x3 |
0xc17806 INT $0x3 |
0xc17807 INT $0x3 |
0xc17808 INT $0x3 |
0xc17809 INT $0x3 |
0xc1780a INT $0x3 |
0xc1780b INT $0x3 |
0xc1780c INT $0x3 |
0xc1780d INT $0x3 |
0xc1780e INT $0x3 |
0xc1780f INT $0x3 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►78.69+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►11.48+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►9.84+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►84.53+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►10.50+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►4.97+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►82.49+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►12.99+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►4.52+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.05+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►1.95+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►82.61+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►8.70+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►7.39+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | compute_globals(gmx_global_sta[...] | md_support.cpp:147 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1778 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | md_support.cpp:147-219 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 126 |
nb uops | 107 |
loop length | 494 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 14 |
micro-operation queue | 17.83 cycles |
front end | 17.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 7.50 | 7.25 | 7.25 | 8.00 | 13.67 | 13.67 | 13.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 8.00 | 7.50 | 7.25 | 7.25 | 8.00 | 13.67 | 13.67 | 13.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 17.83 |
Dispatch | 13.67 |
Overall L1 | 17.83 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 16% |
all | 50% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 28% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 9% |
load | 10% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 29% |
load | 29% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 21% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV (%RDX),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE b08405 <.omp_outlined..4+0x95> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R9,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
DEC %EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV %EBX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVL $0x1,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
LEA -0x44(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x4d23b2(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x40(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x30(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x2c(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
CALL fa67e0 <@plt_start@+0x3330> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV -0x2c(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %EBX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
CMOVL %EAX,%EBX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
MOV %EBX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD -0x30(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
CMP %EBX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE b08414 <.omp_outlined..4+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA 0x4d238e(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
CALL fa67f0 <@plt_start@+0x3340> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV 0x10(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %R14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV (%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x280(%R14),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %EAX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%R12),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %EAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%R15),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSXD %EBX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
SAL $0x2,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%RAX,%RAX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VMOVSS -0x7abfb7(%RIP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVAPS -0x7a7ecf(%RIP),%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS -0x79edb7(%RIP),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVSD -0x7ab31f(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R14,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP b084a1 <.omp_outlined..4+0x131> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
JMP b4b0f9 <.omp_outlined..4+0x99> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
CMP $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JNE b4b10e <.omp_outlined..4+0xae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL fa3780 <@plt_start@+0x2d0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL fa3950 <@plt_start@+0x4a0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL 4abc30 <__clang_call_terminate> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
CMP $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE c177f5 <.omp_outlined..4+0x15c5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL fa3780 <@plt_start@+0x2d0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL fa3950 <@plt_start@+0x4a0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL 4abc30 <__clang_call_terminate> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL 4abc30 <__clang_call_terminate> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | md_support.cpp:147-219 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 126 |
nb uops | 107 |
loop length | 494 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 14 |
micro-operation queue | 17.83 cycles |
front end | 17.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 7.50 | 7.25 | 7.25 | 8.00 | 13.67 | 13.67 | 13.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 8.00 | 7.50 | 7.25 | 7.25 | 8.00 | 13.67 | 13.67 | 13.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 17.83 |
Dispatch | 13.67 |
Overall L1 | 17.83 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 16% |
all | 50% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 28% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 9% |
load | 10% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 29% |
load | 29% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 21% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV (%RDX),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE b08405 <.omp_outlined..4+0x95> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R9,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
DEC %EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV %EBX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVL $0x1,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
LEA -0x44(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x4d23b2(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x40(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x30(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x2c(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
CALL fa67e0 <@plt_start@+0x3330> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV -0x2c(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %EBX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
CMOVL %EAX,%EBX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
MOV %EBX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD -0x30(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
CMP %EBX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE b08414 <.omp_outlined..4+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA 0x4d238e(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
CALL fa67f0 <@plt_start@+0x3340> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV 0x10(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %R14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV (%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x280(%R14),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %EAX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%R12),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %EAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%R15),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSXD %EBX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
SAL $0x2,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%RAX,%RAX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VMOVSS -0x7abfb7(%RIP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVAPS -0x7a7ecf(%RIP),%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS -0x79edb7(%RIP),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVSD -0x7ab31f(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R14,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP b084a1 <.omp_outlined..4+0x131> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
JMP b4b0f9 <.omp_outlined..4+0x99> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
CMP $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JNE b4b10e <.omp_outlined..4+0xae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL fa3780 <@plt_start@+0x2d0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL fa3950 <@plt_start@+0x4a0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL 4abc30 <__clang_call_terminate> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
CMP $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE c177f5 <.omp_outlined..4+0x15c5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL fa3780 <@plt_start@+0x2d0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL fa3950 <@plt_start@+0x4a0> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL 4abc30 <__clang_call_terminate> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL 4abc30 <__clang_call_terminate> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A | |||||||||||||||||
INT $0x3 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1.27 | 0 | 0.95 | 0.01 | 0.98 | 0 | 0.98 | 0 | 1.01 | -0 | 0.87 | 0.01 | 0.77 | 0.01 | 0.8 | 0.01 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 0.89499992132187 | 0.10736503452063 |
2x1 | 2 | 1.27 | 2.54 | 2 | 0.4099999666214 | 0.077768176794052 |
4x1 | 4 | 0.95 | 3.79 | 4 | 0.27500000596046 | 0.094889149069786 |
8x1 | 8 | 0.98 | 7.82 | 8 | 0.13499999046326 | 0.073949880897999 |
16x1 | 16 | 0.98 | 15.75 | 16 | 0.10999999195337 | 0.068696908652782 |
32x1 | 20 | 1.01 | 32.33 | 32 | 0.069999992847443 | 0.053874641656876 |
64x1 | 40 | 0.87 | 55.69 | 64 | 0.10999997705221 | 0.040434580296278 |
128x1 | 72 | 0.77 | 98.21 | 128 | 0.03999999910593 | 0.053624391555786 |
192x1 | 103 | 0.8 | 153.95 | 192 | 0.029999999329448 | 0.04316097125411 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..4– | 0.04 | 0.01 |
▼Loop 14999 - md_support.cpp:148-219 - libgromacs_mpi.so.9.0.0– | 0.00 | 0.00 |
○Loop 15002 - md_support.cpp:181-209 - libgromacs_mpi.so.9.0.0 | 0.04 | 0.01 |
○Loop 15000 - md_support.cpp:181-209 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |
○Loop 15003 - md_support.cpp:181-219 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |
○Loop 15001 - md_support.cpp:181-219 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |