Function: gather_f_bsplines(gmx_pme_t const*, float const*, bool, PmeAtomComm const*, splinedata_t c ... | Module: libgromacs_mpi.so.9.0.0 | Source: pme_gather.cpp:103-360 [...] | Coverage (incl. loops): 1.50% | (excl. loops): 0.00% |
---|
Function: gather_f_bsplines(gmx_pme_t const*, float const*, bool, PmeAtomComm const*, splinedata_t c ... | Module: libgromacs_mpi.so.9.0.0 | Source: pme_gather.cpp:103-360 [...] | Coverage (incl. loops): 1.50% | (excl. loops): 0.00% |
---|
/home/eoseret/gromacs-2024.2/api/legacy/include/gromacs/math/vectypes.h: 102 - 102 |
-------------------------------------------------------------------------------- |
102: BasicVector(ValueType x, ValueType y, ValueType z) : x_{ x, y, z } {} |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1064 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
/home/eoseret/gromacs-2024.2/src/gromacs/ewald/pme_gather.cpp: 103 - 360 |
-------------------------------------------------------------------------------- |
103: for (int ithx = 0; (ithx < order); ithx++) |
104: { |
105: const int index_x = (idxX + ithx) * gridNY * gridNZ; |
106: const real tx = thx[ithx]; |
107: const real dx = dthx[ithx]; |
108: |
109: for (int ithy = 0; (ithy < order); ithy++) |
[...] |
116: for (int ithz = 0; (ithz < order); ithz++) |
117: { |
118: const real gval = grid_[index_xy + (idxZ + ithz)]; |
119: fxy1 += thz[ithz] * gval; |
120: fz1 += dthz[ithz] * gval; |
121: } |
122: f[XX] += dx * ty * fxy1; |
123: f[YY] += tx * dy * fxy1; |
124: f[ZZ] += tx * ty * fz1; |
[...] |
139: const int norder = nn_ * 4; |
140: /* Pointer arithmetic alert, next six statements */ |
141: const real* const gmx_restrict thx = spline_->theta.coefficients[XX] + norder; |
142: const real* const gmx_restrict thy = spline_->theta.coefficients[YY] + norder; |
143: const real* const gmx_restrict thz = spline_->theta.coefficients[ZZ] + norder; |
144: const real* const gmx_restrict dthx = spline_->dtheta.coefficients[XX] + norder; |
145: const real* const gmx_restrict dthy = spline_->dtheta.coefficients[YY] + norder; |
146: const real* const gmx_restrict dthz = spline_->dtheta.coefficients[ZZ] + norder; |
[...] |
164: const int index_xy = index_x + (idxY + ithy) * gridNZ; |
165: |
166: const Simd4NReal ty_S = loadUNDuplicate4(thy + ithy); |
167: const Simd4NReal dy_S = loadUNDuplicate4(dthy + ithy); |
168: |
169: const Simd4NReal gval_S = loadU4NOffset(grid_ + index_xy + idxZ, gridNZ); |
[...] |
195: *S0 = load4U(data - offset); |
[...] |
223: const real* const gmx_restrict thx = spline_->theta.coefficients[XX] + norder; |
224: const real* const gmx_restrict thy = spline_->theta.coefficients[YY] + norder; |
225: const real* const gmx_restrict thz = spline_->theta.coefficients[ZZ] + norder; |
226: const real* const gmx_restrict dthx = spline_->dtheta.coefficients[XX] + norder; |
227: const real* const gmx_restrict dthy = spline_->dtheta.coefficients[YY] + norder; |
228: const real* const gmx_restrict dthz = spline_->dtheta.coefficients[ZZ] + norder; |
229: |
230: struct pme_spline_work* const work = pme_->spline_work; |
231: |
232: const int offset = idxZ & 3; |
[...] |
242: tz_S0 = selectByMask(tz_S0, work->mask_S0[offset]); |
243: dz_S0 = selectByMask(dz_S0, work->mask_S0[offset]); |
244: tz_S1 = selectByMask(tz_S1, work->mask_S1[offset]); |
245: dz_S1 = selectByMask(dz_S1, work->mask_S1[offset]); |
246: |
247: for (int ithx = 0; (ithx < order); ithx++) |
248: { |
249: const int index_x = (idxX + ithx) * gridNY * gridNZ; |
250: const Simd4Real tx_S = Simd4Real(thx[ithx]); |
251: const Simd4Real dx_S = Simd4Real(dthx[ithx]); |
[...] |
259: const Simd4Real gval_S0 = load4(grid_ + index_xy + idxZ - offset); |
260: const Simd4Real gval_S1 = load4(grid_ + index_xy + idxZ - offset + 4); |
[...] |
286: const int gridNY = pme_->pmegrid_ny; |
287: const int gridNZ = pme_->pmegrid_nz; |
288: |
289: const int* const idxptr = atc_->idx[spline_->ind[nn_]]; |
290: const int idxX = idxptr[XX]; |
291: const int idxY = idxptr[YY]; |
292: const int idxZ = idxptr[ZZ]; |
[...] |
302: { |
[...] |
323: for (int nn = 0; nn < spline->n; nn++) |
324: { |
325: const int n = spline->ind[nn]; |
326: const real coefficient = scale * atc->coefficient[n]; |
327: |
328: if (bClearF) |
329: { |
330: force[n][XX] = 0; |
331: force[n][YY] = 0; |
332: force[n][ZZ] = 0; |
333: } |
334: if (coefficient != 0) |
335: { |
336: RVec f; |
337: const auto spline_func = do_fspline(pme, grid, atc, spline, nn); |
338: |
339: switch (order) |
[...] |
346: force[n][XX] += -coefficient * (f[XX] * nx * rxx); |
347: force[n][YY] += -coefficient * (f[XX] * nx * ryx + f[YY] * ny * ryy); |
348: force[n][ZZ] += -coefficient * (f[XX] * nx * rzx + f[YY] * ny * rzy + f[ZZ] * nz * rzz); |
[...] |
360: } |
/home/eoseret/gromacs-2024.2/src/external/boost/stl_interfaces/iterator_interface.hpp: 305 - 305 |
-------------------------------------------------------------------------------- |
305: D retval = derived(); |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd4_float.h: 123 - 287 |
-------------------------------------------------------------------------------- |
123: return { _mm_add_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
138: return { _mm_mul_ps(a.simdInternal_, b.simdInternal_) }; |
139: } |
140: |
141: static inline Simd4Float gmx_simdcall fma(Simd4Float a, Simd4Float b, Simd4Float c) |
142: { |
143: return { _mm_fmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
[...] |
266: return { _mm512_castps512_ps128(_mm512_mask_mov_ps( |
[...] |
286: b = _mm_add_ps(a.simdInternal_, _mm_permute_ps(a.simdInternal_, _MM_SHUFFLE(1, 0, 3, 2))); |
287: b = _mm_add_ss(b, _mm_permute_ps(b, _MM_SHUFFLE(0, 3, 2, 1))); |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h: 484 - 492 |
-------------------------------------------------------------------------------- |
484: return { _mm512_broadcast_f32x4(_mm_load_ps(f)) }; |
[...] |
492: return { _mm512_castpd_ps(_mm512_i32gather_pd(gdx, reinterpret_cast<const double*>(f), sizeof(float))) }; |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h: 57 - 352 |
-------------------------------------------------------------------------------- |
57: SimdFloat(float f) : simdInternal_(_mm512_set1_ps(f)) {} |
[...] |
197: return { _mm512_mul_ps(a.simdInternal_, b.simdInternal_) }; |
198: } |
199: |
200: static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c) |
201: { |
202: return { _mm512_fmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
[...] |
348: x = _mm512_add_ps(x, _mm512_shuffle_f32x4(x, x, 0xEE)); |
349: x = _mm512_add_ps(x, _mm512_shuffle_f32x4(x, x, 0x11)); |
350: x = _mm512_add_ps(x, _mm512_permute_ps(x, 0xEE)); |
351: x = _mm512_add_ps(x, _mm512_permute_ps(x, 0x11)); |
352: return *reinterpret_cast<float*>(&x); |
0xbfe3d0 PUSH %RBP |
0xbfe3d1 MOV %RSP,%RBP |
0xbfe3d4 PUSH %R15 |
0xbfe3d6 PUSH %R14 |
0xbfe3d8 PUSH %R13 |
0xbfe3da PUSH %R12 |
0xbfe3dc PUSH %RBX |
0xbfe3dd SUB $0x40,%RSP |
0xbfe3e1 MOV %EDX,-0x58(%RBP) |
0xbfe3e4 MOV %RDI,-0x48(%RBP) |
0xbfe3e8 MOV %R8,-0x38(%RBP) |
0xbfe3ec MOV (%R8),%EAX |
0xbfe3ef MOV %RAX,-0x98(%RBP) |
0xbfe3f6 TEST %EAX,%EAX |
0xbfe3f8 JLE bfeecf |
0xbfe3fe MOV -0x48(%RBP),%RDX |
0xbfe402 MOV 0x4c(%RDX),%R9D |
0xbfe406 VMOVSS 0x5d0(%RDX),%XMM14 |
0xbfe40e VMOVSS 0x5dc(%RDX),%XMM4 |
0xbfe416 VMOVSS 0x5e0(%RDX),%XMM5 |
0xbfe41e VMOVSS 0x5e8(%RDX),%XMM6 |
0xbfe426 VMOVSS 0x5ec(%RDX),%XMM7 |
0xbfe42e VMOVSS 0x5f0(%RDX),%XMM8 |
0xbfe436 MOVSXD %R9D,%RDI |
0xbfe439 MOV 0xa0(%RCX),%RAX |
0xbfe440 MOV %RAX,-0x68(%RBP) |
0xbfe444 VCVTSI2SSL 0x3c(%RDX),%XMM1,%XMM9 |
0xbfe449 VCVTSI2SSL 0x40(%RDX),%XMM1,%XMM31 |
0xbfe450 MOV -0x38(%RBP),%RAX |
0xbfe454 MOV 0x8(%RAX),%RAX |
0xbfe458 MOV %RAX,-0x90(%RBP) |
0xbfe45f VCVTSI2SSL 0x44(%RDX),%XMM1,%XMM1 |
0xbfe464 VMOVSS %XMM1,-0x54(%RBP) |
0xbfe469 MOV %R9D,%R14D |
0xbfe46c AND $0x3,%R14D |
0xbfe470 MOV %R9D,%R15D |
0xbfe473 AND $-0x4,%R15D |
0xbfe477 MOV %RDI,-0x70(%RBP) |
0xbfe47b LEA (,%RDI,4),%RAX |
0xbfe483 MOV %RAX,-0x88(%RBP) |
0xbfe48a XOR %EDI,%EDI |
0xbfe48c MOV $0xc,%R10D |
0xbfe492 VXORPS %XMM13,%XMM13,%XMM13 |
0xbfe497 XOR %R8D,%R8D |
0xbfe49a XOR %R11D,%R11D |
0xbfe49d MOV %RCX,-0x80(%RBP) |
0xbfe4a1 MOV %R9,-0x78(%RBP) |
0xbfe4a5 VMOVSS %XMM0,-0x50(%RBP) |
0xbfe4aa VMOVSS %XMM31,-0x4c(%RBP) |
0xbfe4b1 JMP bfe811 |
0xbfe4b6 NOPW %CS:(%RAX,%RAX,1) |
(17265) 0xbfe4c0 MOV -0x40(%RBP),%R10 |
(17265) 0xbfe4c4 VPBROADCASTD %R10D,%YMM10 |
(17265) 0xbfe4ca VPMULLD -0x89b713(%RIP),%YMM10,%YMM10 |
(17265) 0xbfe4d3 VPADDD -0x8985bd(%RIP),%YMM10,%YMM16 |
(17265) 0xbfe4dd MOV %R13D,%EAX |
(17265) 0xbfe4e0 IMUL %EBX,%EAX |
(17265) 0xbfe4e3 MOV -0x30(%RBP),%RCX |
(17265) 0xbfe4e7 ADD %ECX,%EAX |
(17265) 0xbfe4e9 IMUL %R10D,%EAX |
(17265) 0xbfe4ed CLTQ |
(17265) 0xbfe4ef LEA (%RSI,%RAX,4),%RAX |
(17265) 0xbfe4f3 LEA (%RAX,%R12,4),%RAX |
(17265) 0xbfe4f7 VXORPD %XMM18,%XMM18,%XMM18 |
(17265) 0xbfe4fd KXNORW %K0,%K0,%K1 |
(17265) 0xbfe501 VGATHERDPD (%RAX,%YMM16,4),%ZMM18{%K1} |
(17265) 0xbfe508 LEA (,%R11,4),%EAX |
(17265) 0xbfe510 MOV -0x38(%RBP),%RDI |
(17265) 0xbfe514 MOV 0x30(%RDI),%RDX |
(17265) 0xbfe518 VBROADCASTF32X4 (%RDX,%RAX,4),%ZMM17 |
(17265) 0xbfe51f LEA 0x1(%R13),%EDX |
(17265) 0xbfe523 IMUL %EBX,%EDX |
(17265) 0xbfe526 ADD %ECX,%EDX |
(17265) 0xbfe528 IMUL %R10D,%EDX |
(17265) 0xbfe52c MOVSXD %EDX,%RDX |
(17265) 0xbfe52f LEA (%RSI,%RDX,4),%RDX |
(17265) 0xbfe533 LEA (%RDX,%R12,4),%RDX |
(17265) 0xbfe537 VXORPD %XMM12,%XMM12,%XMM12 |
(17265) 0xbfe53c KXNORW %K0,%K0,%K1 |
(17265) 0xbfe540 VGATHERDPD (%RDX,%YMM16,4),%ZMM12{%K1} |
(17265) 0xbfe547 MOV 0x90(%RDI),%RDX |
(17265) 0xbfe54e VBROADCASTF32X4 (%RDX,%RAX,4),%ZMM10 |
(17265) 0xbfe555 LEA 0x2(%R13),%EDX |
(17265) 0xbfe559 IMUL %EBX,%EDX |
(17265) 0xbfe55c ADD %ECX,%EDX |
(17265) 0xbfe55e IMUL %R10D,%EDX |
(17265) 0xbfe562 MOVSXD %EDX,%RDX |
(17265) 0xbfe565 LEA (%RSI,%RDX,4),%RDX |
(17265) 0xbfe569 LEA (%RDX,%R12,4),%RDX |
(17265) 0xbfe56d VXORPS %XMM13,%XMM13,%XMM13 |
(17265) 0xbfe572 KXNORW %K0,%K0,%K1 |
(17265) 0xbfe576 VGATHERDPD (%RDX,%YMM16,4),%ZMM13{%K1} |
(17265) 0xbfe57d MOV 0x28(%RDI),%RDX |
(17265) 0xbfe581 MOV $0x1111,%R8W |
(17265) 0xbfe586 KMOVD %R8D,%K1 |
(17265) 0xbfe58b VEXPANDPS (%RDX,%RAX,4),%ZMM19{%K1}{z} |
(17265) 0xbfe592 MOV 0x88(%RDI),%RDX |
(17265) 0xbfe599 VEXPANDPS (%RDX,%RAX,4),%ZMM20{%K1}{z} |
(17265) 0xbfe5a0 MOV 0x20(%RDI),%RDX |
(17265) 0xbfe5a4 VBROADCASTSS (%RDX,%RAX,4),%ZMM21 |
(17265) 0xbfe5ab VBROADCASTSS 0x4(%RDX,%RAX,4),%ZMM22 |
(17265) 0xbfe5b3 VBROADCASTSS 0x8(%RDX,%RAX,4),%ZMM23 |
(17265) 0xbfe5bb VBROADCASTSS 0xc(%RDX,%RAX,4),%ZMM24 |
(17265) 0xbfe5c3 MOV 0x80(%RDI),%RDX |
(17265) 0xbfe5ca VPSHUFD $0,%ZMM19,%ZMM19 |
(17265) 0xbfe5d1 VMULPS (%RDX,%RAX,4){1to16},%ZMM19,%ZMM25 |
(17265) 0xbfe5d8 VMULPS 0x4(%RDX,%RAX,4){1to16},%ZMM19,%ZMM26 |
(17265) 0xbfe5e0 VMULPS 0x8(%RDX,%RAX,4){1to16},%ZMM19,%ZMM27 |
(17265) 0xbfe5e8 VMULPS 0xc(%RDX,%RAX,4){1to16},%ZMM19,%ZMM28 |
(17265) 0xbfe5f0 ADD $0x3,%R13D |
(17265) 0xbfe5f4 IMUL %EBX,%R13D |
(17265) 0xbfe5f8 ADD %ECX,%R13D |
(17265) 0xbfe5fb IMUL %R10D,%R13D |
(17265) 0xbfe5ff MOVSXD %R13D,%RAX |
(17265) 0xbfe602 LEA (%RSI,%RAX,4),%RAX |
(17265) 0xbfe606 LEA (%RAX,%R12,4),%RAX |
(17265) 0xbfe60a VXORPD %XMM29,%XMM29,%XMM29 |
(17265) 0xbfe610 KXNORW %K0,%K0,%K1 |
(17265) 0xbfe614 VGATHERDPD (%RAX,%YMM16,4),%ZMM29{%K1} |
(17265) 0xbfe61b VPSHUFD $0,%ZMM20,%ZMM16 |
(17265) 0xbfe622 VMULPS %ZMM18,%ZMM17,%ZMM20 |
(17265) 0xbfe628 VXORPS %XMM1,%XMM1,%XMM1 |
(17265) 0xbfe62c VFMADD213PS %ZMM1,%ZMM20,%ZMM25 |
(17265) 0xbfe632 VMULPS %ZMM16,%ZMM21,%ZMM30 |
(17265) 0xbfe638 VFMADD213PS %ZMM1,%ZMM20,%ZMM30 |
(17265) 0xbfe63e VMULPS %ZMM12,%ZMM17,%ZMM20 |
(17265) 0xbfe644 VFMADD213PS %ZMM25,%ZMM20,%ZMM26 |
(17265) 0xbfe64a VMULPS %ZMM16,%ZMM22,%ZMM25 |
(17265) 0xbfe650 VFMADD213PS %ZMM30,%ZMM20,%ZMM25 |
(17265) 0xbfe656 VMULPS %ZMM18,%ZMM10,%ZMM18 |
(17265) 0xbfe65c VMULPS %ZMM19,%ZMM21,%ZMM20 |
(17265) 0xbfe662 VFMADD213PS %ZMM1,%ZMM18,%ZMM20 |
(17265) 0xbfe668 VMULPS %ZMM12,%ZMM10,%ZMM12 |
(17265) 0xbfe66e VMULPS %ZMM19,%ZMM22,%ZMM18 |
(17265) 0xbfe674 VFMADD213PS %ZMM20,%ZMM12,%ZMM18 |
(17265) 0xbfe67a VMULPS %ZMM13,%ZMM17,%ZMM12 |
(17265) 0xbfe680 VFMADD213PS %ZMM26,%ZMM12,%ZMM27 |
(17265) 0xbfe686 VMULPS %ZMM16,%ZMM23,%ZMM20 |
(17265) 0xbfe68c VFMADD213PS %ZMM25,%ZMM12,%ZMM20 |
(17265) 0xbfe692 VMULPS %ZMM13,%ZMM10,%ZMM12 |
(17265) 0xbfe698 VMULPS %ZMM19,%ZMM23,%ZMM13 |
(17265) 0xbfe69e VFMADD213PS %ZMM18,%ZMM12,%ZMM13 |
(17265) 0xbfe6a4 VMULPS %ZMM29,%ZMM17,%ZMM12 |
(17265) 0xbfe6aa VMULPS %ZMM29,%ZMM10,%ZMM10 |
(17265) 0xbfe6b0 VFMADD213PS %ZMM27,%ZMM12,%ZMM28 |
(17265) 0xbfe6b6 VMULPS %ZMM16,%ZMM24,%ZMM16 |
(17265) 0xbfe6bc VFMADD213PS %ZMM20,%ZMM12,%ZMM16 |
(17265) 0xbfe6c2 VMULPS %ZMM19,%ZMM24,%ZMM12 |
(17265) 0xbfe6c8 VFMADD213PS %ZMM13,%ZMM10,%ZMM12 |
(17265) 0xbfe6ce VSHUFF64X2 $-0x12,%ZMM28,%ZMM28,%ZMM10 |
(17265) 0xbfe6d5 VADDPS %ZMM10,%ZMM28,%ZMM10 |
(17265) 0xbfe6db VEXTRACTF128 $0x1,%YMM10,%XMM13 |
(17265) 0xbfe6e1 VADDPS %XMM13,%XMM10,%XMM10 |
(17265) 0xbfe6e6 VPERMILPD $0x3,%XMM10,%XMM13 |
(17265) 0xbfe6ec VADDPS %XMM13,%XMM10,%XMM10 |
(17265) 0xbfe6f1 VMOVSHDUP %XMM10,%XMM13 |
(17265) 0xbfe6f6 VSHUFF64X2 $-0x12,%ZMM16,%ZMM16,%ZMM17 |
(17265) 0xbfe6fd VADDPS %ZMM17,%ZMM16,%ZMM16 |
(17265) 0xbfe703 VEXTRACTF32X4 $0x1,%YMM16,%XMM17 |
(17265) 0xbfe70a VADDPS %XMM17,%XMM16,%XMM16 |
(17265) 0xbfe710 VPERMILPD $0x3,%XMM16,%XMM17 |
(17265) 0xbfe717 VADDPS %XMM17,%XMM16,%XMM16 |
(17265) 0xbfe71d VMOVSHDUP %XMM16,%XMM18 |
(17265) 0xbfe723 VSHUFF64X2 $-0x12,%ZMM12,%ZMM12,%ZMM17 |
(17265) 0xbfe72a VADDPS %ZMM17,%ZMM12,%ZMM12 |
(17265) 0xbfe730 VEXTRACTF32X4 $0x1,%YMM12,%XMM17 |
(17265) 0xbfe737 VADDPS %XMM17,%XMM12,%XMM12 |
(17265) 0xbfe73d VPERMILPD $0x1,%XMM12,%XMM17 |
(17265) 0xbfe744 VADDPS %XMM17,%XMM12,%XMM12 |
(17265) 0xbfe74a VMOVSHDUP %XMM12,%XMM17 |
(17265) 0xbfe750 VADDSS %XMM17,%XMM12,%XMM17 |
(17265) 0xbfe756 VADDPS %XMM18,%XMM16,%XMM12 |
(17265) 0xbfe75c VADDPS %XMM13,%XMM10,%XMM10 |
(17265) 0xbfe761 VXORPS %XMM13,%XMM13,%XMM13 |
(17265) 0xbfe766 VINSERTPS $0x1c,%XMM12,%XMM10,%XMM16 |
(17265) 0xbfe76d VMULSS -0x54(%RBP),%XMM17,%XMM1 |
(17265) 0xbfe774 VMULSS %XMM9,%XMM16,%XMM2 |
(17265) 0xbfe77a VMULSS %XMM2,%XMM14,%XMM3 |
(17265) 0xbfe77e VMOVSHDUP %XMM16,%XMM10 |
(17265) 0xbfe784 VMULSS %XMM31,%XMM10,%XMM10 |
(17265) 0xbfe78a VMULSS %XMM5,%XMM10,%XMM11 |
(17265) 0xbfe78e VFMADD231SS %XMM4,%XMM2,%XMM11 |
(17265) 0xbfe793 MOV -0x68(%RBP),%RAX |
(17265) 0xbfe797 MOV -0xa0(%RBP),%RCX |
(17265) 0xbfe79e VMOVSD (%RAX,%RCX,4),%XMM12 |
(17265) 0xbfe7a3 VINSERTPS $0x10,%XMM11,%XMM3,%XMM3 |
(17265) 0xbfe7a9 VBROADCASTSS %XMM15,%XMM11 |
(17265) 0xbfe7ae VFNMADD213PS %XMM12,%XMM3,%XMM11 |
(17265) 0xbfe7b3 VMOVLPS %XMM11,(%RAX,%RCX,4) |
(17265) 0xbfe7b8 VMULSS %XMM7,%XMM10,%XMM3 |
(17265) 0xbfe7bc VFMADD231SS %XMM2,%XMM6,%XMM3 |
(17265) 0xbfe7c1 VFMADD231SS %XMM1,%XMM8,%XMM3 |
(17265) 0xbfe7c6 VFNMADD213SS 0x8(%RAX,%RCX,4),%XMM15,%XMM3 |
(17265) 0xbfe7cd VMOVSS %XMM3,0x8(%RAX,%RCX,4) |
(17265) 0xbfe7d3 MOV -0x80(%RBP),%RCX |
(17265) 0xbfe7d7 MOV -0xb8(%RBP),%RDI |
(17265) 0xbfe7de MOV -0xb0(%RBP),%R10 |
(17265) 0xbfe7e5 MOV -0x5c(%RBP),%R8D |
(17265) 0xbfe7e9 MOV -0xa8(%RBP),%R11 |
(17265) 0xbfe7f0 INC %R11 |
(17265) 0xbfe7f3 ADD $0x5,%R8D |
(17265) 0xbfe7f7 MOV -0x88(%RBP),%RAX |
(17265) 0xbfe7fe ADD %RAX,%R10 |
(17265) 0xbfe801 ADD %RAX,%RDI |
(17265) 0xbfe804 CMP -0x98(%RBP),%R11 |
(17265) 0xbfe80b JE bfeecf |
(17265) 0xbfe811 MOV -0x90(%RBP),%RAX |
(17265) 0xbfe818 MOVSXD (%RAX,%R11,4),%RAX |
(17265) 0xbfe81c MOV 0x90(%RCX),%RDX |
(17265) 0xbfe823 VMULSS (%RDX,%RAX,4),%XMM0,%XMM15 |
(17265) 0xbfe828 LEA (%RAX,%RAX,2),%RDX |
(17265) 0xbfe82c CMPB $0,-0x58(%RBP) |
(17265) 0xbfe830 JE bfe847 |
(17265) 0xbfe832 MOV -0x68(%RBP),%RAX |
(17265) 0xbfe836 VXORPS %XMM1,%XMM1,%XMM1 |
(17265) 0xbfe83a VMOVLPS %XMM1,(%RAX,%RDX,4) |
(17265) 0xbfe83f MOVL $0,0x8(%RAX,%RDX,4) |
(17265) 0xbfe847 VUCOMISS %XMM13,%XMM15 |
(17265) 0xbfe84c JNE bfe850 |
(17265) 0xbfe84e JNP bfe7f0 |
(17265) 0xbfe850 MOV -0x48(%RBP),%RAX |
(17265) 0xbfe854 MOV 0x54c(%RAX),%EBX |
(17265) 0xbfe85a MOV 0x550(%RAX),%EAX |
(17265) 0xbfe860 MOV %RAX,-0x40(%RBP) |
(17265) 0xbfe864 MOV 0x100(%RCX),%RAX |
(17265) 0xbfe86b MOV (%RAX,%RDX,4),%R13D |
(17265) 0xbfe86f MOV 0x4(%RAX,%RDX,4),%ECX |
(17265) 0xbfe873 MOV %RCX,-0x30(%RBP) |
(17265) 0xbfe877 MOVSXD 0x8(%RAX,%RDX,4),%R12 |
(17265) 0xbfe87c CMP $0x5,%R9D |
(17265) 0xbfe880 MOV %RDI,-0xb8(%RBP) |
(17265) 0xbfe887 MOV %R10,-0xb0(%RBP) |
(17265) 0xbfe88e MOV %R8D,-0x5c(%RBP) |
(17265) 0xbfe892 MOV %R11,-0xa8(%RBP) |
(17265) 0xbfe899 MOV %RDX,-0xa0(%RBP) |
(17265) 0xbfe8a0 JE bfeae0 |
(17265) 0xbfe8a6 CMP $0x4,%R9D |
(17265) 0xbfe8aa JE bfe4c0 |
(17265) 0xbfe8b0 TEST %R9D,%R9D |
(17265) 0xbfe8b3 JLE bfeebe |
(17265) 0xbfe8b9 MOV %R11,%R8 |
(17265) 0xbfe8bc IMUL -0x70(%RBP),%R8 |
(17265) 0xbfe8c1 SAL $0x2,%R8 |
(17265) 0xbfe8c5 MOV -0x38(%RBP),%RAX |
(17265) 0xbfe8c9 MOV 0x20(%RAX),%RDX |
(17265) 0xbfe8cd ADD %R8,%RDX |
(17265) 0xbfe8d0 MOV %RDX,-0xc8(%RBP) |
(17265) 0xbfe8d7 MOV 0x28(%RAX),%RDX |
(17265) 0xbfe8db ADD %R8,%RDX |
(17265) 0xbfe8de MOV 0x80(%RAX),%R9 |
(17265) 0xbfe8e5 ADD %R8,%R9 |
(17265) 0xbfe8e8 MOV %R9,-0xc0(%RBP) |
(17265) 0xbfe8ef ADD 0x88(%RAX),%R8 |
(17265) 0xbfe8f6 MOV 0x30(%RAX),%R9 |
(17265) 0xbfe8fa MOV 0x90(%RAX),%RCX |
(17265) 0xbfe901 IMUL %EBX,%R13D |
(17265) 0xbfe905 MOV -0x30(%RBP),%R11 |
(17265) 0xbfe909 ADD %R13D,%R11D |
(17265) 0xbfe90c MOV -0x40(%RBP),%RAX |
(17265) 0xbfe910 IMUL %EAX,%R11D |
(17265) 0xbfe914 ADD %R12D,%R11D |
(17265) 0xbfe917 IMUL %EAX,%EBX |
(17265) 0xbfe91a MOV %RBX,-0xd0(%RBP) |
(17265) 0xbfe921 LEA (%R9,%R10,1),%R13 |
(17265) 0xbfe925 LEA (%RCX,%R10,1),%R12 |
(17265) 0xbfe929 ADD %RDI,%RCX |
(17265) 0xbfe92c MOV %RCX,-0xe0(%RBP) |
(17265) 0xbfe933 ADD %RDI,%R9 |
(17265) 0xbfe936 MOV %R9,-0xe8(%RBP) |
(17265) 0xbfe93d VXORPS %XMM17,%XMM17,%XMM17 |
(17265) 0xbfe943 VXORPS %XMM16,%XMM16,%XMM16 |
(17265) 0xbfe949 XOR %ECX,%ECX |
(17265) 0xbfe94b JMP bfe96e |
0xbfe94d NOPL (%RAX) |
(17267) 0xbfe950 MOV -0xd8(%RBP),%RCX |
(17267) 0xbfe957 INC %RCX |
(17267) 0xbfe95a MOV -0x30(%RBP),%R11 |
(17267) 0xbfe95e ADD -0xd0(%RBP),%R11 |
(17267) 0xbfe965 CMP %R9,%RCX |
(17267) 0xbfe968 JE bfe76d |
(17267) 0xbfe96e MOV -0xc8(%RBP),%RAX |
(17267) 0xbfe975 VMOVSS (%RAX,%RCX,4),%XMM18 |
(17267) 0xbfe97c MOV -0xc0(%RBP),%RAX |
(17267) 0xbfe983 MOV %RCX,-0xd8(%RBP) |
(17267) 0xbfe98a VMOVSS (%RAX,%RCX,4),%XMM1 |
(17267) 0xbfe98f VINSERTPS $0x10,%XMM18,%XMM1,%XMM19 |
(17267) 0xbfe996 MOV %R11,-0x30(%RBP) |
(17267) 0xbfe99a XOR %R10D,%R10D |
(17267) 0xbfe99d JMP bfe9e4 |
0xbfe99f NOP |
(17268) 0xbfe9a0 VMOVSS (%RDX,%R10,4),%XMM1 |
(17268) 0xbfe9a6 VMOVSS (%R8,%R10,4),%XMM2 |
(17268) 0xbfe9ac VINSERTPS $0x10,%XMM2,%XMM1,%XMM2 |
(17268) 0xbfe9b2 VMULPS %XMM2,%XMM19,%XMM2 |
(17268) 0xbfe9b8 VBROADCASTSS %XMM21,%XMM3 |
(17268) 0xbfe9be VFMADD231PS %XMM3,%XMM2,%XMM16 |
(17268) 0xbfe9c4 VMULSS %XMM1,%XMM18,%XMM1 |
(17268) 0xbfe9ca VFMADD231SS %XMM1,%XMM20,%XMM17 |
(17268) 0xbfe9d0 INC %R10 |
(17268) 0xbfe9d3 ADD -0x40(%RBP),%R11 |
(17268) 0xbfe9d7 MOV -0x78(%RBP),%R9 |
(17268) 0xbfe9db CMP %R9,%R10 |
(17268) 0xbfe9de JE bfe950 |
(17268) 0xbfe9e4 CMPL $0x4,-0x70(%RBP) |
(17268) 0xbfe9e8 JAE bfea00 |
(17268) 0xbfe9ea VXORPS %XMM20,%XMM20,%XMM20 |
(17268) 0xbfe9f0 XOR %R9D,%R9D |
(17268) 0xbfe9f3 VXORPS %XMM21,%XMM21,%XMM21 |
(17268) 0xbfe9f9 JMP bfea87 |
0xbfe9fe XCHG %AX,%AX |
(17268) 0xbfea00 VXORPS %XMM20,%XMM20,%XMM20 |
(17268) 0xbfea06 XOR %R9D,%R9D |
(17268) 0xbfea09 VXORPS %XMM21,%XMM21,%XMM21 |
(17268) 0xbfea0f NOP |
(17266) 0xbfea10 LEA (%R11,%R9,1),%EAX |
(17266) 0xbfea14 CLTQ |
(17266) 0xbfea16 VMOVSS (%RSI,%RAX,4),%XMM1 |
(17266) 0xbfea1b VFMADD231SS -0xc(%R13,%R9,4),%XMM1,%XMM21 |
(17266) 0xbfea23 VFMADD231SS -0xc(%R12,%R9,4),%XMM1,%XMM20 |
(17266) 0xbfea2b LEA 0x1(%R11,%R9,1),%EAX |
(17266) 0xbfea30 CLTQ |
(17266) 0xbfea32 VMOVSS (%RSI,%RAX,4),%XMM1 |
(17266) 0xbfea37 VFMADD231SS -0x8(%R13,%R9,4),%XMM1,%XMM21 |
(17266) 0xbfea3f VFMADD231SS -0x8(%R12,%R9,4),%XMM1,%XMM20 |
(17266) 0xbfea47 LEA 0x2(%R11,%R9,1),%EAX |
(17266) 0xbfea4c CLTQ |
(17266) 0xbfea4e VMOVSS (%RSI,%RAX,4),%XMM1 |
(17266) 0xbfea53 VFMADD231SS -0x4(%R13,%R9,4),%XMM1,%XMM21 |
(17266) 0xbfea5b VFMADD231SS -0x4(%R12,%R9,4),%XMM1,%XMM20 |
(17266) 0xbfea63 LEA 0x3(%R11,%R9,1),%EAX |
(17266) 0xbfea68 CLTQ |
(17266) 0xbfea6a VMOVSS (%RSI,%RAX,4),%XMM1 |
(17266) 0xbfea6f VFMADD231SS (%R13,%R9,4),%XMM1,%XMM21 |
(17266) 0xbfea77 VFMADD231SS (%R12,%R9,4),%XMM1,%XMM20 |
(17266) 0xbfea7e ADD $0x4,%R9 |
(17266) 0xbfea82 CMP %R9,%R15 |
(17266) 0xbfea85 JNE bfea10 |
(17268) 0xbfea87 TEST %R14,%R14 |
(17268) 0xbfea8a JE bfe9a0 |
(17268) 0xbfea90 MOV -0xe0(%RBP),%RAX |
(17268) 0xbfea97 LEA (%RAX,%R9,4),%RCX |
(17268) 0xbfea9b MOV -0xe8(%RBP),%RAX |
(17268) 0xbfeaa2 LEA (%RAX,%R9,4),%RDI |
(17268) 0xbfeaa6 ADD %R11D,%R9D |
(17268) 0xbfeaa9 XOR %EAX,%EAX |
(17268) 0xbfeaab NOPL (%RAX,%RAX,1) |
(17269) 0xbfeab0 LEA (%R9,%RAX,1),%EBX |
(17269) 0xbfeab4 MOVSXD %EBX,%RBX |
(17269) 0xbfeab7 VMOVSS (%RSI,%RBX,4),%XMM1 |
(17269) 0xbfeabc VFMADD231SS (%RDI,%RAX,4),%XMM1,%XMM21 |
(17269) 0xbfeac3 VFMADD231SS (%RCX,%RAX,4),%XMM1,%XMM20 |
(17269) 0xbfeaca INC %RAX |
(17269) 0xbfeacd CMP %RAX,%R14 |
(17269) 0xbfead0 JNE bfeab0 |
(17268) 0xbfead2 JMP bfe9a0 |
0xbfead7 NOPW (%RAX,%RAX,1) |
(17265) 0xbfeae0 VMOVAPS %XMM9,%XMM0 |
(17265) 0xbfeae4 VMOVAPS %XMM8,%XMM9 |
(17265) 0xbfeae9 VMOVAPS %XMM7,%XMM8 |
(17265) 0xbfeaed VMOVAPS %XMM6,%XMM7 |
(17265) 0xbfeaf1 VMOVAPS %XMM5,%XMM6 |
(17265) 0xbfeaf5 VMOVAPS %XMM4,%XMM5 |
(17265) 0xbfeaf9 VMOVAPS %XMM14,%XMM4 |
(17265) 0xbfeafd MOV %R8D,%R8D |
(17265) 0xbfeb00 MOV %R12D,%EDX |
(17265) 0xbfeb03 LEA (%R11,%R11,4),%ECX |
(17265) 0xbfeb07 LEA (,%RCX,4),%RAX |
(17265) 0xbfeb0f MOV -0x48(%RBP),%RDI |
(17265) 0xbfeb13 MOV 0x568(%RDI),%RDI |
(17265) 0xbfeb1a MOVSXD %EDX,%R11 |
(17265) 0xbfeb1d AND $0x3,%EDX |
(17265) 0xbfeb20 KMOVW (%RDI,%RDX,2),%K1 |
(17265) 0xbfeb25 KMOVW 0xc(%RDI,%RDX,2),%K2 |
(17265) 0xbfeb2b MOV -0x38(%RBP),%R9 |
(17265) 0xbfeb2f MOV 0x30(%R9),%RDI |
(17265) 0xbfeb33 ADD %RAX,%RDI |
(17265) 0xbfeb36 LEA (,%RDX,4),%RDX |
(17265) 0xbfeb3e SUB %RDX,%RDI |
(17265) 0xbfeb41 VINSERTF32X4 $0,(%RDI),%ZMM0,%ZMM16{%K1}{z} |
(17265) 0xbfeb48 SAL $0x2,%R8 |
(17265) 0xbfeb4c VINSERTF32X4 $0,0x10(%RDI),%ZMM0,%ZMM17{%K2}{z} |
(17265) 0xbfeb54 MOV 0x28(%R9),%RDI |
(17265) 0xbfeb58 ADD 0x90(%R9),%RAX |
(17265) 0xbfeb5f SUB %RDX,%RAX |
(17265) 0xbfeb62 VINSERTF32X4 $0,(%RAX),%ZMM0,%ZMM18{%K1}{z} |
(17265) 0xbfeb69 VINSERTF32X4 $0,0x10(%RAX),%ZMM0,%ZMM19{%K2}{z} |
(17265) 0xbfeb71 MOV 0x88(%R9),%R10 |
(17265) 0xbfeb78 LEA (%RSI,%R11,4),%RAX |
(17265) 0xbfeb7c SUB %RDX,%RAX |
(17265) 0xbfeb7f VMOVSS (%RDI,%RCX,4),%XMM20 |
(17265) 0xbfeb86 VMOVSS 0x4(%RDI,%RCX,4),%XMM21 |
(17265) 0xbfeb8e VMOVSS (%R10,%RCX,4),%XMM22 |
(17265) 0xbfeb95 VMOVSS 0x4(%R10,%RCX,4),%XMM23 |
(17265) 0xbfeb9d VMOVSS 0x8(%RDI,%RCX,4),%XMM24 |
(17265) 0xbfeba5 VMOVSS 0x8(%R10,%RCX,4),%XMM25 |
(17265) 0xbfebad VMOVSS 0xc(%RDI,%RCX,4),%XMM26 |
(17265) 0xbfebb5 VMOVSS 0xc(%R10,%RCX,4),%XMM27 |
(17265) 0xbfebbd VMOVSS 0x10(%RDI,%RCX,4),%XMM28 |
(17265) 0xbfebc5 VMOVSS 0x10(%R10,%RCX,4),%XMM29 |
(17265) 0xbfebcd MOV %RBX,%R10 |
(17265) 0xbfebd0 IMUL %R10D,%R13D |
(17265) 0xbfebd4 MOV -0x30(%RBP),%R11 |
(17265) 0xbfebd8 LEA (%R11,%R13,1),%EDX |
(17265) 0xbfebdc MOV -0x40(%RBP),%RDI |
(17265) 0xbfebe0 IMUL %EDI,%EDX |
(17265) 0xbfebe3 IMUL %EDI,%R10D |
(17265) 0xbfebe7 LEA (%R11,%R13,1),%ECX |
(17265) 0xbfebeb ADD $0x4,%ECX |
(17265) 0xbfebee IMUL %EDI,%ECX |
(17265) 0xbfebf1 LEA (%R11,%R13,1),%EBX |
(17265) 0xbfebf5 ADD $0x3,%EBX |
(17265) 0xbfebf8 IMUL %EDI,%EBX |
(17265) 0xbfebfb LEA (%R11,%R13,1),%R12D |
(17265) 0xbfebff ADD $0x2,%R12D |
(17265) 0xbfec03 IMUL %EDI,%R12D |
(17265) 0xbfec07 ADD %R13D,%R11D |
(17265) 0xbfec0a INC %R11D |
(17265) 0xbfec0d IMUL %EDI,%R11D |
(17265) 0xbfec11 MOV 0x80(%R9),%RDI |
(17265) 0xbfec18 ADD %R8,%RDI |
(17265) 0xbfec1b ADD 0x20(%R9),%R8 |
(17265) 0xbfec1f VXORPS %XMM12,%XMM12,%XMM12 |
(17265) 0xbfec24 XOR %R13D,%R13D |
(17265) 0xbfec27 VXORPS %XMM10,%XMM10,%XMM10 |
(17265) 0xbfec2c VXORPS %XMM13,%XMM13,%XMM13 |
(17265) 0xbfec31 NOPW %CS:(%RAX,%RAX,1) |
(17264) 0xbfec40 VMOVSS (%R8,%R13,4),%XMM30 |
(17264) 0xbfec47 VMOVSS (%RDI,%R13,4),%XMM31 |
(17264) 0xbfec4e MOVSXD %EDX,%RDX |
(17264) 0xbfec51 VMOVAPS (%RAX,%RDX,4),%XMM14 |
(17264) 0xbfec56 VMOVAPS 0x10(%RAX,%RDX,4),%XMM11 |
(17264) 0xbfec5c VMULPS %XMM14,%XMM16,%XMM1 |
(17264) 0xbfec62 VMULPS %XMM14,%XMM18,%XMM14 |
(17264) 0xbfec68 VMULPS %XMM11,%XMM17,%XMM2 |
(17264) 0xbfec6e VADDPS %XMM2,%XMM1,%XMM1 |
(17264) 0xbfec72 VMULPS %XMM11,%XMM19,%XMM2 |
(17264) 0xbfec78 VADDPS %XMM2,%XMM14,%XMM2 |
(17264) 0xbfec7c VMULSS %XMM20,%XMM31,%XMM11 |
(17264) 0xbfec82 VBROADCASTSS %XMM11,%XMM11 |
(17264) 0xbfec87 VFMADD213PS %XMM12,%XMM1,%XMM11 |
(17264) 0xbfec8c VMULSS %XMM22,%XMM30,%XMM12 |
(17264) 0xbfec92 VBROADCASTSS %XMM12,%XMM12 |
(17264) 0xbfec97 VFMADD213PS %XMM10,%XMM1,%XMM12 |
(17264) 0xbfec9c VMULSS %XMM20,%XMM30,%XMM1 |
(17264) 0xbfeca2 VBROADCASTSS %XMM1,%XMM1 |
(17264) 0xbfeca7 VFMADD213PS %XMM13,%XMM2,%XMM1 |
(17264) 0xbfecac MOVSXD %R11D,%R11 |
(17264) 0xbfecaf VMOVAPS (%RAX,%R11,4),%XMM2 |
(17264) 0xbfecb5 VMOVAPS 0x10(%RAX,%R11,4),%XMM10 |
(17264) 0xbfecbc VMULPS %XMM2,%XMM16,%XMM13 |
(17264) 0xbfecc2 VMULPS %XMM2,%XMM18,%XMM2 |
(17264) 0xbfecc8 VMULPS %XMM10,%XMM17,%XMM14 |
(17264) 0xbfecce VADDPS %XMM14,%XMM13,%XMM13 |
(17264) 0xbfecd3 VMULPS %XMM10,%XMM19,%XMM10 |
(17264) 0xbfecd9 VADDPS %XMM2,%XMM10,%XMM2 |
(17264) 0xbfecdd VMULSS %XMM21,%XMM31,%XMM10 |
(17264) 0xbfece3 VBROADCASTSS %XMM10,%XMM10 |
(17264) 0xbfece8 VFMADD213PS %XMM11,%XMM13,%XMM10 |
(17264) 0xbfeced VMULSS %XMM23,%XMM30,%XMM11 |
(17264) 0xbfecf3 VBROADCASTSS %XMM11,%XMM11 |
(17264) 0xbfecf8 VFMADD213PS %XMM12,%XMM13,%XMM11 |
(17264) 0xbfecfd VMULSS %XMM21,%XMM30,%XMM12 |
(17264) 0xbfed03 VBROADCASTSS %XMM12,%XMM12 |
(17264) 0xbfed08 VFMADD213PS %XMM1,%XMM2,%XMM12 |
(17264) 0xbfed0d MOVSXD %R12D,%R12 |
(17264) 0xbfed10 VMOVAPS (%RAX,%R12,4),%XMM1 |
(17264) 0xbfed16 VMOVAPS 0x10(%RAX,%R12,4),%XMM2 |
(17264) 0xbfed1d VMULPS %XMM1,%XMM16,%XMM13 |
(17264) 0xbfed23 VMULPS %XMM1,%XMM18,%XMM1 |
(17264) 0xbfed29 VMULPS %XMM2,%XMM17,%XMM14 |
(17264) 0xbfed2f VADDPS %XMM14,%XMM13,%XMM13 |
(17264) 0xbfed34 VMULPS %XMM2,%XMM19,%XMM2 |
(17264) 0xbfed3a VADDPS %XMM2,%XMM1,%XMM1 |
(17264) 0xbfed3e VMULSS %XMM24,%XMM31,%XMM2 |
(17264) 0xbfed44 VBROADCASTSS %XMM2,%XMM2 |
(17264) 0xbfed49 VFMADD213PS %XMM10,%XMM13,%XMM2 |
(17264) 0xbfed4e VMULSS %XMM25,%XMM30,%XMM10 |
(17264) 0xbfed54 VBROADCASTSS %XMM10,%XMM10 |
(17264) 0xbfed59 VFMADD213PS %XMM11,%XMM13,%XMM10 |
(17264) 0xbfed5e VMULSS %XMM24,%XMM30,%XMM11 |
(17264) 0xbfed64 VBROADCASTSS %XMM11,%XMM11 |
(17264) 0xbfed69 VFMADD213PS %XMM12,%XMM1,%XMM11 |
(17264) 0xbfed6e MOVSXD %EBX,%RBX |
(17264) 0xbfed71 VMOVAPS (%RAX,%RBX,4),%XMM1 |
(17264) 0xbfed76 VMOVAPS 0x10(%RAX,%RBX,4),%XMM12 |
(17264) 0xbfed7c VMULPS %XMM1,%XMM16,%XMM13 |
(17264) 0xbfed82 VMULPS %XMM1,%XMM18,%XMM1 |
(17264) 0xbfed88 VMULPS %XMM12,%XMM17,%XMM14 |
(17264) 0xbfed8e VADDPS %XMM14,%XMM13,%XMM13 |
(17264) 0xbfed93 VMULPS %XMM12,%XMM19,%XMM12 |
(17264) 0xbfed99 VADDPS %XMM1,%XMM12,%XMM1 |
(17264) 0xbfed9d VMULSS %XMM26,%XMM31,%XMM12 |
(17264) 0xbfeda3 VBROADCASTSS %XMM12,%XMM14 |
(17264) 0xbfeda8 VFMADD213PS %XMM2,%XMM13,%XMM14 |
(17264) 0xbfedad VMULSS %XMM27,%XMM30,%XMM2 |
(17264) 0xbfedb3 VBROADCASTSS %XMM2,%XMM2 |
(17264) 0xbfedb8 VFMADD213PS %XMM10,%XMM13,%XMM2 |
(17264) 0xbfedbd VMULSS %XMM26,%XMM30,%XMM10 |
(17264) 0xbfedc3 VBROADCASTSS %XMM10,%XMM3 |
(17264) 0xbfedc8 VFMADD213PS %XMM11,%XMM1,%XMM3 |
(17264) 0xbfedcd MOVSXD %ECX,%RCX |
(17264) 0xbfedd0 VMOVAPS (%RAX,%RCX,4),%XMM1 |
(17264) 0xbfedd5 VMOVAPS 0x10(%RAX,%RCX,4),%XMM10 |
(17264) 0xbfeddb VMULPS %XMM1,%XMM16,%XMM11 |
(17264) 0xbfede1 VMULPS %XMM1,%XMM18,%XMM1 |
(17264) 0xbfede7 VMULPS %XMM10,%XMM17,%XMM12 |
(17264) 0xbfeded VADDPS %XMM12,%XMM11,%XMM11 |
(17264) 0xbfedf2 VMULPS %XMM10,%XMM19,%XMM10 |
(17264) 0xbfedf8 VADDPS %XMM1,%XMM10,%XMM1 |
(17264) 0xbfedfc VMULSS %XMM28,%XMM31,%XMM10 |
(17264) 0xbfee02 VBROADCASTSS %XMM10,%XMM12 |
(17264) 0xbfee07 VFMADD213PS %XMM14,%XMM11,%XMM12 |
(17264) 0xbfee0c VMULSS %XMM29,%XMM30,%XMM10 |
(17264) 0xbfee12 VBROADCASTSS %XMM10,%XMM10 |
(17264) 0xbfee17 VFMADD213PS %XMM2,%XMM11,%XMM10 |
(17264) 0xbfee1c VMULSS %XMM28,%XMM30,%XMM2 |
(17264) 0xbfee22 VBROADCASTSS %XMM2,%XMM13 |
(17264) 0xbfee27 VFMADD213PS %XMM3,%XMM1,%XMM13 |
(17264) 0xbfee2c INC %R13 |
(17264) 0xbfee2f ADD %R10D,%EDX |
(17264) 0xbfee32 ADD %R10D,%ECX |
(17264) 0xbfee35 ADD %R10D,%EBX |
(17264) 0xbfee38 ADD %R10D,%R12D |
(17264) 0xbfee3b ADD %R10D,%R11D |
(17264) 0xbfee3e CMP $0x5,%R13 |
(17264) 0xbfee42 JNE bfec40 |
(17265) 0xbfee48 VPERMILPD $0x1,%XMM12,%XMM1 |
(17265) 0xbfee4e VADDPS %XMM1,%XMM12,%XMM1 |
(17265) 0xbfee52 VMOVSHDUP %XMM1,%XMM2 |
(17265) 0xbfee56 VADDPS %XMM2,%XMM1,%XMM1 |
(17265) 0xbfee5a VPERMILPD $0x1,%XMM10,%XMM2 |
(17265) 0xbfee60 VADDPS %XMM2,%XMM10,%XMM2 |
(17265) 0xbfee64 VMOVSHDUP %XMM2,%XMM3 |
(17265) 0xbfee68 VADDPS %XMM3,%XMM2,%XMM2 |
(17265) 0xbfee6c VINSERTPS $0x1c,%XMM2,%XMM1,%XMM16 |
(17265) 0xbfee73 VPERMILPD $0x1,%XMM13,%XMM1 |
(17265) 0xbfee79 VADDPS %XMM1,%XMM13,%XMM1 |
(17265) 0xbfee7d VMOVSHDUP %XMM1,%XMM2 |
(17265) 0xbfee81 VADDSS %XMM2,%XMM1,%XMM17 |
(17265) 0xbfee87 MOV -0x78(%RBP),%R9 |
(17265) 0xbfee8b VMOVAPS %XMM4,%XMM14 |
(17265) 0xbfee8f VMOVAPS %XMM5,%XMM4 |
(17265) 0xbfee93 VMOVAPS %XMM6,%XMM5 |
(17265) 0xbfee97 VMOVAPS %XMM7,%XMM6 |
(17265) 0xbfee9b VMOVAPS %XMM8,%XMM7 |
(17265) 0xbfee9f VMOVAPS %XMM9,%XMM8 |
(17265) 0xbfeea4 VMOVAPS %XMM0,%XMM9 |
(17265) 0xbfeea8 VMOVSS -0x50(%RBP),%XMM0 |
(17265) 0xbfeead VMOVSS -0x4c(%RBP),%XMM31 |
(17265) 0xbfeeb4 VXORPS %XMM13,%XMM13,%XMM13 |
(17265) 0xbfeeb9 JMP bfe76d |
(17265) 0xbfeebe VXORPS %XMM17,%XMM17,%XMM17 |
(17265) 0xbfeec4 VXORPS %XMM16,%XMM16,%XMM16 |
(17265) 0xbfeeca JMP bfe76d |
0xbfeecf ADD $0x40,%RSP |
0xbfeed3 POP %RBX |
0xbfeed4 POP %R12 |
0xbfeed6 POP %R13 |
0xbfeed8 POP %R14 |
0xbfeeda POP %R15 |
0xbfeedc POP %RBP |
0xbfeedd VZEROUPPER |
0xbfeee0 RET |
0xbfeee1 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►74.62+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►13.30+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►12.05+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►80.53+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►13.09+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►6.38+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►68.03+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►23.59+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
►8.37+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►60.08+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►27.95+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►11.95+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►46.34+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►35.51+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
►18.15+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined..51 | pme.cpp:1382 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1381 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | pme_gather.cpp:103-360 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 65 |
nb uops | 59 |
loop length | 288 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 11 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 2.25 | 2.25 | 1.50 | 9.33 | 9.33 | 9.33 | 0.00 | 0.00 | 1.50 | 1.50 | 1.50 | 1.50 |
cycles | 2.50 | 2.50 | 2.25 | 2.25 | 1.50 | 9.33 | 9.33 | 9.33 | 0.00 | 0.00 | 1.50 | 1.50 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 9.83 |
Dispatch | 9.33 |
Overall L1 | 9.83 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 10% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 9% |
load | 6% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 8% |
load | 6% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 6% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %EDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%R8),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE bfeecf <_Z17gather_f_bsplinesPK9gmx_pme_tPKfbPK11PmeAtomCommPK12splinedata_tf+0xaff> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x4c(%RDX),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
VMOVSS 0x5d0(%RDX),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5dc(%RDX),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5e0(%RDX),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5e8(%RDX),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5ec(%RDX),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5f0(%RDX),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
MOVSXD %R9D,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0xa0(%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VCVTSI2SSL 0x3c(%RDX),%XMM1,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
VCVTSI2SSL 0x40(%RDX),%XMM1,%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VCVTSI2SSL 0x44(%RDX),%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
VMOVSS %XMM1,-0x54(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (6.3%) |
MOV %R9D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
AND $0x3,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
MOV %R9D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
MOV %RDI,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (,%RDI,4),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV $0xc,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
VXORPS %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VMOVSS %XMM0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (6.3%) |
VMOVSS %XMM31,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (6.3%) |
JMP bfe811 <_Z17gather_f_bsplinesPK9gmx_pme_tPKfbPK11PmeAtomCommPK12splinedata_tf+0x441> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
ADD $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | pme_gather.cpp:103-360 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 65 |
nb uops | 59 |
loop length | 288 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 11 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 2.25 | 2.25 | 1.50 | 9.33 | 9.33 | 9.33 | 0.00 | 0.00 | 1.50 | 1.50 | 1.50 | 1.50 |
cycles | 2.50 | 2.50 | 2.25 | 2.25 | 1.50 | 9.33 | 9.33 | 9.33 | 0.00 | 0.00 | 1.50 | 1.50 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 9.83 |
Dispatch | 9.33 |
Overall L1 | 9.83 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 10% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 9% |
load | 6% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 8% |
load | 6% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 6% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %EDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%R8),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE bfeecf <_Z17gather_f_bsplinesPK9gmx_pme_tPKfbPK11PmeAtomCommPK12splinedata_tf+0xaff> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x48(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x4c(%RDX),%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
VMOVSS 0x5d0(%RDX),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5dc(%RDX),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5e0(%RDX),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5e8(%RDX),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5ec(%RDX),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
VMOVSS 0x5f0(%RDX),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
MOVSXD %R9D,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0xa0(%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VCVTSI2SSL 0x3c(%RDX),%XMM1,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
VCVTSI2SSL 0x40(%RDX),%XMM1,%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VCVTSI2SSL 0x44(%RDX),%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (6.3%) |
VMOVSS %XMM1,-0x54(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (6.3%) |
MOV %R9D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
AND $0x3,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
MOV %R9D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
MOV %RDI,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (,%RDI,4),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV $0xc,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
VXORPS %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VMOVSS %XMM0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (6.3%) |
VMOVSS %XMM31,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (6.3%) |
JMP bfe811 <_Z17gather_f_bsplinesPK9gmx_pme_tPKfbPK11PmeAtomCommPK12splinedata_tf+0x441> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
ADD $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 0.98 | 0.07 | 1.02 | 0 | 0.98 | 0.05 | 1 | 0.01 | 1 | -0 | 1.04 | 0 | 1 | 0 | 0.98 | 0.03 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 38.019992828369 | 4.5609140396118 |
2x1 | 2 | 0.98 | 1.97 | 2 | 19.820003509521 | 4.267894744873 |
4x1 | 4 | 1.02 | 4.06 | 4 | 10.109999656677 | 3.7578749656677 |
8x1 | 8 | 0.98 | 7.87 | 8 | 5.1800031661987 | 3.1208491325378 |
16x1 | 16 | 1 | 15.93 | 16 | 2.6400005817413 | 2.8856344223022 |
32x1 | 12 | 1 | 32.05 | 32 | 3.7099986076355 | 2.3081033229828 |
64x1 | 24 | 1.04 | 66.25 | 64 | 1.8199993371964 | 1.4438563585281 |
128x1 | 44 | 1 | 127.7 | 128 | 1.1699998378754 | 1.7519024610519 |
192x1 | 64 | 0.98 | 187.8 | 192 | 0.86000007390976 | 1.5030028820038 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼gather_f_bsplines(gmx_pme_t const*, float const*, bool, PmeAtomComm const*, splinedata_t const*, float)– | 1.50 | 0.20 |
▼Loop 17265 - pme_gather.cpp:103-348 - libgromacs_mpi.so.9.0.0– | 1.50 | 0.60 |
○Loop 17264 - pme_gather.cpp:247-260 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |
▼Loop 17267 - pme_gather.cpp:103-124 - libgromacs_mpi.so.9.0.0– | 0.00 | 0.00 |
▼Loop 17268 - pme_gather.cpp:109-124 - libgromacs_mpi.so.9.0.0– | 0.00 | 0.00 |
○Loop 17266 - pme_gather.cpp:116-120 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |
○Loop 17269 - pme_gather.cpp:116-120 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |