Function: copy_fftgrid_to_pmegrid(gmx_pme_t*, float const*, float*, int, int, int) | Module: libgromacs_mpi.so.9.0.0 | Source: pme_grid.cpp:367-418 [...] | Coverage (incl. loops): 0.05% | (excl. loops): 0.00% |
---|
Function: copy_fftgrid_to_pmegrid(gmx_pme_t*, float const*, float*, int, int, int) | Module: libgromacs_mpi.so.9.0.0 | Source: pme_grid.cpp:367-418 [...] | Coverage (incl. loops): 0.05% | (excl. loops): 0.00% |
---|
/home/eoseret/gromacs-2024.2/src/gromacs/ewald/pme_grid.cpp: 367 - 418 |
-------------------------------------------------------------------------------- |
367: { |
[...] |
382: gmx_parallel_3dfft_real_limits( |
383: pme->pfft_setup[grid_index], local_fft_ndata, local_fft_offset, local_fft_size); |
[...] |
392: ixy0 = ((thread)*local_fft_ndata[XX] * local_fft_ndata[YY]) / nthread; |
393: ixy1 = ((thread + 1) * local_fft_ndata[XX] * local_fft_ndata[YY]) / nthread; |
394: |
395: for (ixy = ixy0; ixy < ixy1; ixy++) |
396: { |
397: ix = ixy / local_fft_ndata[YY]; |
398: iy = ixy - ix * local_fft_ndata[YY]; |
399: |
400: pmeidx = (ix * local_pme_size[YY] + iy) * local_pme_size[ZZ]; |
401: fftidx = (ix * local_fft_size[YY] + iy) * local_fft_size[ZZ]; |
402: for (iz = 0; iz < local_fft_ndata[ZZ]; iz++) |
403: { |
404: pmegrid[pmeidx + iz] = fftgrid[fftidx + iz]; |
[...] |
418: return 0; |
0xc005c0 PUSH %RBP |
0xc005c1 MOV %RSP,%RBP |
0xc005c4 PUSH %R15 |
0xc005c6 PUSH %R14 |
0xc005c8 PUSH %R13 |
0xc005ca PUSH %R12 |
0xc005cc PUSH %RBX |
0xc005cd SUB $0x78,%RSP |
0xc005d1 MOV %R9D,%R13D |
0xc005d4 MOV %R8D,%R12D |
0xc005d7 MOV %RDX,%RBX |
0xc005da MOV %RSI,%R14 |
0xc005dd MOV %RDI,%R15 |
0xc005e0 MOV 0x580(%RDI),%RAX |
0xc005e7 MOVSXD %ECX,%RCX |
0xc005ea MOV (%RAX,%RCX,8),%RDI |
0xc005ee LEA -0x4c(%RBP),%RSI |
0xc005f2 LEA -0x98(%RBP),%RDX |
0xc005f9 LEA -0x8c(%RBP),%RCX |
0xc00600 CALL fb54e0 <@plt_start@+0x12030> |
0xc00605 MOV -0x48(%RBP),%EAX |
0xc00608 MOV -0x4c(%RBP),%ESI |
0xc0060b MOV %EAX,-0x34(%RBP) |
0xc0060e IMUL %EAX,%ESI |
0xc00611 MOV %ESI,%EAX |
0xc00613 IMUL %R13D,%EAX |
0xc00617 CLTD |
0xc00618 IDIV %R12D |
0xc0061b MOV %EAX,%ECX |
0xc0061d INC %R13D |
0xc00620 IMUL %ESI,%R13D |
0xc00624 MOV %R13D,%EAX |
0xc00627 CLTD |
0xc00628 IDIV %R12D |
0xc0062b CMP %EAX,%ECX |
0xc0062d JGE c00896 |
0xc00633 MOV -0x44(%RBP),%R8D |
0xc00637 TEST %R8D,%R8D |
0xc0063a JLE c00896 |
0xc00640 MOV %EAX,%ESI |
0xc00642 MOV 0x54c(%R15),%EAX |
0xc00649 MOV %EAX,-0x30(%RBP) |
0xc0064c MOVSXD 0x550(%R15),%RAX |
0xc00653 MOV %RAX,-0x80(%RBP) |
0xc00657 MOV -0x88(%RBP),%EAX |
0xc0065d MOV %EAX,-0x2c(%RBP) |
0xc00660 MOVSXD -0x84(%RBP),%RAX |
0xc00667 MOV %RAX,-0x78(%RBP) |
0xc0066b MOV %R8D,%EAX |
0xc0066e AND $-0x20,%EAX |
0xc00671 MOV %RAX,-0x40(%RBP) |
0xc00675 MOV %R8D,%R9D |
0xc00678 AND $-0x4,%R9D |
0xc0067c LEA 0x60(%R14),%RAX |
0xc00680 MOV %RAX,-0x60(%RBP) |
0xc00684 LEA 0x60(%RBX),%RAX |
0xc00688 MOV %RAX,-0x58(%RBP) |
0xc0068c LEA (,%R8,4),%R12 |
0xc00694 AND $-0x80,%R12 |
0xc00698 LEA 0x1c(%RBX),%RAX |
0xc0069c MOV %RAX,-0x70(%RBP) |
0xc006a0 LEA 0x1c(%R14),%RAX |
0xc006a4 MOV %RAX,-0x68(%RBP) |
0xc006a8 JMP c006ba |
0xc006aa NOPW (%RAX,%RAX,1) |
(17305) 0xc006b0 INC %ECX |
(17305) 0xc006b2 CMP %ESI,%ECX |
(17305) 0xc006b4 JE c00896 |
(17305) 0xc006ba MOV %ECX,%EAX |
(17305) 0xc006bc CLTD |
(17305) 0xc006bd IDIVL -0x34(%RBP) |
(17305) 0xc006c0 MOV -0x2c(%RBP),%EDI |
(17305) 0xc006c3 IMUL %EAX,%EDI |
(17305) 0xc006c6 IMUL -0x30(%RBP),%EAX |
(17305) 0xc006ca ADD %EDX,%EAX |
(17305) 0xc006cc CLTQ |
(17305) 0xc006ce IMUL -0x80(%RBP),%RAX |
(17305) 0xc006d3 ADD %EDX,%EDI |
(17305) 0xc006d5 MOVSXD %EDI,%RDX |
(17305) 0xc006d8 IMUL -0x78(%RBP),%RDX |
(17305) 0xc006dd CMP $0x4,%R8D |
(17305) 0xc006e1 JB c006fb |
(17305) 0xc006e3 LEA (%RBX,%RAX,4),%RDI |
(17305) 0xc006e7 LEA (%R14,%RDX,4),%R10 |
(17305) 0xc006eb SUB %R10,%RDI |
(17305) 0xc006ee CMP $0x80,%RDI |
(17305) 0xc006f5 JAE c007d0 |
(17305) 0xc006fb XOR %R13D,%R13D |
(17305) 0xc006fe MOV %R8D,%R15D |
(17305) 0xc00701 SUB %R13D,%R15D |
(17305) 0xc00704 MOV %R13,%R10 |
(17305) 0xc00707 NOT %R10 |
(17305) 0xc0070a ADD %R8,%R10 |
(17305) 0xc0070d AND $0x7,%R15 |
(17305) 0xc00711 JE c00734 |
(17305) 0xc00713 LEA (%RBX,%RAX,4),%R11 |
(17305) 0xc00717 LEA (%R14,%RDX,4),%RDI |
(17305) 0xc0071b NOPL (%RAX,%RAX,1) |
(17307) 0xc00720 VMOVSS (%RDI,%R13,4),%XMM0 |
(17307) 0xc00726 VMOVSS %XMM0,(%R11,%R13,4) |
(17307) 0xc0072c INC %R13 |
(17307) 0xc0072f DEC %R15 |
(17307) 0xc00732 JNE c00720 |
(17305) 0xc00734 CMP $0x7,%R10 |
(17305) 0xc00738 JB c006b0 |
(17305) 0xc0073e MOV -0x70(%RBP),%RDI |
(17305) 0xc00742 LEA (%RDI,%RAX,4),%RAX |
(17305) 0xc00746 MOV -0x68(%RBP),%RDI |
(17305) 0xc0074a LEA (%RDI,%RDX,4),%RDX |
(17305) 0xc0074e XCHG %AX,%AX |
(17306) 0xc00750 VMOVSS -0x1c(%RDX,%R13,4),%XMM0 |
(17306) 0xc00757 VMOVSS %XMM0,-0x1c(%RAX,%R13,4) |
(17306) 0xc0075e VMOVSS -0x18(%RDX,%R13,4),%XMM0 |
(17306) 0xc00765 VMOVSS %XMM0,-0x18(%RAX,%R13,4) |
(17306) 0xc0076c VMOVSS -0x14(%RDX,%R13,4),%XMM0 |
(17306) 0xc00773 VMOVSS %XMM0,-0x14(%RAX,%R13,4) |
(17306) 0xc0077a VMOVSS -0x10(%RDX,%R13,4),%XMM0 |
(17306) 0xc00781 VMOVSS %XMM0,-0x10(%RAX,%R13,4) |
(17306) 0xc00788 VMOVSS -0xc(%RDX,%R13,4),%XMM0 |
(17306) 0xc0078f VMOVSS %XMM0,-0xc(%RAX,%R13,4) |
(17306) 0xc00796 VMOVSS -0x8(%RDX,%R13,4),%XMM0 |
(17306) 0xc0079d VMOVSS %XMM0,-0x8(%RAX,%R13,4) |
(17306) 0xc007a4 VMOVSS -0x4(%RDX,%R13,4),%XMM0 |
(17306) 0xc007ab VMOVSS %XMM0,-0x4(%RAX,%R13,4) |
(17306) 0xc007b2 VMOVSS (%RDX,%R13,4),%XMM0 |
(17306) 0xc007b8 VMOVSS %XMM0,(%RAX,%R13,4) |
(17306) 0xc007be ADD $0x8,%R13 |
(17306) 0xc007c2 CMP %R13,%R8 |
(17306) 0xc007c5 JNE c00750 |
(17305) 0xc007c7 JMP c006b0 |
0xc007cc NOPL (%RAX) |
(17305) 0xc007d0 LEA (,%RAX,4),%R10 |
(17305) 0xc007d8 LEA (,%RDX,4),%R15 |
(17305) 0xc007e0 CMP $0x20,%R8D |
(17305) 0xc007e4 JAE c007eb |
(17305) 0xc007e6 XOR %R11D,%R11D |
(17305) 0xc007e9 JMP c0085b |
(17305) 0xc007eb MOV -0x60(%RBP),%RDI |
(17305) 0xc007ef LEA (%RDI,%R15,1),%R11 |
(17305) 0xc007f3 MOV -0x58(%RBP),%RDI |
(17305) 0xc007f7 LEA (%RDI,%R10,1),%R13 |
(17305) 0xc007fb XOR %EDI,%EDI |
(17305) 0xc007fd NOPL (%RAX) |
(17308) 0xc00800 VMOVUPS -0x60(%R11,%RDI,1),%YMM0 |
(17308) 0xc00807 VMOVUPS -0x40(%R11,%RDI,1),%YMM1 |
(17308) 0xc0080e VMOVUPS -0x20(%R11,%RDI,1),%YMM2 |
(17308) 0xc00815 VMOVUPS (%R11,%RDI,1),%YMM3 |
(17308) 0xc0081b VMOVUPS %YMM0,-0x60(%R13,%RDI,1) |
(17308) 0xc00822 VMOVUPS %YMM1,-0x40(%R13,%RDI,1) |
(17308) 0xc00829 VMOVUPS %YMM2,-0x20(%R13,%RDI,1) |
(17308) 0xc00830 VMOVUPS %YMM3,(%R13,%RDI,1) |
(17308) 0xc00837 SUB $-0x80,%RDI |
(17308) 0xc0083b CMP %RDI,%R12 |
(17308) 0xc0083e JNE c00800 |
(17305) 0xc00840 CMP %R8,-0x40(%RBP) |
(17305) 0xc00844 JE c006b0 |
(17305) 0xc0084a MOV -0x40(%RBP),%R13 |
(17305) 0xc0084e MOV %R13,%R11 |
(17305) 0xc00851 TEST $0x1c,%R8B |
(17305) 0xc00855 JE c006fe |
(17305) 0xc0085b ADD %R14,%R15 |
(17305) 0xc0085e ADD %RBX,%R10 |
(17305) 0xc00861 NOPW %CS:(%RAX,%RAX,1) |
(17309) 0xc00870 VMOVUPS (%R15,%R11,4),%XMM0 |
(17309) 0xc00876 VMOVUPS %XMM0,(%R10,%R11,4) |
(17309) 0xc0087c ADD $0x4,%R11 |
(17309) 0xc00880 CMP %R11,%R9 |
(17309) 0xc00883 JNE c00870 |
(17305) 0xc00885 MOV %R9,%R13 |
(17305) 0xc00888 CMP %R8,%R9 |
(17305) 0xc0088b JE c006b0 |
(17305) 0xc00891 JMP c006fe |
0xc00896 XOR %EAX,%EAX |
0xc00898 ADD $0x78,%RSP |
0xc0089c POP %RBX |
0xc0089d POP %R12 |
0xc0089f POP %R13 |
0xc008a1 POP %R14 |
0xc008a3 POP %R15 |
0xc008a5 POP %RBP |
0xc008a6 VZEROUPPER |
0xc008a9 RET |
0xc008aa NOPW (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►73.55+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►13.41+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►13.04+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►81.80+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►12.82+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►5.38+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | CpuPpLongRangeNonbondeds::calc[...] | force.cpp:261 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2073 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►66.41+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►23.02+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
►10.57+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►57.26+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►30.65+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►12.10+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►45.52+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►35.13+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
►19.35+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined..49 | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | gmx_pme_do(gmx_pme_t*, gmx::Ar[...] | pme.cpp:1276 | libgromacs_mpi.so.9.0.0 |
○ | gmx_pmeonly(gmx_pme_t**, t_com[...] | stl_vector.h:1169 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2330 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | pme_grid.cpp:367-418 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 77 |
nb uops | 75 |
loop length | 270 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 17 |
micro-operation queue | 12.50 cycles |
front end | 12.50 cycles |
| | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.75 | 6.75 | 6.50 | 6.50 | 4.50 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.75 | 6.75 | 6.50 | 6.50 | 4.50 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.50 |
Dispatch | 6.75 |
DIV/SQRT | 12.00 |
Overall L1 | 12.50 |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 8% |
load | 6% |
store | 10% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
MOV %R8D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x580(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSXD %ECX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%RAX,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x4c(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x98(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x8c(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CALL fb54e0 <@plt_start@+0x12030> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV -0x48(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x4c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
MOV %EAX,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
IMUL %EAX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
CLTD | scal (6.3%) | |||||||||||||||||
IDIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 | scal (6.3%) |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
INC %R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
IMUL %ESI,%R13D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
MOV %R13D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CLTD | scal (6.3%) | |||||||||||||||||
IDIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 | scal (6.3%) |
CMP %EAX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JGE c00896 <_Z23copy_fftgrid_to_pmegridP9gmx_pme_tPKfPfiii+0x2d6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x44(%RBP),%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
TEST %R8D,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE c00896 <_Z23copy_fftgrid_to_pmegridP9gmx_pme_tPKfPfiii+0x2d6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
MOV 0x54c(%R15),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %EAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD 0x550(%R15),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV -0x88(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD -0x84(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x20,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
AND $-0x4,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
LEA 0x60(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA 0x60(%RBX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (,%R8,4),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $-0x80,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
LEA 0x1c(%RBX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA 0x1c(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP c006ba <_Z23copy_fftgrid_to_pmegridP9gmx_pme_tPKfPfiii+0xfa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run 1x1
Source file and lines | pme_grid.cpp:367-418 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 77 |
nb uops | 75 |
loop length | 270 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 17 |
micro-operation queue | 12.50 cycles |
front end | 12.50 cycles |
| | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.75 | 6.75 | 6.50 | 6.50 | 4.50 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.75 | 6.75 | 6.50 | 6.50 | 4.50 | 6.67 | 6.67 | 6.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.50 |
Dispatch | 6.75 |
DIV/SQRT | 12.00 |
Overall L1 | 12.50 |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 8% |
load | 6% |
store | 10% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
MOV %R8D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x580(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSXD %ECX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%RAX,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x4c(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x98(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x8c(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CALL fb54e0 <@plt_start@+0x12030> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV -0x48(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x4c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
MOV %EAX,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
IMUL %EAX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
CLTD | | | | | | | | | | | | | | | | | | scal (6.3%) |
IDIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 | scal (6.3%) |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
INC %R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
IMUL %ESI,%R13D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (6.3%) |
MOV %R13D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CLTD | | | | | | | | | | | | | | | | | | scal (6.3%) |
IDIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 | scal (6.3%) |
CMP %EAX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JGE c00896 <_Z23copy_fftgrid_to_pmegridP9gmx_pme_tPKfPfiii+0x2d6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x44(%RBP),%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
TEST %R8D,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JLE c00896 <_Z23copy_fftgrid_to_pmegridP9gmx_pme_tPKfPfiii+0x2d6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
MOV 0x54c(%R15),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %EAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD 0x550(%R15),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV -0x88(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVSXD -0x84(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x20,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
AND $-0x4,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
LEA 0x60(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA 0x60(%RBX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (,%R8,4),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $-0x80,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
LEA 0x1c(%RBX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA 0x1c(%R14),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP c006ba <_Z23copy_fftgrid_to_pmegridP9gmx_pme_tPKfPfiii+0xfa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 0.97 | 0 | 1.08 | 0 | 0.97 | 0 | 0.68 | 0.05 | 1.01 | -0 | 1.07 | -0 | 0.94 | 0 | 1.05 | -0 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 1.3349996805191 | 0.16014783084393 |
2x1 | 2 | 0.97 | 1.95 | 2 | 0.68499994277954 | 0.15112480521202 |
4x1 | 4 | 1.08 | 4.34 | 4 | 0.33999994397163 | 0.12350573390722 |
8x1 | 8 | 0.97 | 7.74 | 8 | 0.20499996840954 | 0.11153087764978 |
16x1 | 16 | 0.68 | 10.91 | 16 | 0.20999999344349 | 0.14801986515522 |
32x1 | 12 | 1.01 | 32.21 | 32 | 0.15999998152256 | 0.080660462379456 |
64x1 | 24 | 1.07 | 68.66 | 64 | 0.094999998807907 | 0.048917975276709 |
128x1 | 44 | 0.94 | 120.76 | 128 | 0.054999995976686 | 0.065049447119236 |
192x1 | 63 | 1.05 | 201.38 | 192 | 0.050000000745058 | 0.049214690923691 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼copy_fftgrid_to_pmegrid(gmx_pme_t*, float const*, float*, int, int, int)– | 0.05 | 0.01 |
▼Loop 17305 - pme_grid.cpp:395-404 - libgromacs_mpi.so.9.0.0– | 0.01 | 0.01 |
○Loop 17308 - pme_grid.cpp:402-404 - libgromacs_mpi.so.9.0.0 | 0.04 | 0.02 |
○Loop 17306 - pme_grid.cpp:402-404 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |
○Loop 17307 - pme_grid.cpp:402-404 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |
○Loop 17309 - pme_grid.cpp:402-404 - libgromacs_mpi.so.9.0.0 | 0.00 | 0.00 |