| | | | | | | requested parallelism | walltime sum (s) | nb instances | any sync average per thread time (s) | any wait average per thread time (s) | parallelism overhead (%) | local speedup if perfectly balanced | global speedup if perfectly balanced |
start addr | function name | source location | level | ancestor thread num | invoker | parallel or teams | OMP1x13 | OMP2x13 | OMP1x26 | OMP2x26 | OMP1x13 | OMP2x13 | OMP1x26 | OMP2x26 | OMP1x13 | OMP2x13 | OMP1x26 | OMP2x26 | OMP1x13 | OMP2x13 | OMP1x26 | OMP2x26 | OMP1x13 | OMP2x13 | OMP1x26 | OMP2x26 | OMP1x13 | OMP2x13 | OMP1x26 | OMP2x26 | OMP1x13 | OMP2x13 | OMP1x26 | OMP2x26 | OMP1x13 | OMP2x13 | OMP1x26 | OMP2x26 |
picongpu:0x45d3b9 | void picongpu::Particles<pmacc::meta::String<(char)101>, boo... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 361.610 | 190.061 | 182.679 | 95.390 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 2.859 | 6.717 | 1.765 | 3.926 | 2.858 | 6.717 | 1.765 | 3.926 | 0.79 | 3.53 | 0.97 | 4.12 | 1.008 | 1.037 | 1.010 | 1.043 | 1.002 | 1.008 | 1.002 | 1.008 |
picongpu:0x47bfd9 | void picongpu::Particles<pmacc::meta::String<(char)105>, boo... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 357.161 | 186.102 | 179.936 | 93.632 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 2.675 | 5.286 | 1.412 | 2.917 | 2.675 | 5.285 | 1.412 | 2.917 | 0.75 | 2.84 | 0.78 | 3.12 | 1.008 | 1.029 | 1.008 | 1.032 | 1.002 | 1.006 | 1.002 | 1.006 |
picongpu:0x4cdc58 | _ZNK8picongpu13currentSolver7DepositINS0_8strategy23StridedC... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 347.808 | 191.717 | 190.338 | 113.608 | 40.5 E3 | 40.5 E3 | 40.5 E3 | 40.5 E3 | 18.968 | 25.758 | 25.433 | 30.777 | 18.962 | 25.751 | 25.427 | 30.772 | 5.45 | 13.4 | 13.4 | 27.1 | 1.058 | 1.155 | 1.154 | 1.372 | 1.013 | 1.032 | 1.033 | 1.068 |
picongpu:0x4d2958 | _ZNK8picongpu13currentSolver7DepositINS0_8strategy23StridedC... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 331.284 | 183.077 | 181.890 | 107.914 | 40.5 E3 | 40.5 E3 | 40.5 E3 | 40.5 E3 | 20.016 | 25.936 | 25.811 | 29.252 | 20.009 | 25.930 | 25.806 | 29.247 | 6.04 | 14.2 | 14.2 | 27.1 | 1.064 | 1.165 | 1.165 | 1.372 | 1.014 | 1.032 | 1.033 | 1.065 |
picongpu:0x461a78 | void pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 19.339 | 11.822 | 10.986 | 7.275 | 40.5 E3 | 40.5 E3 | 40.5 E3 | 40.5 E3 | 1.659 | 2.213 | 1.914 | 2.351 | 1.653 | 2.208 | 1.909 | 2.346 | 8.58 | 18.7 | 17.4 | 32.3 | 1.094 | 1.230 | 1.211 | 1.477 | 1.001 | 1.003 | 1.002 | 1.005 |
picongpu:0x47f858 | void pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 7.587 | 4.783 | 4.394 | 3.173 | 40.5 E3 | 40.5 E3 | 40.5 E3 | 40.5 E3 | 0.608 | 0.895 | 0.732 | 1.010 | 0.603 | 0.890 | 0.728 | 1.005 | 8.01 | 18.7 | 16.7 | 31.8 | 1.087 | 1.230 | 1.200 | 1.467 | 1.000 | 1.001 | 1.001 | 1.002 |
picongpu:0x4fa0f0 | pmacc::TaskSetValue<pmacc::math::Vector<float, 3u, pmacc::ma... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 2.728 | 1.416 | 1.417 | 0.757 | 1.71 E3 | 1.71 E3 | 1.71 E3 | 1.71 E3 | 74.8 E-3 | 62.7 E-3 | 73.0 E-3 | 57.2 E-3 | 74.5 E-3 | 62.4 E-3 | 72.8 E-3 | 56.9 E-3 | 2.74 | 4.43 | 5.15 | 7.55 | 1.028 | 1.046 | 1.054 | 1.082 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4a0819 | void pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 1.419 | 0.885 | 0.778 | 0.463 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 0.122 | 0.157 | 75.4 E-3 | 75.6 E-3 | 0.121 | 0.156 | 75.2 E-3 | 75.4 E-3 | 8.57 | 17.7 | 9.68 | 16.3 | 1.094 | 1.215 | 1.107 | 1.195 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4b0319 | void pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 1.343 | 0.786 | 0.721 | 0.434 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 0.117 | 0.128 | 74.1 E-3 | 86.2 E-3 | 0.117 | 0.128 | 73.9 E-3 | 86.0 E-3 | 8.74 | 16.3 | 10.3 | 19.8 | 1.096 | 1.195 | 1.115 | 1.248 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4c20c0 | void picongpu::fields::maxwellSolver::fdtd::FDTDBase<picongp... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 1.012 | 0.694 | 0.553 | 0.392 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 19.8 E-3 | 0.124 | 20.3 E-3 | 0.103 | 19.5 E-3 | 0.124 | 20.1 E-3 | 0.103 | 1.95 | 17.9 | 3.68 | 26.4 | 1.020 | 1.218 | 1.038 | 1.359 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4bd000 | void picongpu::fields::maxwellSolver::fdtd::FDTDBase<picongp... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.787 | 0.600 | 0.471 | 0.407 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 19.4 E-3 | 0.117 | 30.4 E-3 | 0.118 | 19.2 E-3 | 0.117 | 30.2 E-3 | 0.117 | 2.47 | 19.5 | 6.46 | 28.9 | 1.025 | 1.242 | 1.069 | 1.407 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4bf8a0 | void picongpu::fields::maxwellSolver::fdtd::FDTDBase<picongp... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.768 | 0.771 | 0.447 | 0.406 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 0.170 | 0.395 | 0.128 | 0.189 | 0.170 | 0.395 | 0.128 | 0.188 | 22.2 | 51.2 | 28.7 | 46.4 | 1.285 | 2.051 | 1.402 | 1.866 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4ea9e0 | void picongpu::fields::maxwellSolver::fdtd::FDTDBase<picongp... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.747 | 0.699 | 0.401 | 0.390 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 0.244 | 0.375 | 0.138 | 0.215 | 0.244 | 0.375 | 0.138 | 0.215 | 32.6 | 53.6 | 34.5 | 55.1 | 1.484 | 2.157 | 1.526 | 2.228 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x507239 | void pmacc::fields::operations::AddExchangeToBorder::operato... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.648 | 0.568 | 0.480 | 0.516 | 39.0 E3 | 39.0 E3 | 39.0 E3 | 39.0 E3 | 0.133 | 0.194 | 0.147 | 0.238 | 0.129 | 0.190 | 0.143 | 0.234 | 20.5 | 34.1 | 30.7 | 46.2 | 1.259 | 1.518 | 1.444 | 1.858 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4e6500 | void picongpu::fields::maxwellSolver::fdtd::FDTDBase<picongp... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.606 | 0.381 | 0.322 | 0.214 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 11.3 E-3 | 28.9 E-3 | 15.8 E-3 | 31.2 E-3 | 11.1 E-3 | 28.7 E-3 | 15.6 E-3 | 31.0 E-3 | 1.86 | 7.59 | 4.91 | 14.6 | 1.019 | 1.082 | 1.052 | 1.171 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x509dd9 | void pmacc::fields::operations::CopyGuardToExchange::operato... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.599 | 0.525 | 0.434 | 0.449 | 39.0 E3 | 39.0 E3 | 39.0 E3 | 39.0 E3 | 0.130 | 0.181 | 0.131 | 0.202 | 0.126 | 0.176 | 0.126 | 0.197 | 21.7 | 34.4 | 30.1 | 44.9 | 1.277 | 1.524 | 1.431 | 1.816 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4dc8b3 | void picongpu::fields::maxwellSolver::fdtd::FDTDBase<picongp... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.540 | 0.569 | 0.307 | 0.320 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 0.235 | 0.357 | 0.137 | 0.198 | 0.235 | 0.357 | 0.137 | 0.198 | 43.5 | 62.7 | 44.7 | 61.7 | 1.770 | 2.679 | 1.808 | 2.611 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4d8693 | void picongpu::fields::maxwellSolver::fdtd::FDTDBase<picongp... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.457 | 0.302 | 0.256 | 0.178 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 1.50 E3 | 9.67 E-3 | 26.7 E-3 | 12.3 E-3 | 32.7 E-3 | 9.48 E-3 | 26.5 E-3 | 12.1 E-3 | 32.5 E-3 | 2.11 | 8.86 | 4.79 | 18.3 | 1.022 | 1.097 | 1.050 | 1.225 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x41e7ee | void cupla::cupla_omp2_seq_sync::KernelExecutor<pmacc::Kerne... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.408 | 1.033 | 0.629 | 1.219 | 156 E3 | 156 E3 | 156 E3 | 156 E3 | 0.170 | 0.372 | 0.243 | 0.442 | 0.153 | 0.343 | 0.221 | 0.412 | 41.6 | 36.0 | 38.6 | 36.3 | 1.711 | 1.562 | 1.628 | 1.569 | 1.000 | 1.000 | 1.000 | 1.001 |
picongpu:0x4a502c | pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::meta:... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.293 | 0.443 | 0.302 | 0.468 | 39.0 E3 | 39.0 E3 | 39.0 E3 | 39.0 E3 | 0.117 | 0.190 | 0.140 | 0.225 | 0.113 | 0.184 | 0.135 | 0.219 | 39.8 | 42.9 | 46.3 | 48.1 | 1.662 | 1.751 | 1.861 | 1.928 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x49eab3 | pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::meta:... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.281 | 0.444 | 0.315 | 0.543 | 9.00 E3 | 9.00 E3 | 9.00 E3 | 9.00 E3 | 0.172 | 0.298 | 0.215 | 0.405 | 0.171 | 0.297 | 0.214 | 0.404 | 61.1 | 67.1 | 68.2 | 74.5 | 2.571 | 3.043 | 3.142 | 3.925 | 1.000 | 1.000 | 1.000 | 1.001 |
picongpu:0x4b484c | pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::meta:... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.186 | 0.356 | 0.216 | 0.410 | 39.0 E3 | 39.0 E3 | 39.0 E3 | 39.0 E3 | 76.2 E-3 | 0.135 | 86.0 E-3 | 0.160 | 72.0 E-3 | 0.129 | 81.0 E-3 | 0.152 | 41.0 | 38.0 | 39.8 | 38.9 | 1.694 | 1.614 | 1.660 | 1.636 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x567ac9 | _ZNK6alpaka23TaskKernelCpuOmp2BlocksISt17integral_constantIm... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.171 | 85.4 E-3 | 85.3 E-3 | 43.3 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 732 E-6 | 460 E-6 | 365 E-6 | 768 E-6 | 732 E-6 | 460 E-6 | 365 E-6 | 768 E-6 | 0.43 | 0.54 | 0.43 | 1.77 | 1.004 | 1.005 | 1.004 | 1.018 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x5c7f09 | alpaka::TaskKernelCpuOmp2Blocks<std::integral_constant<unsig... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.129 | 75.6 E-3 | 67.2 E-3 | 39.5 E-3 | 152 | 152 | 152 | 152 | 25.7 E-3 | 15.6 E-3 | 13.8 E-3 | 8.85 E-3 | 25.7 E-3 | 15.6 E-3 | 13.7 E-3 | 8.83 E-3 | 20.0 | 20.7 | 20.5 | 22.4 | 1.250 | 1.261 | 1.258 | 1.289 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4ae5b3 | pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::meta:... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.128 | 0.183 | 0.147 | 0.217 | 1.75 E3 | 1.75 E3 | 1.75 E3 | 1.75 E3 | 80.7 E-3 | 0.133 | 0.104 | 0.168 | 80.5 E-3 | 0.133 | 0.104 | 0.168 | 63.0 | 72.9 | 71.0 | 77.4 | 2.700 | 3.689 | 3.449 | 4.422 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x55fe59 | void picongpu::Particles<pmacc::meta::String<(char)105>, boo... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.121 | 0.170 | 0.104 | 0.163 | 1.00 | 1.00 | 1.00 | 1.00 | 26.2 E-3 | 18.2 E-3 | 10.5 E-3 | 17.2 E-3 | 26.2 E-3 | 18.2 E-3 | 10.5 E-3 | 17.2 E-3 | 21.7 | 10.8 | 10.1 | 10.6 | 1.278 | 1.121 | 1.112 | 1.118 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x55b039 | void picongpu::Particles<pmacc::meta::String<(char)101>, boo... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 0.119 | 0.156 | 0.105 | 0.160 | 1.00 | 1.00 | 1.00 | 1.00 | 4.97 E-3 | 16.7 E-3 | 8.92 E-3 | 17.6 E-3 | 4.97 E-3 | 16.7 E-3 | 8.92 E-3 | 17.6 E-3 | 4.16 | 10.7 | 8.53 | 11.0 | 1.043 | 1.120 | 1.093 | 1.123 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x43ab5d | picongpu::Simulation::fillSimulation() | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 19.2 E-3 | 9.60 E-3 | 9.64 E-3 | 4.92 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 105 E-6 | 53.6 E-6 | 44.1 E-6 | 101 E-6 | 105 E-6 | 53.4 E-6 | 43.9 E-6 | 100 E-6 | 0.55 | 0.56 | 0.46 | 2.04 | 1.006 | 1.006 | 1.005 | 1.021 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x564469 | _ZNK6alpaka23TaskKernelCpuOmp2BlocksISt17integral_constantIm... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 15.2 E-3 | 8.89 E-3 | 8.80 E-3 | 4.50 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 3.89 E-3 | 3.20 E-3 | 3.08 E-3 | 1.66 E-3 | 3.89 E-3 | 3.20 E-3 | 3.08 E-3 | 1.66 E-3 | 25.6 | 36.1 | 35.0 | 36.8 | 1.344 | 1.564 | 1.538 | 1.583 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x566509 | _ZNK6alpaka23TaskKernelCpuOmp2BlocksISt17integral_constantIm... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 15.2 E-3 | 8.86 E-3 | 8.83 E-3 | 4.50 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 3.83 E-3 | 3.16 E-3 | 3.07 E-3 | 1.64 E-3 | 3.83 E-3 | 3.16 E-3 | 3.07 E-3 | 1.64 E-3 | 25.2 | 35.7 | 34.8 | 36.5 | 1.338 | 1.555 | 1.534 | 1.575 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x511890 | pmacc::TaskSetValue<pmacc::math::Vector<float, 1u, pmacc::ma... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 15.0 E-3 | 8.50 E-3 | 8.16 E-3 | 6.61 E-3 | 157 | 157 | 157 | 157 | 756 E-6 | 818 E-6 | 692 E-6 | 1.54 E-3 | 735 E-6 | 800 E-6 | 675 E-6 | 1.52 E-3 | 5.04 | 9.62 | 8.49 | 23.3 | 1.053 | 1.106 | 1.093 | 1.304 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x565a29 | _ZNK6alpaka23TaskKernelCpuOmp2BlocksISt17integral_constantIm... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 10.4 E-3 | 7.66 E-3 | 7.63 E-3 | 4.54 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.86 E-3 | 3.35 E-3 | 3.31 E-3 | 2.34 E-3 | 1.86 E-3 | 3.35 E-3 | 3.31 E-3 | 2.34 E-3 | 17.9 | 43.7 | 43.4 | 51.5 | 1.219 | 1.776 | 1.766 | 2.060 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x563989 | _ZNK6alpaka23TaskKernelCpuOmp2BlocksISt17integral_constantIm... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 10.3 E-3 | 7.59 E-3 | 7.59 E-3 | 4.46 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.89 E-3 | 3.35 E-3 | 3.35 E-3 | 2.33 E-3 | 1.88 E-3 | 3.35 E-3 | 3.35 E-3 | 2.33 E-3 | 18.2 | 44.2 | 44.2 | 52.3 | 1.223 | 1.792 | 1.791 | 2.095 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x566fe9 | _ZNK6alpaka23TaskKernelCpuOmp2BlocksISt17integral_constantIm... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 10.1 E-3 | 7.58 E-3 | 7.57 E-3 | 4.42 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.99 E-3 | 3.58 E-3 | 3.50 E-3 | 2.45 E-3 | 1.99 E-3 | 3.58 E-3 | 3.50 E-3 | 2.45 E-3 | 19.7 | 47.2 | 46.2 | 55.5 | 1.245 | 1.895 | 1.857 | 2.246 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x564f49 | _ZNK6alpaka23TaskKernelCpuOmp2BlocksISt17integral_constantIm... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 10.1 E-3 | 7.48 E-3 | 7.57 E-3 | 4.42 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 2.07 E-3 | 3.52 E-3 | 3.52 E-3 | 2.47 E-3 | 2.07 E-3 | 3.52 E-3 | 3.52 E-3 | 2.47 E-3 | 20.6 | 47.1 | 46.4 | 55.8 | 1.259 | 1.889 | 1.866 | 2.262 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x56b1b9 | void picongpu::particles::manipulate<picongpu::particles::ma... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 6.97 E-3 | 3.34 E-3 | 3.52 E-3 | 1.72 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 96.5 E-6 | 96.5 E-6 | 56.2 E-6 | 57.6 E-6 | 96.4 E-6 | 96.3 E-6 | 56.0 E-6 | 57.4 E-6 | 1.39 | 2.89 | 1.60 | 3.35 | 1.014 | 1.030 | 1.016 | 1.035 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x56d9b9 | void picongpu::particles::manipulate<picongpu::particles::ma... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 6.93 E-3 | 3.51 E-3 | 3.52 E-3 | 1.81 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 132 E-6 | 67.6 E-6 | 58.1 E-6 | 48.1 E-6 | 132 E-6 | 67.4 E-6 | 57.9 E-6 | 48.0 E-6 | 1.91 | 1.92 | 1.65 | 2.65 | 1.019 | 1.020 | 1.017 | 1.027 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x55d1d9 | void pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 5.60 E-3 | 2.74 E-3 | 2.80 E-3 | 1.42 E-3 | 2.00 | 2.00 | 2.00 | 2.00 | 1.65 E-3 | 829 E-6 | 845 E-6 | 434 E-6 | 1.65 E-3 | 828 E-6 | 844 E-6 | 434 E-6 | 29.5 | 30.3 | 30.2 | 30.5 | 1.418 | 1.434 | 1.433 | 1.440 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x561a59 | void pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 5.47 E-3 | 3.03 E-3 | 2.82 E-3 | 1.45 E-3 | 2.00 | 2.00 | 2.00 | 2.00 | 1.56 E-3 | 1.04 E-3 | 845 E-6 | 444 E-6 | 1.56 E-3 | 1.04 E-3 | 845 E-6 | 444 E-6 | 28.5 | 34.4 | 30.0 | 30.6 | 1.399 | 1.524 | 1.429 | 1.440 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x531830 | pmacc::TaskSetValue<pmacc::ExchangeMemoryIndex<unsigned int,... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 2.87 E-3 | 3.43 E-3 | 2.73 E-3 | 3.35 E-3 | 104 | 104 | 104 | 104 | 516 E-6 | 776 E-6 | 695 E-6 | 982 E-6 | 505 E-6 | 763 E-6 | 681 E-6 | 967 E-6 | 18.0 | 22.6 | 25.4 | 29.3 | 1.219 | 1.292 | 1.340 | 1.415 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x4eeed9 | pmacc::random::RNGProvider<3u, pmacc::random::methods::Alpak... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 2.52 E-3 | 1.26 E-3 | 1.26 E-3 | 646 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 10.4 E-6 | 8.30 E-6 | 7.83 E-6 | 13.6 E-6 | 10.2 E-6 | 8.12 E-6 | 7.64 E-6 | 13.4 E-6 | 0.41 | 0.66 | 0.62 | 2.10 | 1.004 | 1.007 | 1.006 | 1.021 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x53dc90 | pmacc::TaskSetValue<pmacc::Frame<pmacc::detail::OperatorCrea... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 2.41 E-3 | 2.71 E-3 | 2.24 E-3 | 2.87 E-3 | 52.0 | 52.0 | 52.0 | 52.0 | 419 E-6 | 541 E-6 | 488 E-6 | 724 E-6 | 413 E-6 | 534 E-6 | 482 E-6 | 717 E-6 | 17.4 | 20.0 | 21.8 | 25.2 | 1.210 | 1.250 | 1.279 | 1.338 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x5502f0 | pmacc::TaskSetValue<pmacc::Frame<pmacc::detail::OperatorCrea... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 2.37 E-3 | 2.69 E-3 | 2.23 E-3 | 2.71 E-3 | 52.0 | 52.0 | 52.0 | 52.0 | 417 E-6 | 583 E-6 | 530 E-6 | 717 E-6 | 411 E-6 | 576 E-6 | 524 E-6 | 710 E-6 | 17.6 | 21.7 | 23.8 | 26.5 | 1.213 | 1.277 | 1.313 | 1.360 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x526ed0 | pmacc::TaskSetValue<alpaka::rand::engine::cpu::TinyMersenneT... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 2.22 E-3 | 1.29 E-3 | 2.21 E-3 | 1.56 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 377 E-6 | 173 E-6 | 418 E-6 | 349 E-6 | 377 E-6 | 172 E-6 | 418 E-6 | 349 E-6 | 17.0 | 13.4 | 18.9 | 22.4 | 1.204 | 1.154 | 1.234 | 1.288 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x5dd090 | pmacc::TaskSetValue<char, 1u, true>::init() | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 2.18 E-3 | 3.45 E-3 | 3.38 E-3 | 4.76 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 79.1 E-6 | 85.7 E-6 | 112 E-6 | 141 E-6 | 78.7 E-6 | 85.3 E-6 | 112 E-6 | 141 E-6 | 3.62 | 2.48 | 3.32 | 2.96 | 1.038 | 1.025 | 1.034 | 1.031 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x5d6c49 | alpaka::TaskKernelCpuOmp2Blocks<std::integral_constant<unsig... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 640 E-6 | 1.05 E-3 | 627 E-6 | 1.12 E-3 | 152 | 152 | 152 | 152 | 243 E-6 | 445 E-6 | 245 E-6 | 460 E-6 | 227 E-6 | 428 E-6 | 229 E-6 | 442 E-6 | 38.0 | 42.5 | 39.1 | 41.1 | 1.612 | 1.740 | 1.643 | 1.698 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x538e50 | pmacc::TaskSetValue<pmacc::SuperCell<pmacc::Frame<pmacc::det... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 142 E-6 | 107 E-6 | 131 E-6 | 107 E-6 | 3.00 | 3.00 | 3.00 | 3.00 | 8.99 E-6 | 16.3 E-6 | 38.0 E-6 | 30.2 E-6 | 8.67 E-6 | 16.0 E-6 | 37.7 E-6 | 29.8 E-6 | 6.34 | 15.3 | 29.1 | 28.3 | 1.068 | 1.181 | 1.410 | 1.394 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x54c230 | _ZN5pmacc12TaskSetValueINS_9SuperCellINS_5FrameINS_6detail29... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 139 E-6 | 104 E-6 | 96.0 E-6 | 111 E-6 | 3.00 | 3.00 | 3.00 | 3.00 | 8.58 E-6 | 18.6 E-6 | 15.9 E-6 | 35.4 E-6 | 8.21 E-6 | 18.2 E-6 | 15.5 E-6 | 35.1 E-6 | 6.19 | 17.9 | 16.5 | 32.0 | 1.066 | 1.217 | 1.198 | 1.471 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x53fa99 | void pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 29.1 E-6 | 22.2 E-6 | 20.7 E-6 | 38.8 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 5.97 E-6 | 5.46 E-6 | 5.58 E-6 | 21.1 E-6 | 5.85 E-6 | 5.30 E-6 | 5.44 E-6 | 20.9 E-6 | 20.5 | 24.5 | 26.9 | 54.3 | 1.257 | 1.325 | 1.368 | 2.188 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x536cf4 | pmacc::HostBuffer<pmacc::SuperCell<pmacc::Frame<pmacc::detai... | HostBuffer.hpp:161 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 18.0 E-6 | 35.0 E-6 | 20.6 E-6 | 52.3 E-6 | 2.00 | 2.00 | 2.00 | 2.00 | 3.69 E-6 | 17.3 E-6 | 5.75 E-6 | 32.8 E-6 | 3.46 E-6 | 17.1 E-6 | 5.52 E-6 | 32.5 E-6 | 20.5 | 49.5 | 27.9 | 62.7 | 1.258 | 1.978 | 1.387 | 2.683 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x551db9 | void pmacc::ParticlesBase<pmacc::ParticleDescription<pmacc::... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 17.4 E-6 | 17.4 E-6 | 14.7 E-6 | 30.0 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 3.10 E-6 | 4.85 E-6 | 4.06 E-6 | 13.4 E-6 | 2.97 E-6 | 4.66 E-6 | 3.91 E-6 | 13.2 E-6 | 17.9 | 27.8 | 27.6 | 44.6 | 1.218 | 1.386 | 1.382 | 1.806 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x54a0d4 | _ZN5pmacc10HostBufferINS_9SuperCellINS_5FrameINS_6detail29Op... | HostBuffer.hpp:161 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 15.7 E-6 | 16.8 E-6 | 17.8 E-6 | 38.9 E-6 | 2.00 | 2.00 | 2.00 | 2.00 | 5.13 E-6 | 4.81 E-6 | 6.92 E-6 | 25.7 E-6 | 4.81 E-6 | 4.25 E-6 | 6.57 E-6 | 25.1 E-6 | 32.7 | 28.6 | 38.7 | 66.0 | 1.486 | 1.401 | 1.633 | 2.940 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x5541d9 | pmacc::IdProvider<3u>::setNextId(unsigned long) | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 8.48 E-6 | 17.0 E-6 | 11.2 E-6 | 38.8 E-6 | 2.00 | 2.00 | 2.00 | 2.00 | 2.63 E-6 | 6.56 E-6 | 4.88 E-6 | 25.6 E-6 | 2.38 E-6 | 6.10 E-6 | 4.63 E-6 | 25.3 E-6 | 31.0 | 38.6 | 43.6 | 66.1 | 1.450 | 1.628 | 1.772 | 2.946 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x5553f9 | pmacc::IdProvider<3u>::getNewIdHost() | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 6.35 E-6 | 24.6 E-6 | 8.77 E-6 | 44.7 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 2.81 E-6 | 17.6 E-6 | 4.48 E-6 | 32.4 E-6 | 2.68 E-6 | 17.4 E-6 | 4.30 E-6 | 32.2 E-6 | 44.3 | 71.4 | 51.1 | 72.4 | 1.796 | 3.500 | 2.045 | 3.625 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x5714b0 | pmacc::TaskSetValue<picongpu::particles::debyeLength::Estima... | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 5.72 E-6 | 23.8 E-6 | 11.2 E-6 | 61.6 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 2.61 E-6 | 16.0 E-6 | 5.99 E-6 | 44.7 E-6 | 2.48 E-6 | 15.7 E-6 | 5.84 E-6 | 44.5 E-6 | 45.7 | 67.1 | 53.5 | 72.6 | 1.842 | 3.035 | 2.152 | 3.648 | 1.000 | 1.000 | 1.000 | 1.000 |
picongpu:0x558970 | pmacc::TaskSetValue<unsigned long long, 1u, true>::init() | TaskKernelCpuOmp2Blocks.hpp:855 | 0 | 0 | runtime | parallel | 13 | 26 | 26 | 52 | 5.71 E-6 | 11.1 E-6 | 6.37 E-6 | 22.2 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 3.24 E-6 | 5.41 E-6 | 3.15 E-6 | 13.5 E-6 | 3.09 E-6 | 5.17 E-6 | 2.99 E-6 | 13.3 E-6 | 56.8 | 49.0 | 49.5 | 60.7 | 2.315 | 1.959 | 1.979 | 2.542 | 1.000 | 1.000 | 1.000 | 1.000 |