Function: .omp_outlined..6 | Module: exec | Source: advec_cell.cpp:117-125 [...] | Coverage: 3.38% |
---|
Function: .omp_outlined..6 | Module: exec | Source: advec_cell.cpp:117-125 [...] | Coverage: 3.38% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 117 - 125 |
-------------------------------------------------------------------------------- |
117: #pragma omp parallel for simd collapse(2) |
118: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
119: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
120: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
121: double post_mass_s = pre_mass_s + mass_flux_x(i, j) - mass_flux_x(i + 1, j + 0); |
122: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 1, j + 0)) / post_mass_s; |
123: double advec_vol_s = pre_vol(i, j) + vol_flux_x(i, j) - vol_flux_x(i + 1, j + 0); |
124: density1(i, j) = post_mass_s / advec_vol_s; |
125: energy1(i, j) = post_ener_s; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/update_tile_halo_kernel.cpp: 82 - 84 |
-------------------------------------------------------------------------------- |
82: #pragma omp parallel for simd |
83: for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { |
84: for (int j = 0; j < depth; ++j) { |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 156 - 158 |
-------------------------------------------------------------------------------- |
156: #pragma omp parallel for simd |
157: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
158: for (int j = 0; j < depth; ++j) { |
0x2334b0 PUSH %RBP |
0x2334b1 MOV %RSP,%RBP |
0x2334b4 PUSH %R15 |
0x2334b6 PUSH %R14 |
0x2334b8 PUSH %R13 |
0x2334ba PUSH %R12 |
0x2334bc PUSH %RBX |
0x2334bd SUB $0x98,%RSP |
0x2334c4 MOV (%RCX),%R15D |
0x2334c7 MOV (%RDX),%R14D |
0x2334ca INC %R14D |
0x2334cd ADD $0x2,%R15D |
0x2334d1 SUB %R14D,%R15D |
0x2334d4 JLE 233b8f |
0x2334da MOV (%R9),%R13D |
0x2334dd MOV (%R8),%R12D |
0x2334e0 INC %R12D |
0x2334e3 ADD $0x2,%R13D |
0x2334e7 CMP %R12D,%R13D |
0x2334ea JLE 233b8f |
0x2334f0 SUB %R12D,%R13D |
0x2334f3 MOV (%RDI),%ESI |
0x2334f5 MOVQ $0,-0x80(%RBP) |
0x2334fd MOVQ $0x1,-0xb8(%RBP) |
0x233508 MOVL $0,-0x50(%RBP) |
0x23350f IMUL %R13,%R15 |
0x233513 DEC %R15 |
0x233516 MOV %R15,-0x48(%RBP) |
0x23351a SUB $0x8,%RSP |
0x23351e LEA -0x50(%RBP),%RCX |
0x233522 LEA -0xb8(%RBP),%RAX |
0x233529 LEA 0x2d570(%RIP),%RDI |
0x233530 LEA -0x80(%RBP),%R8 |
0x233534 LEA -0x48(%RBP),%R9 |
0x233538 MOV %ESI,-0x4c(%RBP) |
0x23353b MOV $0x22,%EDX |
0x233540 PUSH $0x1 |
0x233542 PUSH $0x1 |
0x233544 PUSH %RAX |
0x233545 CALL 25f740 <@plt_start@+0x530> |
0x23354a ADD $0x20,%RSP |
0x23354e MOV -0x48(%RBP),%RAX |
0x233552 MOV -0x80(%RBP),%RCX |
0x233556 CMP %R15,%RAX |
0x233559 CMOVL %RAX,%R15 |
0x23355d MOV %R15,-0x48(%RBP) |
0x233561 CMP %R15,%RCX |
0x233564 JG 233b7d |
0x23356a MOV 0x10(%RBP),%R8 |
0x23356e MOV 0x20(%RBP),%RDI |
0x233572 MOV 0x28(%RBP),%RSI |
0x233576 MOV 0x18(%RBP),%R9 |
0x23357a MOV 0x38(%RBP),%RAX |
0x23357e MOV 0x30(%RBP),%RDX |
0x233582 MOV %R14D,-0x30(%RBP) |
0x233586 MOV %R12D,-0x2c(%RBP) |
0x23358a MOV %R13,-0x70(%RBP) |
0x23358e MOV (%R8),%R10 |
0x233591 MOV (%RDI),%R11 |
0x233594 MOV 0x10(%RDI),%RBX |
0x233598 MOV (%RSI),%RDI |
0x23359b MOV 0x10(%R8),%R8 |
0x23359f MOV %R10,-0x68(%RBP) |
0x2335a3 MOV (%R9),%R10 |
0x2335a6 MOV 0x10(%R9),%R9 |
0x2335aa MOV %R11,-0x60(%RBP) |
0x2335ae MOV %RDI,-0x58(%RBP) |
0x2335b2 MOV 0x10(%RDX),%RDI |
0x2335b6 MOV 0x10(%RAX),%R11 |
0x2335ba MOV %R9,-0x78(%RBP) |
0x2335be MOV 0x10(%RSI),%R9 |
0x2335c2 MOV (%RDX),%RSI |
0x2335c5 MOV (%RAX),%RDX |
0x2335c8 MOV %R15,%RAX |
0x2335cb SUB %RCX,%RAX |
0x2335ce MOV %R10,-0x38(%RBP) |
0x2335d2 INC %RAX |
0x2335d5 MOV %RSI,-0xa0(%RBP) |
0x2335dc MOV %RDX,-0x98(%RBP) |
0x2335e3 CMP $0x4,%RAX |
0x2335e7 JB 233a90 |
0x2335ed VPBROADCASTD %R14D,%XMM2 |
0x2335f3 VPBROADCASTD %R12D,%XMM3 |
0x2335f9 MOV -0x68(%RBP),%R12 |
0x2335fd MOV -0x38(%RBP),%R14 |
0x233601 VPBROADCASTQ %RCX,%YMM0 |
0x233607 VPADDQ -0x21c0f(%RIP),%YMM0,%YMM0 |
0x23360f VPBROADCASTQ -0x21e58(%RIP),%YMM12 |
0x233618 MOV %RAX,%R10 |
0x23361b AND $-0x4,%R10 |
0x23361f VPBROADCASTQ %R13,%YMM1 |
0x233625 VPBROADCASTQ %RSI,%YMM8 |
0x23362b VPBROADCASTQ %RDX,%YMM9 |
0x233631 VPCMPEQD %XMM11,%XMM11,%XMM11 |
0x233636 MOV %RAX,-0x90(%RBP) |
0x23363d ADD %R10,%RCX |
0x233640 VEXTRACTI128 $0x1,%YMM1,%XMM10 |
0x233646 MOV %R10,-0x88(%RBP) |
0x23364d VPBROADCASTQ %R12,%YMM4 |
0x233653 VPBROADCASTQ %R14,%YMM5 |
0x233659 MOV -0x60(%RBP),%R12 |
0x23365d MOV -0x58(%RBP),%R14 |
0x233661 VPBROADCASTQ %R12,%YMM6 |
0x233667 VPBROADCASTQ %R14,%YMM7 |
0x23366d NOPL (%RAX) |
(243) 0x233670 VEXTRACTI128 $0x1,%YMM0,%XMM13 |
(243) 0x233676 VPEXTRQ $0x1,%XMM10,%RSI |
(243) 0x23367c MOV %R10,-0x40(%RBP) |
(243) 0x233680 VMOVQ %XMM10,%R13 |
(243) 0x233685 VPEXTRQ $0x1,%XMM13,%RAX |
(243) 0x23368b CQTO |
(243) 0x23368d IDIV %RSI |
(243) 0x233690 MOV %RAX,%RSI |
(243) 0x233693 VMOVQ %XMM13,%RAX |
(243) 0x233698 VMOVQ %RSI,%XMM13 |
(243) 0x23369d VPEXTRQ $0x1,%XMM1,%RSI |
(243) 0x2336a3 CQTO |
(243) 0x2336a5 IDIV %R13 |
(243) 0x2336a8 VMOVQ %RAX,%XMM14 |
(243) 0x2336ad VPEXTRQ $0x1,%XMM0,%RAX |
(243) 0x2336b3 CQTO |
(243) 0x2336b5 VPUNPCKLQDQ %XMM13,%XMM14,%XMM13 |
(243) 0x2336ba IDIV %RSI |
(243) 0x2336bd VMOVQ %XMM1,%RSI |
(243) 0x2336c2 VMOVQ %RAX,%XMM14 |
(243) 0x2336c7 VMOVQ %XMM0,%RAX |
(243) 0x2336cc CQTO |
(243) 0x2336ce IDIV %RSI |
(243) 0x2336d1 VMOVQ %RAX,%XMM15 |
(243) 0x2336d6 VPUNPCKLQDQ %XMM14,%XMM15,%XMM14 |
(243) 0x2336db VINSERTI128 $0x1,%XMM13,%YMM14,%YMM13 |
(243) 0x2336e1 VPMOVQD %YMM13,%XMM14 |
(243) 0x2336e7 VPMULLQ %YMM1,%YMM13,%YMM13 |
(243) 0x2336ed VPSUBQ %YMM13,%YMM0,%YMM13 |
(243) 0x2336f2 VPADDQ %YMM0,%YMM12,%YMM0 |
(243) 0x2336f6 VPMOVQD %YMM13,%XMM13 |
(243) 0x2336fc VPADDD %XMM2,%XMM14,%XMM15 |
(243) 0x233700 VPMOVSXDQ %XMM15,%YMM15 |
(243) 0x233705 VPMULLQ %YMM15,%YMM6,%YMM19 |
(243) 0x23370b VPADDD %XMM13,%XMM3,%XMM17 |
(243) 0x233711 VPMULLQ %YMM15,%YMM4,%YMM13 |
(243) 0x233717 VPMOVSXDQ %XMM17,%YMM14 |
(243) 0x23371d VPSUBD %XMM11,%XMM17,%XMM17 |
(243) 0x233723 VPADDQ %YMM14,%YMM13,%YMM13 |
(243) 0x233728 VMOVQ %XMM13,%RAX |
(243) 0x23372d VPEXTRQ $0x1,%XMM13,%R10 |
(243) 0x233733 VEXTRACTI128 $0x1,%YMM13,%XMM13 |
(243) 0x233739 VMOVQ %XMM13,%RDX |
(243) 0x23373e VMOVSD (%R8,%RAX,8),%XMM16 |
(243) 0x233745 VPEXTRQ $0x1,%XMM13,%RSI |
(243) 0x23374b MOV %R10,-0xa8(%RBP) |
(243) 0x233752 MOV %RAX,-0xb0(%RBP) |
(243) 0x233759 VMOVSD (%R8,%RDX,8),%XMM13 |
(243) 0x23375f VMOVHPD (%R8,%R10,8),%XMM16,%XMM16 |
(243) 0x233766 MOV -0x78(%RBP),%R10 |
(243) 0x23376a VMOVHPD (%R8,%RSI,8),%XMM13,%XMM13 |
(243) 0x233770 VINSERTF32X4 $0x1,%XMM13,%YMM16,%YMM13 |
(243) 0x233777 VPMULLQ %YMM15,%YMM5,%YMM16 |
(243) 0x23377d VPADDQ %YMM14,%YMM16,%YMM16 |
(243) 0x233783 VMOVQ %XMM16,%RAX |
(243) 0x233789 VPEXTRQ $0x1,%XMM16,%R13 |
(243) 0x233790 VEXTRACTI32X4 $0x1,%YMM16,%XMM16 |
(243) 0x233797 VMOVQ %XMM16,%R12 |
(243) 0x23379d VMOVSD (%R10,%RAX,8),%XMM18 |
(243) 0x2337a4 VPEXTRQ $0x1,%XMM16,%R14 |
(243) 0x2337ab VMOVSD (%R10,%R12,8),%XMM16 |
(243) 0x2337b2 VMOVHPD (%R10,%R13,8),%XMM18,%XMM18 |
(243) 0x2337b9 VMOVHPD (%R10,%R14,8),%XMM16,%XMM16 |
(243) 0x2337c0 MOV -0xb0(%RBP),%R10 |
(243) 0x2337c7 VINSERTF32X4 $0x1,%XMM16,%YMM18,%YMM16 |
(243) 0x2337ce VPADDQ %YMM14,%YMM19,%YMM18 |
(243) 0x2337d4 VMOVQ %XMM18,%RAX |
(243) 0x2337da VPEXTRQ $0x1,%XMM18,%R14 |
(243) 0x2337e1 VEXTRACTI32X4 $0x1,%YMM18,%XMM18 |
(243) 0x2337e8 VMOVQ %XMM18,%R12 |
(243) 0x2337ee VMOVSD (%RBX,%RAX,8),%XMM20 |
(243) 0x2337f5 VPEXTRQ $0x1,%XMM18,%R13 |
(243) 0x2337fc VMULPD %YMM16,%YMM13,%YMM13 |
(243) 0x233802 VMOVSD (%RBX,%R12,8),%XMM18 |
(243) 0x233809 VMOVHPD (%RBX,%R14,8),%XMM20,%XMM20 |
(243) 0x233810 VMOVHPD (%RBX,%R13,8),%XMM18,%XMM18 |
(243) 0x233817 VINSERTF32X4 $0x1,%XMM18,%YMM20,%YMM18 |
(243) 0x23381e VADDPD %YMM18,%YMM13,%YMM20 |
(243) 0x233824 VPMOVSXDQ %XMM17,%YMM18 |
(243) 0x23382a VPADDQ %YMM18,%YMM19,%YMM17 |
(243) 0x233830 VMOVQ %XMM17,%RAX |
(243) 0x233836 VPEXTRQ $0x1,%XMM17,%R14 |
(243) 0x23383d VEXTRACTI32X4 $0x1,%YMM17,%XMM17 |
(243) 0x233844 VMOVQ %XMM17,%R12 |
(243) 0x23384a VMOVSD (%RBX,%RAX,8),%XMM19 |
(243) 0x233851 VPEXTRQ $0x1,%XMM17,%R13 |
(243) 0x233858 VMOVSD (%RBX,%R12,8),%XMM17 |
(243) 0x23385f VMOVHPD (%RBX,%R14,8),%XMM19,%XMM19 |
(243) 0x233866 VMOVHPD (%RBX,%R13,8),%XMM17,%XMM17 |
(243) 0x23386d VINSERTF32X4 $0x1,%XMM17,%YMM19,%YMM17 |
(243) 0x233874 VPMULLQ %YMM15,%YMM7,%YMM19 |
(243) 0x23387a VSUBPD %YMM17,%YMM20,%YMM17 |
(243) 0x233880 VPMULLQ %YMM15,%YMM8,%YMM20 |
(243) 0x233886 VPMULLQ %YMM15,%YMM9,%YMM15 |
(243) 0x23388c VPADDQ %YMM14,%YMM19,%YMM19 |
(243) 0x233892 VMOVQ %XMM19,%RAX |
(243) 0x233898 VPEXTRQ $0x1,%XMM19,%R13 |
(243) 0x23389f VEXTRACTI32X4 $0x1,%YMM19,%XMM19 |
(243) 0x2338a6 VPADDQ %YMM14,%YMM20,%YMM21 |
(243) 0x2338ac VPADDQ %YMM18,%YMM20,%YMM20 |
(243) 0x2338b2 VPADDQ %YMM14,%YMM15,%YMM14 |
(243) 0x2338b7 VPADDQ %YMM18,%YMM15,%YMM15 |
(243) 0x2338bd VEXTRACTI32X4 $0x1,%YMM21,%XMM22 |
(243) 0x2338c4 VMOVQ %XMM21,%R14 |
(243) 0x2338ca VEXTRACTI32X4 $0x1,%YMM15,%XMM18 |
(243) 0x2338d1 VMOVQ %XMM22,%R12 |
(243) 0x2338d7 VMOVSD (%RDI,%R12,8),%XMM23 |
(243) 0x2338de VPEXTRQ $0x1,%XMM22,%R12 |
(243) 0x2338e5 VMOVHPD (%RDI,%R12,8),%XMM23,%XMM22 |
(243) 0x2338ec VPEXTRQ $0x1,%XMM21,%R12 |
(243) 0x2338f3 VMOVSD (%RDI,%R14,8),%XMM21 |
(243) 0x2338fa VEXTRACTI32X4 $0x1,%YMM20,%XMM23 |
(243) 0x233901 VMOVQ %XMM20,%R14 |
(243) 0x233907 VMOVHPD (%RDI,%R12,8),%XMM21,%XMM21 |
(243) 0x23390e VMOVQ %XMM23,%R12 |
(243) 0x233914 VMOVSD (%RDI,%R12,8),%XMM24 |
(243) 0x23391b VPEXTRQ $0x1,%XMM23,%R12 |
(243) 0x233922 VMOVHPD (%RDI,%R12,8),%XMM24,%XMM23 |
(243) 0x233929 VEXTRACTI32X4 $0x1,%YMM14,%XMM24 |
(243) 0x233930 VPEXTRQ $0x1,%XMM20,%R12 |
(243) 0x233937 VMOVSD (%RDI,%R14,8),%XMM20 |
(243) 0x23393e VMOVQ %XMM24,%R14 |
(243) 0x233944 VMOVHPD (%RDI,%R12,8),%XMM20,%XMM20 |
(243) 0x23394b VPEXTRQ $0x1,%XMM24,%R12 |
(243) 0x233952 VMOVSD (%R11,%R14,8),%XMM24 |
(243) 0x233959 VMOVQ %XMM14,%R14 |
(243) 0x23395e VMOVHPD (%R11,%R12,8),%XMM24,%XMM24 |
(243) 0x233965 VPEXTRQ $0x1,%XMM14,%R12 |
(243) 0x23396b VMOVSD (%R11,%R14,8),%XMM14 |
(243) 0x233971 VMOVQ %XMM18,%R14 |
(243) 0x233977 VMOVHPD (%R11,%R12,8),%XMM14,%XMM14 |
(243) 0x23397d VPEXTRQ $0x1,%XMM18,%R12 |
(243) 0x233984 VMOVSD (%R11,%R14,8),%XMM18 |
(243) 0x23398b VMOVQ %XMM15,%R14 |
(243) 0x233990 VMOVHPD (%R11,%R12,8),%XMM18,%XMM18 |
(243) 0x233997 VPEXTRQ $0x1,%XMM15,%R12 |
(243) 0x23399d VMOVSD (%R11,%R14,8),%XMM15 |
(243) 0x2339a3 VMOVQ %XMM19,%R14 |
(243) 0x2339a9 VMOVHPD (%R11,%R12,8),%XMM15,%XMM15 |
(243) 0x2339af VPEXTRQ $0x1,%XMM19,%R12 |
(243) 0x2339b6 VINSERTF32X4 $0x1,%XMM24,%YMM14,%YMM14 |
(243) 0x2339bd VADDPD %YMM14,%YMM16,%YMM14 |
(243) 0x2339c3 VMOVSD (%R9,%R14,8),%XMM16 |
(243) 0x2339ca VINSERTF32X4 $0x1,%XMM18,%YMM15,%YMM15 |
(243) 0x2339d1 VMOVSD (%R9,%RAX,8),%XMM18 |
(243) 0x2339d8 VMOVHPD (%R9,%R12,8),%XMM16,%XMM16 |
(243) 0x2339df VMOVHPD (%R9,%R13,8),%XMM18,%XMM18 |
(243) 0x2339e6 VSUBPD %YMM15,%YMM14,%YMM14 |
(243) 0x2339eb VINSERTF32X4 $0x1,%XMM22,%YMM21,%YMM15 |
(243) 0x2339f2 VDIVPD %YMM14,%YMM17,%YMM14 |
(243) 0x2339f8 VINSERTF32X4 $0x1,%XMM16,%YMM18,%YMM16 |
(243) 0x2339ff VFMADD231PD %YMM16,%YMM13,%YMM15 |
(243) 0x233a05 VINSERTF32X4 $0x1,%XMM23,%YMM20,%YMM13 |
(243) 0x233a0c VSUBPD %YMM13,%YMM15,%YMM13 |
(243) 0x233a11 VDIVPD %YMM17,%YMM13,%YMM13 |
(243) 0x233a17 VMOVLPD %XMM14,(%R8,%R10,8) |
(243) 0x233a1d MOV -0xa8(%RBP),%R10 |
(243) 0x233a24 VMOVHPD %XMM14,(%R8,%R10,8) |
(243) 0x233a2a MOV -0x40(%RBP),%R10 |
(243) 0x233a2e VEXTRACTF128 $0x1,%YMM14,%XMM14 |
(243) 0x233a34 VMOVLPD %XMM14,(%R8,%RDX,8) |
(243) 0x233a3a VMOVHPD %XMM14,(%R8,%RSI,8) |
(243) 0x233a40 ADD $-0x4,%R10 |
(243) 0x233a44 VMOVLPD %XMM13,(%R9,%RAX,8) |
(243) 0x233a4a VMOVHPD %XMM13,(%R9,%R13,8) |
(243) 0x233a50 VEXTRACTF128 $0x1,%YMM13,%XMM13 |
(243) 0x233a56 VMOVLPD %XMM13,(%R9,%R14,8) |
(243) 0x233a5c VMOVHPD %XMM13,(%R9,%R12,8) |
(243) 0x233a62 JNE 233670 |
0x233a68 MOV -0x88(%RBP),%RAX |
0x233a6f MOV -0x30(%RBP),%R14D |
0x233a73 MOV -0x2c(%RBP),%R12D |
0x233a77 MOV -0x70(%RBP),%R13 |
0x233a7b MOV -0x38(%RBP),%R10 |
0x233a7f CMP %RAX,-0x90(%RBP) |
0x233a86 JE 233b7d |
0x233a8c NOPL (%RAX) |
(242) 0x233a90 MOV %RCX,%RAX |
(242) 0x233a93 CQTO |
(242) 0x233a95 IDIV %R13 |
(242) 0x233a98 MOV -0x58(%RBP),%R13 |
(242) 0x233a9c ADD %R12D,%EDX |
(242) 0x233a9f MOVSXD %EDX,%RDX |
(242) 0x233aa2 ADD %R14D,%EAX |
(242) 0x233aa5 MOV %R10,%R14 |
(242) 0x233aa8 MOV -0x78(%RBP),%R10 |
(242) 0x233aac MOVSXD %EAX,%RSI |
(242) 0x233aaf MOV -0x68(%RBP),%RAX |
(242) 0x233ab3 IMUL %RSI,%R14 |
(242) 0x233ab7 IMUL %RSI,%R13 |
(242) 0x233abb ADD %RDX,%R14 |
(242) 0x233abe ADD %RDX,%R13 |
(242) 0x233ac1 VMOVSD (%R10,%R14,8),%XMM0 |
(242) 0x233ac7 MOV -0x60(%RBP),%R14 |
(242) 0x233acb IMUL %RSI,%RAX |
(242) 0x233acf VMOVSD (%R9,%R13,8),%XMM2 |
(242) 0x233ad5 ADD %RDX,%RAX |
(242) 0x233ad8 VMULSD (%R8,%RAX,8),%XMM0,%XMM1 |
(242) 0x233ade IMUL %RSI,%R14 |
(242) 0x233ae2 LEA 0x1(%R14,%RDX,1),%R10 |
(242) 0x233ae7 ADD %RDX,%R14 |
(242) 0x233aea VMOVSD (%RBX,%R14,8),%XMM3 |
(242) 0x233af0 MOV -0x30(%RBP),%R14D |
(242) 0x233af4 MOV %R10,-0x40(%RBP) |
(242) 0x233af8 MOV -0xa0(%RBP),%R10 |
(242) 0x233aff VUNPCKLPD %XMM0,%XMM1,%XMM0 |
(242) 0x233b03 IMUL %RSI,%R10 |
(242) 0x233b07 IMUL -0x98(%RBP),%RSI |
(242) 0x233b0f LEA (%R10,%RDX,1),%R12 |
(242) 0x233b13 LEA 0x1(%R10,%RDX,1),%R10 |
(242) 0x233b18 VFMADD213SD (%RDI,%R12,8),%XMM1,%XMM2 |
(242) 0x233b1e MOV -0x2c(%RBP),%R12D |
(242) 0x233b22 VSUBSD (%RDI,%R10,8),%XMM2,%XMM2 |
(242) 0x233b28 LEA (%RSI,%RDX,1),%R10 |
(242) 0x233b2c LEA 0x1(%RSI,%RDX,1),%RDX |
(242) 0x233b31 MOV -0x40(%RBP),%RSI |
(242) 0x233b35 VMOVHPD (%R11,%R10,8),%XMM3,%XMM3 |
(242) 0x233b3b MOV -0x38(%RBP),%R10 |
(242) 0x233b3f VMOVSD (%RBX,%RSI,8),%XMM4 |
(242) 0x233b44 VMOVHPD (%R11,%RDX,8),%XMM4,%XMM4 |
(242) 0x233b4a VADDPD %XMM3,%XMM0,%XMM0 |
(242) 0x233b4e VSUBPD %XMM4,%XMM0,%XMM0 |
(242) 0x233b52 VPERMILPD $0x1,%XMM0,%XMM1 |
(242) 0x233b58 VDIVSD %XMM1,%XMM0,%XMM1 |
(242) 0x233b5c VDIVSD %XMM0,%XMM2,%XMM0 |
(242) 0x233b60 VMOVSD %XMM1,(%R8,%RAX,8) |
(242) 0x233b66 VMOVSD %XMM0,(%R9,%R13,8) |
(242) 0x233b6c MOV -0x70(%RBP),%R13 |
(242) 0x233b70 CMP %R15,%RCX |
(242) 0x233b73 LEA 0x1(%RCX),%RCX |
(242) 0x233b77 JL 233a90 |
0x233b7d MOV -0x4c(%RBP),%ESI |
0x233b80 LEA 0x2cf31(%RIP),%RDI |
0x233b87 VZEROUPPER |
0x233b8a CALL 25f750 <@plt_start@+0x540> |
0x233b8f ADD $0x98,%RSP |
0x233b96 POP %RBX |
0x233b97 POP %R12 |
0x233b99 POP %R13 |
0x233b9b POP %R14 |
0x233b9d POP %R15 |
0x233b9f POP %RBP |
0x233ba0 RET |
0x233ba1 NOPW %CS:(%RAX,%RAX,1) |
0x23904d NOPL (%RAX) |
0x23dd7d NOPL (%RAX) |
0x243f1b NOPL (%RAX,%RAX,1) |
0x246c99 NOPL (%RAX) |
0x246d31 NOPW %CS:(%RAX,%RAX,1) |
0x246e59 NOPL (%RAX) |
0x24ca79 NOPL (%RAX) |
0x25215d NOPL (%RAX) |
0x2521eb NOPL (%RAX,%RAX,1) |
0x252201 NOPW %CS:(%RAX,%RAX,1) |
0x252376 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:117-125 |
Module | exec |
nb instructions | 137 |
nb uops | 135 |
loop length | 619 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 23 |
micro-operation queue | 22.50 cycles |
front end | 22.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 4.00 | 19.33 | 19.33 | 19.33 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 4.00 | 19.33 | 19.33 | 19.33 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 22.50 |
Dispatch | 19.33 |
Overall L1 | 22.50 |
all | 6% |
load | 5% |
store | 0% |
mul | 0% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
all | 11% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x98,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x2,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 233b8f <.omp_outlined..6+0x6df> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x2,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 233b8f <.omp_outlined..6+0x6df> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
SUB %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R13,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x50(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0xb8(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x2d570(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x80(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 233b7d <.omp_outlined..6+0x6cd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x10(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R14D,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R9),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R11,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RDX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RSI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x4,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 233a90 <.omp_outlined..6+0x5e0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTD %R14D,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R12D,%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %RCX,%YMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x21c0f(%RIP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x21e58(%RIP),%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R13,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%YMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%YMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI128 $0x1,%YMM1,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTQ %R12,%YMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %R12,%YMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%YMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x2c(%RBP),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x70(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x90(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 233b7d <.omp_outlined..6+0x6cd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x4c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x2cf31(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x98,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_cell.cpp:117-125 |
Module | exec |
nb instructions | 137 |
nb uops | 135 |
loop length | 619 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 23 |
micro-operation queue | 22.50 cycles |
front end | 22.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 4.00 | 19.33 | 19.33 | 19.33 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 4.00 | 19.33 | 19.33 | 19.33 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 22.50 |
Dispatch | 19.33 |
Overall L1 | 22.50 |
all | 6% |
load | 5% |
store | 0% |
mul | 0% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
all | 11% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x98,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x2,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 233b8f <.omp_outlined..6+0x6df> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x2,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 233b8f <.omp_outlined..6+0x6df> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
SUB %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R13,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x50(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0xb8(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x2d570(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x80(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 233b7d <.omp_outlined..6+0x6cd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x10(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R14D,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R9),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R11,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RDX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RSI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x4,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 233a90 <.omp_outlined..6+0x5e0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTD %R14D,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R12D,%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %RCX,%YMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x21c0f(%RIP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x21e58(%RIP),%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R13,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%YMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%YMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI128 $0x1,%YMM1,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTQ %R12,%YMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %R12,%YMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%YMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x2c(%RBP),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x70(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x90(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 233b7d <.omp_outlined..6+0x6cd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x4c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x2cf31(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x98,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..6– | 3.38 | 1.73 |
○Loop 243 - advec_cell.cpp:118-125 - exec | 3.38 | 1.72 |
○Loop 242 - advec_cell.cpp:118-125 - exec | 0 | 0 |