Function: .omp_outlined..14 | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage: 3.36% |
---|
Function: .omp_outlined..14 | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage: 3.36% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 208 - 216 |
-------------------------------------------------------------------------------- |
208: #pragma omp parallel for simd collapse(2) |
209: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
210: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
211: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
212: double post_mass_s = pre_mass_s + mass_flux_y(i, j) - mass_flux_y(i + 0, j + 1); |
213: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 0, j + 1)) / post_mass_s; |
214: double advec_vol_s = pre_vol(i, j) + vol_flux_y(i, j) - vol_flux_y(i + 0, j + 1); |
215: density1(i, j) = post_mass_s / advec_vol_s; |
216: energy1(i, j) = post_ener_s; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/update_tile_halo_kernel.cpp: 130 - 132 |
-------------------------------------------------------------------------------- |
130: #pragma omp parallel for simd |
131: for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { |
132: for (int j = 0; j < depth; ++j) { |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x236730 PUSH %RBP |
0x236731 MOV %RSP,%RBP |
0x236734 PUSH %R15 |
0x236736 PUSH %R14 |
0x236738 PUSH %R13 |
0x23673a PUSH %R12 |
0x23673c PUSH %RBX |
0x23673d SUB $0xa8,%RSP |
0x236744 MOV (%RDX),%EAX |
0x236746 MOV (%RCX),%R15D |
0x236749 INC %EAX |
0x23674b ADD $0x2,%R15D |
0x23674f SUB %EAX,%R15D |
0x236752 JLE 236e4e |
0x236758 MOV (%R9),%R12D |
0x23675b MOV (%R8),%R14D |
0x23675e INC %R14D |
0x236761 ADD $0x2,%R12D |
0x236765 CMP %R14D,%R12D |
0x236768 JLE 236e4e |
0x23676e SUB %R14D,%R12D |
0x236771 MOV (%RDI),%ESI |
0x236773 MOVQ $0,-0x80(%RBP) |
0x23677b MOVQ $0x1,-0xc8(%RBP) |
0x236786 MOVL $0,-0x50(%RBP) |
0x23678d IMUL %R12,%R15 |
0x236791 DEC %R15 |
0x236794 MOV %R15,-0x48(%RBP) |
0x236798 SUB $0x8,%RSP |
0x23679c LEA -0x50(%RBP),%RCX |
0x2367a0 MOV %RAX,%RBX |
0x2367a3 LEA -0xc8(%RBP),%RAX |
0x2367aa LEA 0x2a40f(%RIP),%RDI |
0x2367b1 LEA -0x80(%RBP),%R8 |
0x2367b5 LEA -0x48(%RBP),%R9 |
0x2367b9 MOV %ESI,-0x4c(%RBP) |
0x2367bc MOV $0x22,%EDX |
0x2367c1 PUSH $0x1 |
0x2367c3 PUSH $0x1 |
0x2367c5 PUSH %RAX |
0x2367c6 CALL 25f740 <@plt_start@+0x530> |
0x2367cb ADD $0x20,%RSP |
0x2367cf MOV -0x48(%RBP),%RAX |
0x2367d3 MOV -0x80(%RBP),%RCX |
0x2367d7 CMP %R15,%RAX |
0x2367da CMOVL %RAX,%R15 |
0x2367de MOV %R15,-0x48(%RBP) |
0x2367e2 CMP %R15,%RCX |
0x2367e5 JG 236e3c |
0x2367eb MOV 0x18(%RBP),%R9 |
0x2367ef MOV 0x20(%RBP),%R8 |
0x2367f3 MOV 0x30(%RBP),%RDX |
0x2367f7 MOV 0x10(%RBP),%RDI |
0x2367fb MOV 0x38(%RBP),%RAX |
0x2367ff MOV 0x28(%RBP),%RSI |
0x236803 MOV %RBX,-0x40(%RBP) |
0x236807 MOV %R14D,-0x2c(%RBP) |
0x23680b MOV %R12,-0x70(%RBP) |
0x23680f MOV (%R9),%R10 |
0x236812 MOV 0x10(%R9),%R9 |
0x236816 MOV 0x10(%R8),%R11 |
0x23681a MOV (%RDI),%R13 |
0x23681d MOV 0x10(%RDI),%RDI |
0x236821 MOV 0x10(%RDX),%RBX |
0x236825 MOV %R10,-0x68(%RBP) |
0x236829 MOV (%R8),%R10 |
0x23682c MOV %RCX,%R8 |
0x23682f MOV (%RDX),%RCX |
0x236832 MOV %R9,-0x78(%RBP) |
0x236836 MOV (%RSI),%R9 |
0x236839 MOV (%RAX),%RDX |
0x23683c MOV 0x10(%RSI),%RSI |
0x236840 MOV %R13,-0xb0(%RBP) |
0x236847 MOV %R10,-0x60(%RBP) |
0x23684b MOV 0x10(%RAX),%R10 |
0x23684f MOV %RCX,-0x58(%RBP) |
0x236853 MOV %R8,%RCX |
0x236856 MOV %R15,%R8 |
0x236859 SUB %RCX,%R8 |
0x23685c MOV %R9,-0xa8(%RBP) |
0x236863 MOV %RDX,-0xa0(%RBP) |
0x23686a INC %R8 |
0x23686d CMP $0x4,%R8 |
0x236871 JB 236d1d |
0x236877 VPBROADCASTQ %RCX,%YMM0 |
0x23687d VPADDQ -0x24e85(%RIP),%YMM0,%YMM0 |
0x236885 VPBROADCASTQ -0x250ce(%RIP),%YMM12 |
0x23688e VPBROADCASTQ %R12,%YMM1 |
0x236894 MOV -0x40(%RBP),%RAX |
0x236898 VPBROADCASTD %R14D,%XMM3 |
0x23689e MOV -0x68(%RBP),%R12 |
0x2368a2 MOV -0x60(%RBP),%R14 |
0x2368a6 VPBROADCASTQ %R9,%YMM7 |
0x2368ac MOV -0x58(%RBP),%R9 |
0x2368b0 MOV %R8,-0x90(%RBP) |
0x2368b7 AND $-0x4,%R8 |
0x2368bb VPBROADCASTQ %R13,%YMM4 |
0x2368c1 VPBROADCASTQ %RDX,%YMM9 |
0x2368c7 VPCMPEQD %XMM11,%XMM11,%XMM11 |
0x2368cc ADD %R8,%RCX |
0x2368cf VEXTRACTI128 $0x1,%YMM1,%XMM10 |
0x2368d5 MOV %R8,-0x88(%RBP) |
0x2368dc VPBROADCASTD %EAX,%XMM2 |
0x2368e2 VPBROADCASTQ %R12,%YMM5 |
0x2368e8 VPBROADCASTQ %R14,%YMM6 |
0x2368ee VPBROADCASTQ %R9,%YMM8 |
0x2368f4 NOPW %CS:(%RAX,%RAX,1) |
(309) 0x236900 VEXTRACTI128 $0x1,%YMM0,%XMM13 |
(309) 0x236906 VPEXTRQ $0x1,%XMM10,%R14 |
(309) 0x23690c VPEXTRQ $0x1,%XMM13,%RAX |
(309) 0x236912 CQTO |
(309) 0x236914 IDIV %R14 |
(309) 0x236917 VMOVQ %XMM10,%R14 |
(309) 0x23691c VMOVQ %RAX,%XMM14 |
(309) 0x236921 VMOVQ %XMM13,%RAX |
(309) 0x236926 CQTO |
(309) 0x236928 IDIV %R14 |
(309) 0x23692b VPEXTRQ $0x1,%XMM1,%R14 |
(309) 0x236931 VMOVQ %RAX,%XMM13 |
(309) 0x236936 VPEXTRQ $0x1,%XMM0,%RAX |
(309) 0x23693c CQTO |
(309) 0x23693e VPUNPCKLQDQ %XMM14,%XMM13,%XMM13 |
(309) 0x236943 IDIV %R14 |
(309) 0x236946 VMOVQ %XMM1,%R14 |
(309) 0x23694b VMOVQ %RAX,%XMM14 |
(309) 0x236950 VMOVQ %XMM0,%RAX |
(309) 0x236955 CQTO |
(309) 0x236957 IDIV %R14 |
(309) 0x23695a ADD $-0x4,%R8 |
(309) 0x23695e VMOVQ %RAX,%XMM15 |
(309) 0x236963 VPUNPCKLQDQ %XMM14,%XMM15,%XMM14 |
(309) 0x236968 VINSERTI128 $0x1,%XMM13,%YMM14,%YMM13 |
(309) 0x23696e VPMOVQD %YMM13,%XMM14 |
(309) 0x236974 VPMULLQ %YMM1,%YMM13,%YMM13 |
(309) 0x23697a VPSUBQ %YMM13,%YMM0,%YMM13 |
(309) 0x23697f VPADDQ %YMM0,%YMM12,%YMM0 |
(309) 0x236983 VPMOVQD %YMM13,%XMM13 |
(309) 0x236989 VPADDD %XMM14,%XMM2,%XMM17 |
(309) 0x23698f VPMOVSXDQ %XMM17,%YMM15 |
(309) 0x236995 VPSUBD %XMM11,%XMM17,%XMM17 |
(309) 0x23699b VPMULLQ %YMM15,%YMM4,%YMM14 |
(309) 0x2369a1 VPADDD %XMM3,%XMM13,%XMM13 |
(309) 0x2369a5 VPMOVSXDQ %XMM13,%YMM13 |
(309) 0x2369aa VPADDQ %YMM13,%YMM14,%YMM14 |
(309) 0x2369af VMOVQ %XMM14,%RAX |
(309) 0x2369b4 VPEXTRQ $0x1,%XMM14,%RDX |
(309) 0x2369ba VEXTRACTI128 $0x1,%YMM14,%XMM14 |
(309) 0x2369c0 VMOVQ %XMM14,%R9 |
(309) 0x2369c5 VPEXTRQ $0x1,%XMM14,%R14 |
(309) 0x2369cb VMOVSD (%RDI,%RAX,8),%XMM16 |
(309) 0x2369d2 MOV %RAX,-0xb8(%RBP) |
(309) 0x2369d9 VMOVSD (%RDI,%R9,8),%XMM14 |
(309) 0x2369df VMOVHPD (%RDI,%RDX,8),%XMM16,%XMM16 |
(309) 0x2369e6 MOV %R9,-0x38(%RBP) |
(309) 0x2369ea MOV -0x78(%RBP),%R9 |
(309) 0x2369ee MOV %R14,-0xc0(%RBP) |
(309) 0x2369f5 VMOVHPD (%RDI,%R14,8),%XMM14,%XMM14 |
(309) 0x2369fb VINSERTF32X4 $0x1,%XMM14,%YMM16,%YMM14 |
(309) 0x236a02 VPMULLQ %YMM15,%YMM5,%YMM16 |
(309) 0x236a08 VPADDQ %YMM13,%YMM16,%YMM16 |
(309) 0x236a0e VMOVQ %XMM16,%RAX |
(309) 0x236a14 VPEXTRQ $0x1,%XMM16,%R14 |
(309) 0x236a1b VEXTRACTI32X4 $0x1,%YMM16,%XMM16 |
(309) 0x236a22 VMOVQ %XMM16,%R12 |
(309) 0x236a28 VMOVSD (%R9,%RAX,8),%XMM18 |
(309) 0x236a2f VPEXTRQ $0x1,%XMM16,%R13 |
(309) 0x236a36 VMOVSD (%R9,%R12,8),%XMM16 |
(309) 0x236a3d VMOVHPD (%R9,%R14,8),%XMM18,%XMM18 |
(309) 0x236a44 VMOVHPD (%R9,%R13,8),%XMM16,%XMM16 |
(309) 0x236a4b MOV -0xb8(%RBP),%R9 |
(309) 0x236a52 VINSERTF32X4 $0x1,%XMM16,%YMM18,%YMM16 |
(309) 0x236a59 VPMULLQ %YMM15,%YMM6,%YMM18 |
(309) 0x236a5f VMULPD %YMM16,%YMM14,%YMM14 |
(309) 0x236a65 VPADDQ %YMM13,%YMM18,%YMM18 |
(309) 0x236a6b VMOVQ %XMM18,%RAX |
(309) 0x236a71 VPEXTRQ $0x1,%XMM18,%R14 |
(309) 0x236a78 VEXTRACTI32X4 $0x1,%YMM18,%XMM18 |
(309) 0x236a7f VMOVQ %XMM18,%R12 |
(309) 0x236a85 VMOVSD (%R11,%RAX,8),%XMM19 |
(309) 0x236a8c VPEXTRQ $0x1,%XMM18,%R13 |
(309) 0x236a93 VMOVSD (%R11,%R12,8),%XMM18 |
(309) 0x236a9a VMOVHPD (%R11,%R14,8),%XMM19,%XMM19 |
(309) 0x236aa1 VMOVHPD (%R11,%R13,8),%XMM18,%XMM18 |
(309) 0x236aa8 VINSERTF32X4 $0x1,%XMM18,%YMM19,%YMM18 |
(309) 0x236aaf VADDPD %YMM18,%YMM14,%YMM19 |
(309) 0x236ab5 VPMOVSXDQ %XMM17,%YMM18 |
(309) 0x236abb VPMULLQ %YMM18,%YMM6,%YMM17 |
(309) 0x236ac1 VPADDQ %YMM13,%YMM17,%YMM17 |
(309) 0x236ac7 VMOVQ %XMM17,%RAX |
(309) 0x236acd VPEXTRQ $0x1,%XMM17,%R14 |
(309) 0x236ad4 VEXTRACTI32X4 $0x1,%YMM17,%XMM17 |
(309) 0x236adb VMOVQ %XMM17,%R12 |
(309) 0x236ae1 VMOVSD (%R11,%RAX,8),%XMM20 |
(309) 0x236ae8 VPEXTRQ $0x1,%XMM17,%R13 |
(309) 0x236aef VMOVSD (%R11,%R12,8),%XMM17 |
(309) 0x236af6 VMOVHPD (%R11,%R14,8),%XMM20,%XMM20 |
(309) 0x236afd VMOVHPD (%R11,%R13,8),%XMM17,%XMM17 |
(309) 0x236b04 VINSERTF32X4 $0x1,%XMM17,%YMM20,%YMM17 |
(309) 0x236b0b VPMULLQ %YMM15,%YMM8,%YMM20 |
(309) 0x236b11 VSUBPD %YMM17,%YMM19,%YMM17 |
(309) 0x236b17 VPMULLQ %YMM15,%YMM7,%YMM19 |
(309) 0x236b1d VPMULLQ %YMM15,%YMM9,%YMM15 |
(309) 0x236b23 VPADDQ %YMM13,%YMM20,%YMM20 |
(309) 0x236b29 VEXTRACTI32X4 $0x1,%YMM20,%XMM21 |
(309) 0x236b30 VPEXTRQ $0x1,%XMM20,%R14 |
(309) 0x236b37 VMOVQ %XMM21,%R12 |
(309) 0x236b3d VPADDQ %YMM13,%YMM15,%YMM15 |
(309) 0x236b42 VPADDQ %YMM13,%YMM19,%YMM19 |
(309) 0x236b48 VMOVSD (%RBX,%R12,8),%XMM22 |
(309) 0x236b4f VPEXTRQ $0x1,%XMM21,%R12 |
(309) 0x236b56 VPEXTRQ $0x1,%XMM19,%RAX |
(309) 0x236b5d VMOVQ %XMM19,%R13 |
(309) 0x236b63 VEXTRACTI32X4 $0x1,%YMM19,%XMM19 |
(309) 0x236b6a VMOVHPD (%RBX,%R12,8),%XMM22,%XMM21 |
(309) 0x236b71 VPMULLQ %YMM18,%YMM8,%YMM22 |
(309) 0x236b77 VMOVQ %XMM20,%R12 |
(309) 0x236b7d VPMULLQ %YMM18,%YMM9,%YMM18 |
(309) 0x236b83 VMOVSD (%RBX,%R12,8),%XMM20 |
(309) 0x236b8a VMOVHPD (%RBX,%R14,8),%XMM20,%XMM20 |
(309) 0x236b91 VPADDQ %YMM13,%YMM22,%YMM22 |
(309) 0x236b97 VPADDQ %YMM13,%YMM18,%YMM13 |
(309) 0x236b9d VEXTRACTI32X4 $0x1,%YMM22,%XMM23 |
(309) 0x236ba4 VMOVQ %XMM22,%R14 |
(309) 0x236baa VEXTRACTI32X4 $0x1,%YMM13,%XMM18 |
(309) 0x236bb1 VMOVQ %XMM23,%R12 |
(309) 0x236bb7 VMOVSD (%RBX,%R12,8),%XMM24 |
(309) 0x236bbe VPEXTRQ $0x1,%XMM23,%R12 |
(309) 0x236bc5 VMOVHPD (%RBX,%R12,8),%XMM24,%XMM23 |
(309) 0x236bcc VEXTRACTI32X4 $0x1,%YMM15,%XMM24 |
(309) 0x236bd3 VPEXTRQ $0x1,%XMM22,%R12 |
(309) 0x236bda VMOVSD (%RBX,%R14,8),%XMM22 |
(309) 0x236be1 VMOVQ %XMM24,%R14 |
(309) 0x236be7 VMOVHPD (%RBX,%R12,8),%XMM22,%XMM22 |
(309) 0x236bee VPEXTRQ $0x1,%XMM24,%R12 |
(309) 0x236bf5 VMOVSD (%R10,%R14,8),%XMM24 |
(309) 0x236bfc VMOVQ %XMM15,%R14 |
(309) 0x236c01 VMOVHPD (%R10,%R12,8),%XMM24,%XMM24 |
(309) 0x236c08 VPEXTRQ $0x1,%XMM15,%R12 |
(309) 0x236c0e VMOVSD (%R10,%R14,8),%XMM15 |
(309) 0x236c14 VMOVQ %XMM18,%R14 |
(309) 0x236c1a VMOVHPD (%R10,%R12,8),%XMM15,%XMM15 |
(309) 0x236c20 VPEXTRQ $0x1,%XMM18,%R12 |
(309) 0x236c27 VMOVSD (%R10,%R14,8),%XMM18 |
(309) 0x236c2e VMOVQ %XMM13,%R14 |
(309) 0x236c33 VMOVHPD (%R10,%R12,8),%XMM18,%XMM18 |
(309) 0x236c3a VPEXTRQ $0x1,%XMM13,%R12 |
(309) 0x236c40 VMOVSD (%R10,%R14,8),%XMM13 |
(309) 0x236c46 VMOVQ %XMM19,%R14 |
(309) 0x236c4c VMOVHPD (%R10,%R12,8),%XMM13,%XMM13 |
(309) 0x236c52 VPEXTRQ $0x1,%XMM19,%R12 |
(309) 0x236c59 VINSERTF32X4 $0x1,%XMM24,%YMM15,%YMM15 |
(309) 0x236c60 VADDPD %YMM15,%YMM16,%YMM15 |
(309) 0x236c66 VINSERTF32X4 $0x1,%XMM18,%YMM13,%YMM13 |
(309) 0x236c6d VMOVSD (%RSI,%R14,8),%XMM16 |
(309) 0x236c74 VMOVHPD (%RSI,%R12,8),%XMM16,%XMM16 |
(309) 0x236c7b VSUBPD %YMM13,%YMM15,%YMM13 |
(309) 0x236c80 VMOVSD (%RSI,%R13,8),%XMM15 |
(309) 0x236c86 VMOVHPD (%RSI,%RAX,8),%XMM15,%XMM15 |
(309) 0x236c8b VDIVPD %YMM13,%YMM17,%YMM13 |
(309) 0x236c91 VMOVLPD %XMM13,(%RDI,%R9,8) |
(309) 0x236c97 VMOVHPD %XMM13,(%RDI,%RDX,8) |
(309) 0x236c9c MOV -0x38(%RBP),%RDX |
(309) 0x236ca0 VEXTRACTF128 $0x1,%YMM13,%XMM13 |
(309) 0x236ca6 VMOVLPD %XMM13,(%RDI,%RDX,8) |
(309) 0x236cab MOV -0xc0(%RBP),%RDX |
(309) 0x236cb2 VMOVHPD %XMM13,(%RDI,%RDX,8) |
(309) 0x236cb7 VINSERTF32X4 $0x1,%XMM16,%YMM15,%YMM13 |
(309) 0x236cbe VINSERTF32X4 $0x1,%XMM21,%YMM20,%YMM16 |
(309) 0x236cc5 VFMADD231PD %YMM13,%YMM14,%YMM16 |
(309) 0x236ccb VINSERTF32X4 $0x1,%XMM23,%YMM22,%YMM13 |
(309) 0x236cd2 VSUBPD %YMM13,%YMM16,%YMM13 |
(309) 0x236cd8 VDIVPD %YMM17,%YMM13,%YMM13 |
(309) 0x236cde VMOVLPD %XMM13,(%RSI,%R13,8) |
(309) 0x236ce4 VMOVHPD %XMM13,(%RSI,%RAX,8) |
(309) 0x236ce9 VEXTRACTF128 $0x1,%YMM13,%XMM13 |
(309) 0x236cef VMOVLPD %XMM13,(%RSI,%R14,8) |
(309) 0x236cf5 VMOVHPD %XMM13,(%RSI,%R12,8) |
(309) 0x236cfb JNE 236900 |
0x236d01 MOV -0x88(%RBP),%RAX |
0x236d08 MOV -0x2c(%RBP),%R14D |
0x236d0c MOV -0x70(%RBP),%R12 |
0x236d10 CMP %RAX,-0x90(%RBP) |
0x236d17 JE 236e3c |
0x236d1d MOV %R15,-0x98(%RBP) |
0x236d24 NOPW %CS:(%RAX,%RAX,1) |
(308) 0x236d30 MOV %RCX,%RAX |
(308) 0x236d33 CQTO |
(308) 0x236d35 MOV %RCX,-0x38(%RBP) |
(308) 0x236d39 IDIV %R12 |
(308) 0x236d3c MOV -0x40(%RBP),%R8 |
(308) 0x236d40 MOV -0x78(%RBP),%R9 |
(308) 0x236d44 MOV -0x58(%RBP),%RCX |
(308) 0x236d48 MOV -0x60(%RBP),%R15 |
(308) 0x236d4c ADD %R14D,%EDX |
(308) 0x236d4f MOV -0x68(%RBP),%R14 |
(308) 0x236d53 MOVSXD %EDX,%R13 |
(308) 0x236d56 MOV -0xb0(%RBP),%RDX |
(308) 0x236d5d MOV %R15,%R12 |
(308) 0x236d60 ADD %EAX,%R8D |
(308) 0x236d63 MOVSXD %R8D,%R8 |
(308) 0x236d66 IMUL %R8,%R14 |
(308) 0x236d6a IMUL %R8,%RDX |
(308) 0x236d6e IMUL %R8,%R12 |
(308) 0x236d72 ADD %R13,%R14 |
(308) 0x236d75 ADD %R13,%RDX |
(308) 0x236d78 ADD %R13,%R12 |
(308) 0x236d7b VMOVSD (%R9,%R14,8),%XMM0 |
(308) 0x236d81 MOV -0x40(%RBP),%R9 |
(308) 0x236d85 VMOVSD (%R11,%R12,8),%XMM3 |
(308) 0x236d8b MOV -0x70(%RBP),%R12 |
(308) 0x236d8f VMULSD (%RDI,%RDX,8),%XMM0,%XMM1 |
(308) 0x236d94 LEA 0x1(%R9,%RAX,1),%EAX |
(308) 0x236d99 MOV %RCX,%R9 |
(308) 0x236d9c IMUL %R8,%R9 |
(308) 0x236da0 MOVSXD %EAX,%R14 |
(308) 0x236da3 MOV -0xa8(%RBP),%RAX |
(308) 0x236daa ADD %R13,%R9 |
(308) 0x236dad IMUL %R14,%RCX |
(308) 0x236db1 ADD %R13,%RCX |
(308) 0x236db4 VUNPCKLPD %XMM0,%XMM1,%XMM0 |
(308) 0x236db8 IMUL %R8,%RAX |
(308) 0x236dbc ADD %R13,%RAX |
(308) 0x236dbf VMOVSD (%RSI,%RAX,8),%XMM2 |
(308) 0x236dc4 VFMADD213SD (%RBX,%R9,8),%XMM1,%XMM2 |
(308) 0x236dca MOV %R15,%R9 |
(308) 0x236dcd IMUL %R14,%R9 |
(308) 0x236dd1 MOV -0x98(%RBP),%R15 |
(308) 0x236dd8 ADD %R13,%R9 |
(308) 0x236ddb VSUBSD (%RBX,%RCX,8),%XMM2,%XMM2 |
(308) 0x236de0 MOV -0xa0(%RBP),%RCX |
(308) 0x236de7 VMOVSD (%R11,%R9,8),%XMM4 |
(308) 0x236ded IMUL %RCX,%R8 |
(308) 0x236df1 IMUL %RCX,%R14 |
(308) 0x236df5 MOV -0x38(%RBP),%RCX |
(308) 0x236df9 ADD %R13,%R8 |
(308) 0x236dfc ADD %R13,%R14 |
(308) 0x236dff VMOVHPD (%R10,%R8,8),%XMM3,%XMM3 |
(308) 0x236e05 VMOVHPD (%R10,%R14,8),%XMM4,%XMM4 |
(308) 0x236e0b MOV -0x2c(%RBP),%R14D |
(308) 0x236e0f VADDPD %XMM3,%XMM0,%XMM0 |
(308) 0x236e13 VSUBPD %XMM4,%XMM0,%XMM0 |
(308) 0x236e17 VPERMILPD $0x1,%XMM0,%XMM1 |
(308) 0x236e1d VDIVSD %XMM1,%XMM0,%XMM1 |
(308) 0x236e21 VDIVSD %XMM0,%XMM2,%XMM0 |
(308) 0x236e25 VMOVSD %XMM1,(%RDI,%RDX,8) |
(308) 0x236e2a VMOVSD %XMM0,(%RSI,%RAX,8) |
(308) 0x236e2f CMP %R15,%RCX |
(308) 0x236e32 LEA 0x1(%RCX),%RCX |
(308) 0x236e36 JL 236d30 |
0x236e3c MOV -0x4c(%RBP),%ESI |
0x236e3f LEA 0x29d92(%RIP),%RDI |
0x236e46 VZEROUPPER |
0x236e49 CALL 25f750 <@plt_start@+0x540> |
0x236e4e ADD $0xa8,%RSP |
0x236e55 POP %RBX |
0x236e56 POP %R12 |
0x236e58 POP %R13 |
0x236e5a POP %R14 |
0x236e5c POP %R15 |
0x236e5e POP %RBP |
0x236e5f RET |
0x23abef NOP |
0x23e476 INT $0x3 |
0x23e477 INT $0x3 |
0x23e478 INT $0x3 |
0x23e479 INT $0x3 |
0x23e47a INT $0x3 |
0x23e47b INT $0x3 |
0x23e47c INT $0x3 |
0x23e47d INT $0x3 |
0x23e47e INT $0x3 |
0x23e47f INT $0x3 |
0x247cbc INT $0x3 |
0x247cbd INT $0x3 |
0x247cbe INT $0x3 |
0x247cbf INT $0x3 |
0x24cf19 NOPL (%RAX) |
0x252fdf NOP |
0x25306b NOPL (%RAX,%RAX,1) |
0x253081 NOPW %CS:(%RAX,%RAX,1) |
0x2531f6 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:208-216 |
Module | exec |
nb instructions | 146 |
nb uops | 136 |
loop length | 600 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 22.67 cycles |
front end | 22.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 4.00 | 19.00 | 19.00 | 19.00 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 4.00 | 19.00 | 19.00 | 19.00 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 22.67 |
Dispatch | 19.00 |
Overall L1 | 22.67 |
all | 6% |
load | 6% |
store | 0% |
mul | 0% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 11% |
load | 13% |
store | 10% |
mul | 12% |
add-sub | 13% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0xa8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x2,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 236e4e <.omp_outlined..14+0x71e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x2,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 236e4e <.omp_outlined..14+0x71e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R12,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x50(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0xc8(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x2a40f(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x80(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 236e3c <.omp_outlined..14+0x70c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R9),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RSI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R13,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RAX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 236d1d <.omp_outlined..14+0x5ed> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ %RCX,%YMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x24e85(%RIP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x250ce(%RIP),%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R12,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTD %R14D,%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %R9,%YMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x58(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x4,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R13,%YMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%YMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI128 $0x1,%YMM1,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTD %EAX,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R12,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%YMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R9,%YMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x2c(%RBP),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x70(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x90(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 236e3c <.omp_outlined..14+0x70c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x4c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x29d92(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0xa8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_cell.cpp:208-216 |
Module | exec |
nb instructions | 146 |
nb uops | 136 |
loop length | 600 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 22.67 cycles |
front end | 22.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 4.00 | 19.00 | 19.00 | 19.00 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 4.00 | 19.00 | 19.00 | 19.00 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 22.67 |
Dispatch | 19.00 |
Overall L1 | 22.67 |
all | 6% |
load | 6% |
store | 0% |
mul | 0% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 11% |
load | 13% |
store | 10% |
mul | 12% |
add-sub | 13% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0xa8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x2,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 236e4e <.omp_outlined..14+0x71e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x2,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 236e4e <.omp_outlined..14+0x71e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R12,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x50(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0xc8(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x2a40f(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x80(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 236e3c <.omp_outlined..14+0x70c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R9),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RSI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R13,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RAX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 236d1d <.omp_outlined..14+0x5ed> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ %RCX,%YMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x24e85(%RIP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x250ce(%RIP),%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R12,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTD %R14D,%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %R9,%YMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x58(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x4,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R13,%YMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%YMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI128 $0x1,%YMM1,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTD %EAX,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R12,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%YMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R9,%YMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x2c(%RBP),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x70(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x90(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 236e3c <.omp_outlined..14+0x70c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x4c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x29d92(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0xa8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..14– | 3.36 | 1.72 |
○Loop 309 - advec_cell.cpp:209-216 - exec | 3.36 | 1.71 |
○Loop 308 - advec_cell.cpp:209-216 - exec | 0 | 0 |