Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage: 0.73% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:208-216 [...] | Coverage: 0.73% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 208 - 216 |
-------------------------------------------------------------------------------- |
208: #pragma omp parallel for simd collapse(2) |
209: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
210: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
211: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
212: double post_mass_s = pre_mass_s + mass_flux_y(i, j) - mass_flux_y(i + 0, j + 1); |
213: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 0, j + 1)) / post_mass_s; |
214: double advec_vol_s = pre_vol(i, j) + vol_flux_y(i, j) - vol_flux_y(i + 0, j + 1); |
215: density1(i, j) = post_mass_s / advec_vol_s; |
216: energy1(i, j) = post_ener_s; |
0x41c760 PUSH %RBP |
0x41c761 MOV %RSP,%RBP |
0x41c764 PUSH %R15 |
0x41c766 PUSH %R14 |
0x41c768 PUSH %R13 |
0x41c76a PUSH %R12 |
0x41c76c PUSH %RBX |
0x41c76d AND $-0x20,%RSP |
0x41c771 SUB $0x120,%RSP |
0x41c778 MOV %RDX,%R13 |
0x41c77b MOV 0x40(%RBP),%RAX |
0x41c77f MOV 0x30(%RBP),%R10 |
0x41c783 MOV 0x28(%RBP),%RSI |
0x41c787 MOV 0x18(%RBP),%RBX |
0x41c78b MOV 0x10(%RBP),%R14 |
0x41c78f MOV 0x20(%RBP),%EDX |
0x41c792 MOV %EDX,0x14(%RSP) |
0x41c796 MOVL $0,0x3c(%RSP) |
0x41c79e TEST %RAX,%RAX |
0x41c7a1 JS 41cd15 |
0x41c7a7 MOV %R8,%R12 |
0x41c7aa MOV %RCX,0x20(%RSP) |
0x41c7af MOV %R9,0x28(%RSP) |
0x41c7b4 MOV %RSI,0x18(%RSP) |
0x41c7b9 MOV (%RDI),%ESI |
0x41c7bb MOVQ $0,0x60(%RSP) |
0x41c7c4 MOV %RAX,0x58(%RSP) |
0x41c7c9 MOVQ $0x1,0x98(%RSP) |
0x41c7d5 SUB $0x8,%RSP |
0x41c7d9 LEA 0xa0(%RSP),%RAX |
0x41c7e1 LEA 0x44(%RSP),%RCX |
0x41c7e6 LEA 0x68(%RSP),%R8 |
0x41c7eb LEA 0x60(%RSP),%R9 |
0x41c7f0 MOV $0x480400,%EDI |
0x41c7f5 MOV %ESI,0x40(%RSP) |
0x41c7f9 MOV $0x22,%EDX |
0x41c7fe PUSH $0x1 |
0x41c800 PUSH $0x1 |
0x41c802 PUSH %RAX |
0x41c803 MOV %R10,%R15 |
0x41c806 CALL 4031e0 <__kmpc_for_static_init_8@plt> |
0x41c80b ADD $0x20,%RSP |
0x41c80f MOV 0x60(%RSP),%RSI |
0x41c814 MOV 0x58(%RSP),%RAX |
0x41c819 MOV %RAX,0x50(%RSP) |
0x41c81e CMP %RAX,%RSI |
0x41c821 JA 41ccf6 |
0x41c827 MOV %R15,%R11 |
0x41c82a SUB 0x18(%RSP),%R11D |
0x41c82f MOV (%R13),%RAX |
0x41c833 MOV %RAX,0x48(%RSP) |
0x41c838 MOV 0x10(%R13),%R13 |
0x41c83c MOV (%R14),%R9 |
0x41c83f MOV 0x10(%R14),%RAX |
0x41c843 MOV %RAX,0x30(%RSP) |
0x41c848 MOV (%R12),%R14 |
0x41c84c MOV 0x10(%R12),%R12 |
0x41c851 MOV 0x20(%RSP),%RAX |
0x41c856 MOV (%RAX),%R10 |
0x41c859 MOV 0x10(%RAX),%RDI |
0x41c85d MOV (%RBX),%R8 |
0x41c860 MOV 0x10(%RBX),%R15 |
0x41c864 LEA 0x1(%RSI),%RAX |
0x41c868 MOV 0x50(%RSP),%RDX |
0x41c86d LEA 0x1(%RDX),%RCX |
0x41c871 CMP %RCX,%RAX |
0x41c874 CMOVG %RAX,%RCX |
0x41c878 MOV 0x28(%RSP),%RAX |
0x41c87d MOV (%RAX),%RDX |
0x41c880 MOV 0x10(%RAX),%RBX |
0x41c884 SUB %RSI,%RCX |
0x41c887 MOV $-0x8,%EAX |
0x41c88c MOV %RCX,0x78(%RSP) |
0x41c891 AND %RCX,%RAX |
0x41c894 MOV %R9,0x40(%RSP) |
0x41c899 MOV %R14,0x28(%RSP) |
0x41c89e MOV %R8,0x20(%RSP) |
0x41c8a3 MOV %RDX,0x88(%RSP) |
0x41c8ab MOV %R10,0x90(%RSP) |
0x41c8b3 JE 41ce0c |
0x41c8b9 MOV %RAX,%RCX |
0x41c8bc MOV %R11,0x70(%RSP) |
0x41c8c1 VPBROADCASTQ %R11,%YMM8 |
0x41c8c7 MOV 0x14(%RSP),%EAX |
0x41c8cb VPBROADCASTD %EAX,%YMM0 |
0x41c8d1 VMOVDQU %YMM0,0xe0(%RSP) |
0x41c8da MOV 0x18(%RSP),%RAX |
0x41c8df VPBROADCASTQ %RAX,%YMM0 |
0x41c8e5 VMOVDQU %YMM0,0xc0(%RSP) |
0x41c8ee MOV 0x48(%RSP),%RAX |
0x41c8f3 VPBROADCASTQ %RAX,%YMM0 |
0x41c8f9 VMOVDQU %YMM0,0xa0(%RSP) |
0x41c902 VPBROADCASTQ %R9,%YMM15 |
0x41c908 VPBROADCASTQ %R14,%YMM16 |
0x41c90e VPBROADCASTQ %R10,%YMM17 |
0x41c914 VPBROADCASTQ %R8,%YMM18 |
0x41c91a MOV %RSI,0x68(%RSP) |
0x41c91f VPBROADCASTQ %RSI,%YMM0 |
0x41c925 VPADDQ 0x47d53(%RIP),%YMM0,%YMM9 |
0x41c92d VPADDQ 0x47bcb(%RIP),%YMM0,%YMM10 |
0x41c935 VPBROADCASTQ %RDX,%YMM19 |
0x41c93b XOR %ESI,%ESI |
0x41c93d MOV %RBX,0x80(%RSP) |
0x41c945 NOPW %CS:(%RAX,%RAX,1) |
(152) 0x41c950 VMOVDQA %YMM10,%YMM0 |
(152) 0x41c954 VMOVDQA %YMM8,%YMM1 |
(152) 0x41c958 MOV %R15,%RBX |
(152) 0x41c95b MOV %RDI,%R15 |
(152) 0x41c95e MOV %R12,%RDI |
(152) 0x41c961 MOV %R13,%R12 |
(152) 0x41c964 MOV %RCX,%R13 |
(152) 0x41c967 MOV $0x452aa0,%R14 |
(152) 0x41c96e CALL %R14 |
(152) 0x41c971 VMOVDQA %YMM0,%YMM11 |
(152) 0x41c975 VMOVDQA %YMM9,%YMM0 |
(152) 0x41c979 VMOVDQA %YMM8,%YMM1 |
(152) 0x41c97d CALL %R14 |
(152) 0x41c980 VPMOVQD %YMM11,%XMM1 |
(152) 0x41c986 VPMOVQD %YMM0,%XMM0 |
(152) 0x41c98c VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(152) 0x41c992 VPADDD 0xe0(%RSP),%YMM0,%YMM21 |
(152) 0x41c99a VMOVDQA %YMM10,%YMM0 |
(152) 0x41c99e VMOVDQA %YMM8,%YMM1 |
(152) 0x41c9a2 MOV $0x452870,%R14 |
(152) 0x41c9a9 CALL %R14 |
(152) 0x41c9ac VMOVDQA %YMM0,%YMM11 |
(152) 0x41c9b0 VMOVDQA %YMM9,%YMM0 |
(152) 0x41c9b4 VMOVDQA %YMM8,%YMM1 |
(152) 0x41c9b8 CALL %R14 |
(152) 0x41c9bb MOV %R13,%RCX |
(152) 0x41c9be MOV %R12,%R13 |
(152) 0x41c9c1 MOV %RDI,%R12 |
(152) 0x41c9c4 MOV %R15,%RDI |
(152) 0x41c9c7 MOV %RBX,%R15 |
(152) 0x41c9ca MOV 0x80(%RSP),%RBX |
(152) 0x41c9d2 VEXTRACTI32X4 $0x1,%YMM21,%XMM1 |
(152) 0x41c9d9 VPMOVSXDQ %XMM1,%YMM2 |
(152) 0x41c9de VMOVDQU 0xa0(%RSP),%YMM1 |
(152) 0x41c9e7 VPMULLQ %YMM2,%YMM1,%YMM5 |
(152) 0x41c9ed VPMOVSXDQ %XMM21,%YMM22 |
(152) 0x41c9f3 VPMULLQ %YMM22,%YMM1,%YMM1 |
(152) 0x41c9f9 VMOVDQU 0xc0(%RSP),%YMM12 |
(152) 0x41ca02 VPADDQ %YMM12,%YMM11,%YMM3 |
(152) 0x41ca07 VPSLLQ $0x20,%YMM3,%YMM3 |
(152) 0x41ca0c VPSRAQ $0x20,%YMM3,%YMM7 |
(152) 0x41ca13 VXORPS %XMM4,%XMM4,%XMM4 |
(152) 0x41ca17 VPMULLQ %YMM22,%YMM15,%YMM4 |
(152) 0x41ca1d VPADDQ %YMM7,%YMM1,%YMM1 |
(152) 0x41ca21 KXNORW %K0,%K0,%K3 |
(152) 0x41ca25 VPXOR %XMM3,%XMM3,%XMM3 |
(152) 0x41ca29 VPMULLQ %YMM22,%YMM16,%YMM6 |
(152) 0x41ca2f VPADDQ %YMM7,%YMM4,%YMM11 |
(152) 0x41ca33 KXNORW %K0,%K0,%K2 |
(152) 0x41ca37 VPXOR %XMM4,%XMM4,%XMM4 |
(152) 0x41ca3b VPADDQ %YMM7,%YMM6,%YMM23 |
(152) 0x41ca41 KXNORW %K0,%K0,%K1 |
(152) 0x41ca45 VPXOR %XMM6,%XMM6,%XMM6 |
(152) 0x41ca49 VGATHERQPD (%R13,%YMM1,8),%YMM3{%K3} |
(152) 0x41ca51 VPCMPEQD %YMM13,%YMM13,%YMM13 |
(152) 0x41ca56 VPSUBD %YMM13,%YMM21,%YMM21 |
(152) 0x41ca5c VPMOVSXDQ %XMM21,%YMM25 |
(152) 0x41ca62 VPMULLQ %YMM25,%YMM16,%YMM26 |
(152) 0x41ca68 MOV 0x30(%RSP),%RDX |
(152) 0x41ca6d VGATHERQPD (%RDX,%YMM11,8),%YMM4{%K2} |
(152) 0x41ca74 VXORPS %XMM11,%XMM11,%XMM11 |
(152) 0x41ca79 VPMULLQ %YMM22,%YMM17,%YMM11 |
(152) 0x41ca7f VPADDQ %YMM7,%YMM11,%YMM11 |
(152) 0x41ca83 KXNORW %K0,%K0,%K2 |
(152) 0x41ca87 VGATHERQPD (%R12,%YMM23,8),%YMM6{%K1} |
(152) 0x41ca8e VXORPD %XMM23,%XMM23,%XMM23 |
(152) 0x41ca94 VPMULLQ %YMM22,%YMM18,%YMM24 |
(152) 0x41ca9a VPADDQ %YMM7,%YMM24,%YMM27 |
(152) 0x41caa0 VGATHERQPD (%RDI,%YMM11,8),%YMM23{%K2} |
(152) 0x41caa7 KXNORW %K0,%K0,%K1 |
(152) 0x41caab VPXORD %XMM24,%XMM24,%XMM24 |
(152) 0x41cab1 VPMULLQ %YMM22,%YMM19,%YMM22 |
(152) 0x41cab7 VGATHERQPD (%R15,%YMM27,8),%YMM24{%K1} |
(152) 0x41cabe VPADDQ %YMM7,%YMM26,%YMM26 |
(152) 0x41cac4 KXNORW %K0,%K0,%K1 |
(152) 0x41cac8 VPADDQ %YMM7,%YMM22,%YMM22 |
(152) 0x41cace KXNORW %K0,%K0,%K2 |
(152) 0x41cad2 VXORPD %XMM27,%XMM27,%XMM27 |
(152) 0x41cad8 VGATHERQPD (%RBX,%YMM22,8),%YMM27{%K2} |
(152) 0x41cadf VXORPD %XMM22,%XMM22,%XMM22 |
(152) 0x41cae5 VPMULLQ %YMM25,%YMM18,%YMM28 |
(152) 0x41caeb VPADDQ %YMM7,%YMM28,%YMM28 |
(152) 0x41caf1 VGATHERQPD (%R12,%YMM26,8),%YMM22{%K1} |
(152) 0x41caf8 KXNORW %K0,%K0,%K1 |
(152) 0x41cafc VXORPD %XMM26,%XMM26,%XMM26 |
(152) 0x41cb02 VPMULLQ %YMM25,%YMM19,%YMM25 |
(152) 0x41cb08 VGATHERQPD (%R15,%YMM28,8),%YMM26{%K1} |
(152) 0x41cb0f VPADDQ %YMM0,%YMM12,%YMM0 |
(152) 0x41cb13 VPSLLQ $0x20,%YMM0,%YMM0 |
(152) 0x41cb18 VPADDQ %YMM7,%YMM25,%YMM7 |
(152) 0x41cb1e KXNORW %K0,%K0,%K1 |
(152) 0x41cb22 VPXORD %XMM25,%XMM25,%XMM25 |
(152) 0x41cb28 VGATHERQPD (%RBX,%YMM7,8),%YMM25{%K1} |
(152) 0x41cb2f VPSRAQ $0x20,%YMM0,%YMM0 |
(152) 0x41cb36 KXNORW %K0,%K0,%K1 |
(152) 0x41cb3a VXORPS %XMM7,%XMM7,%XMM7 |
(152) 0x41cb3e VPMULLQ %YMM2,%YMM15,%YMM7 |
(152) 0x41cb44 VPADDQ %YMM0,%YMM5,%YMM5 |
(152) 0x41cb48 VXORPD %XMM28,%XMM28,%XMM28 |
(152) 0x41cb4e VPADDQ %YMM0,%YMM7,%YMM7 |
(152) 0x41cb52 VPMULLQ %YMM2,%YMM16,%YMM29 |
(152) 0x41cb58 VGATHERQPD (%R13,%YMM5,8),%YMM28{%K1} |
(152) 0x41cb60 KXNORW %K0,%K0,%K1 |
(152) 0x41cb64 VXORPD %XMM30,%XMM30,%XMM30 |
(152) 0x41cb6a VPADDQ %YMM0,%YMM29,%YMM29 |
(152) 0x41cb70 KXNORW %K0,%K0,%K2 |
(152) 0x41cb74 VXORPD %XMM31,%XMM31,%XMM31 |
(152) 0x41cb7a VGATHERQPD (%RDX,%YMM7,8),%YMM30{%K1} |
(152) 0x41cb81 VXORPS %XMM7,%XMM7,%XMM7 |
(152) 0x41cb85 VPMULLQ %YMM2,%YMM17,%YMM7 |
(152) 0x41cb8b VPADDQ %YMM0,%YMM7,%YMM7 |
(152) 0x41cb8f KXNORW %K0,%K0,%K1 |
(152) 0x41cb93 VGATHERQPD (%R12,%YMM29,8),%YMM31{%K2} |
(152) 0x41cb9a VXORPD %XMM29,%XMM29,%XMM29 |
(152) 0x41cba0 VPMULLQ %YMM2,%YMM18,%YMM12 |
(152) 0x41cba6 VPADDQ %YMM0,%YMM12,%YMM12 |
(152) 0x41cbaa VGATHERQPD (%RDI,%YMM7,8),%YMM29{%K1} |
(152) 0x41cbb1 KXNORW %K0,%K0,%K1 |
(152) 0x41cbb5 VXORPD %XMM20,%XMM20,%XMM20 |
(152) 0x41cbbb VPMULLQ %YMM2,%YMM19,%YMM2 |
(152) 0x41cbc1 VGATHERQPD (%R15,%YMM12,8),%YMM20{%K1} |
(152) 0x41cbc8 VEXTRACTI32X4 $0x1,%YMM21,%XMM12 |
(152) 0x41cbcf VPADDQ %YMM0,%YMM2,%YMM2 |
(152) 0x41cbd3 KXNORW %K0,%K0,%K1 |
(152) 0x41cbd7 VPXORD %XMM21,%XMM21,%XMM21 |
(152) 0x41cbdd VGATHERQPD (%RBX,%YMM2,8),%YMM21{%K1} |
(152) 0x41cbe4 VPMOVSXDQ %XMM12,%YMM2 |
(152) 0x41cbe9 VXORPS %XMM12,%XMM12,%XMM12 |
(152) 0x41cbee VPMULLQ %YMM2,%YMM16,%YMM12 |
(152) 0x41cbf4 KXNORW %K0,%K0,%K1 |
(152) 0x41cbf8 VPADDQ %YMM0,%YMM12,%YMM12 |
(152) 0x41cbfc VPXOR %XMM13,%XMM13,%XMM13 |
(152) 0x41cc01 VPMULLQ %YMM2,%YMM18,%YMM14 |
(152) 0x41cc07 VPADDQ %YMM0,%YMM14,%YMM14 |
(152) 0x41cc0b VGATHERQPD (%R12,%YMM12,8),%YMM13{%K1} |
(152) 0x41cc12 KXNORW %K0,%K0,%K1 |
(152) 0x41cc16 VXORPD %XMM12,%XMM12,%XMM12 |
(152) 0x41cc1b VPMULLQ %YMM2,%YMM19,%YMM2 |
(152) 0x41cc21 VGATHERQPD (%R15,%YMM14,8),%YMM12{%K1} |
(152) 0x41cc28 VPADDQ %YMM0,%YMM2,%YMM0 |
(152) 0x41cc2c KXNORW %K0,%K0,%K1 |
(152) 0x41cc30 VPXOR %XMM2,%XMM2,%XMM2 |
(152) 0x41cc34 VGATHERQPD (%RBX,%YMM0,8),%YMM2{%K1} |
(152) 0x41cc3b VMULPD %YMM3,%YMM4,%YMM0 |
(152) 0x41cc3f VADDPD %YMM0,%YMM6,%YMM3 |
(152) 0x41cc43 VSUBPD %YMM22,%YMM3,%YMM3 |
(152) 0x41cc49 VMULPD %YMM28,%YMM30,%YMM6 |
(152) 0x41cc4f VADDPD %YMM6,%YMM31,%YMM14 |
(152) 0x41cc55 VSUBPD %YMM13,%YMM14,%YMM13 |
(152) 0x41cc5a VFMADD231PD %YMM29,%YMM6,%YMM20 |
(152) 0x41cc60 VFMADD231PD %YMM23,%YMM0,%YMM24 |
(152) 0x41cc66 VSUBPD %YMM12,%YMM20,%YMM0 |
(152) 0x41cc6c VSUBPD %YMM26,%YMM24,%YMM6 |
(152) 0x41cc72 VADDPD %YMM30,%YMM21,%YMM12 |
(152) 0x41cc78 VDIVPD %YMM3,%YMM6,%YMM6 |
(152) 0x41cc7c VADDPD %YMM4,%YMM27,%YMM4 |
(152) 0x41cc82 VSUBPD %YMM2,%YMM12,%YMM2 |
(152) 0x41cc86 VSUBPD %YMM25,%YMM4,%YMM4 |
(152) 0x41cc8c VDIVPD %YMM2,%YMM13,%YMM2 |
(152) 0x41cc90 VDIVPD %YMM4,%YMM3,%YMM3 |
(152) 0x41cc94 VDIVPD %YMM13,%YMM0,%YMM0 |
(152) 0x41cc99 KXNORW %K0,%K0,%K1 |
(152) 0x41cc9d VSCATTERQPD %YMM3,(%R13,%YMM1,8){%K1} |
(152) 0x41cca5 KXNORW %K0,%K0,%K1 |
(152) 0x41cca9 VSCATTERQPD %YMM2,(%R13,%YMM5,8){%K1} |
(152) 0x41ccb1 KXNORW %K0,%K0,%K1 |
(152) 0x41ccb5 VSCATTERQPD %YMM6,(%RDI,%YMM11,8){%K1} |
(152) 0x41ccbc KXNORW %K0,%K0,%K1 |
(152) 0x41ccc0 VSCATTERQPD %YMM0,(%RDI,%YMM7,8){%K1} |
(152) 0x41ccc7 VPBROADCASTQ 0x479d0(%RIP),%YMM0 |
(152) 0x41ccd0 VPADDQ %YMM0,%YMM10,%YMM10 |
(152) 0x41ccd4 VPADDQ %YMM0,%YMM9,%YMM9 |
(152) 0x41ccd8 ADD $0x8,%RSI |
(152) 0x41ccdc CMP %RCX,%RSI |
(152) 0x41ccdf JB 41c950 |
0x41cce5 CMP %RCX,0x78(%RSP) |
0x41ccea MOV 0x70(%RSP),%R11 |
0x41ccef MOV 0x68(%RSP),%RSI |
0x41ccf4 JNE 41cd24 |
0x41ccf6 MOV $0x480420,%EDI |
0x41ccfb MOV 0x38(%RSP),%ESI |
0x41ccff LEA -0x28(%RBP),%RSP |
0x41cd03 POP %RBX |
0x41cd04 POP %R12 |
0x41cd06 POP %R13 |
0x41cd08 POP %R14 |
0x41cd0a POP %R15 |
0x41cd0c POP %RBP |
0x41cd0d VZEROUPPER |
0x41cd10 JMP 403050 |
0x41cd15 LEA -0x28(%RBP),%RSP |
0x41cd19 POP %RBX |
0x41cd1a POP %R12 |
0x41cd1c POP %R13 |
0x41cd1e POP %R14 |
0x41cd20 POP %R15 |
0x41cd22 POP %RBP |
0x41cd23 RET |
0x41cd24 ADD %RCX,%RSI |
0x41cd27 JMP 41ce0c |
0x41cd2c NOPL (%RAX) |
(151) 0x41cd30 MOV %RSI,%RAX |
(151) 0x41cd33 CQTO |
(151) 0x41cd35 IDIV %R11 |
(151) 0x41cd38 MOV 0x18(%RSP),%RAX |
(151) 0x41cd3d ADD %EAX,%EDX |
(151) 0x41cd3f MOVSXD %EDX,%RDX |
(151) 0x41cd42 MOVSXD %ECX,%RCX |
(151) 0x41cd45 MOV 0x48(%RSP),%RAX |
(151) 0x41cd4a IMUL %RCX,%RAX |
(151) 0x41cd4e ADD %RDX,%RAX |
(151) 0x41cd51 MOV %R9,%R8 |
(151) 0x41cd54 IMUL %RCX,%R8 |
(151) 0x41cd58 ADD %RDX,%R8 |
(151) 0x41cd5b VMOVSD (%R10,%R8,8),%XMM0 |
(151) 0x41cd61 VMULSD (%R13,%RAX,8),%XMM0,%XMM1 |
(151) 0x41cd68 MOV 0x28(%RSP),%R9 |
(151) 0x41cd6d MOV %R9,%R8 |
(151) 0x41cd70 IMUL %RCX,%R8 |
(151) 0x41cd74 ADD %RDX,%R8 |
(151) 0x41cd77 VADDSD (%R12,%R8,8),%XMM1,%XMM2 |
(151) 0x41cd7d LEA 0x1(%RCX),%R8D |
(151) 0x41cd81 MOVSXD %R8D,%R8 |
(151) 0x41cd84 IMUL %R8,%R9 |
(151) 0x41cd88 ADD %RDX,%R9 |
(151) 0x41cd8b VSUBSD (%R12,%R9,8),%XMM2,%XMM2 |
(151) 0x41cd91 MOV 0x90(%RSP),%R9 |
(151) 0x41cd99 IMUL %RCX,%R9 |
(151) 0x41cd9d ADD %RDX,%R9 |
(151) 0x41cda0 VMOVSD (%RDI,%R9,8),%XMM3 |
(151) 0x41cda6 MOV 0x20(%RSP),%R14 |
(151) 0x41cdab MOV %R14,%R10 |
(151) 0x41cdae IMUL %RCX,%R10 |
(151) 0x41cdb2 ADD %RDX,%R10 |
(151) 0x41cdb5 VFMADD213SD (%R15,%R10,8),%XMM1,%XMM3 |
(151) 0x41cdbb IMUL %R8,%R14 |
(151) 0x41cdbf ADD %RDX,%R14 |
(151) 0x41cdc2 VSUBSD (%R15,%R14,8),%XMM3,%XMM1 |
(151) 0x41cdc8 MOV 0x88(%RSP),%R10 |
(151) 0x41cdd0 IMUL %R10,%RCX |
(151) 0x41cdd4 ADD %RDX,%RCX |
(151) 0x41cdd7 VADDSD (%RBX,%RCX,8),%XMM0,%XMM0 |
(151) 0x41cddc IMUL %R10,%R8 |
(151) 0x41cde0 ADD %RDX,%R8 |
(151) 0x41cde3 VSUBSD (%RBX,%R8,8),%XMM0,%XMM0 |
(151) 0x41cde9 VDIVSD %XMM0,%XMM2,%XMM0 |
(151) 0x41cded VMOVSD %XMM0,(%R13,%RAX,8) |
(151) 0x41cdf4 VDIVSD %XMM2,%XMM1,%XMM0 |
(151) 0x41cdf8 VMOVSD %XMM0,(%RDI,%R9,8) |
(151) 0x41cdfe INC %RSI |
(151) 0x41ce01 CMP 0x50(%RSP),%RSI |
(151) 0x41ce06 JG 41ccf6 |
(151) 0x41ce0c MOV %RSI,%R8 |
(151) 0x41ce0f SHR $0x20,%R8 |
(151) 0x41ce13 JE 41ce30 |
(151) 0x41ce15 MOV %RSI,%RAX |
(151) 0x41ce18 XOR %EDX,%EDX |
(151) 0x41ce1a DIV %R11 |
(151) 0x41ce1d MOV %RAX,%RCX |
(151) 0x41ce20 JMP 41ce39 |
0x41ce22 NOPW %CS:(%RAX,%RAX,1) |
(151) 0x41ce30 MOV %ESI,%EAX |
(151) 0x41ce32 XOR %EDX,%EDX |
(151) 0x41ce34 DIV %R11D |
(151) 0x41ce37 MOV %EAX,%ECX |
(151) 0x41ce39 MOV 0x40(%RSP),%R9 |
(151) 0x41ce3e MOV 0x30(%RSP),%R10 |
(151) 0x41ce43 ADD 0x14(%RSP),%ECX |
(151) 0x41ce47 TEST %R8,%R8 |
(151) 0x41ce4a JNE 41cd30 |
(151) 0x41ce50 MOV %ESI,%EAX |
(151) 0x41ce52 XOR %EDX,%EDX |
(151) 0x41ce54 DIV %R11D |
(151) 0x41ce57 JMP 41cd38 |
0x41ce5c NOPL (%RAX) |
Path / |
Source file and lines | advec_cell.cpp:208-216 |
Module | exec |
nb instructions | 132 |
nb uops | 134 |
loop length | 589 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 30 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 15.67 | 15.67 | 17.00 | 10.00 | 5.40 | 17.00 | 17.00 | 17.00 | 5.40 | 15.67 |
cycles | 5.60 | 5.60 | 15.67 | 15.67 | 17.00 | 10.00 | 5.40 | 17.00 | 17.00 | 17.00 | 5.40 | 15.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.14 |
Stall cycles | 0.00 |
Front-end | 22.33 |
Dispatch | 17.00 |
Overall L1 | 22.33 |
all | 12% |
load | 20% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 15% |
load | 19% |
store | 15% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 35% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41cd15 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5b5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x44(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x480400,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41ccf6 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x596> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x18(%RSP),%R11D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RDX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41ce0c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x6ac> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R9,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R14,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x47d53(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x47bcb(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPBROADCASTQ %RDX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,0x78(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41cd24 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5c4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x480420,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 41ce0c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x6ac> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:208-216 |
Module | exec |
nb instructions | 132 |
nb uops | 134 |
loop length | 589 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 30 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 15.67 | 15.67 | 17.00 | 10.00 | 5.40 | 17.00 | 17.00 | 17.00 | 5.40 | 15.67 |
cycles | 5.60 | 5.60 | 15.67 | 15.67 | 17.00 | 10.00 | 5.40 | 17.00 | 17.00 | 17.00 | 5.40 | 15.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.14 |
Stall cycles | 0.00 |
Front-end | 22.33 |
Dispatch | 17.00 |
Overall L1 | 22.33 |
all | 12% |
load | 20% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 15% |
load | 19% |
store | 15% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 35% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41cd15 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5b5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x44(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x480400,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41ccf6 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x596> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x18(%RSP),%R11D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RDX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41ce0c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x6ac> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R9,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R14,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x47d53(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x47bcb(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPBROADCASTQ %RDX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,0x78(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41cd24 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5c4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x480420,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 41ce0c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x6ac> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted– | 0.73 | 1.03 |
○Loop 152 - advec_cell.cpp:208-216 - exec | 0.73 | 1.02 |
○Loop 151 - advec_cell.cpp:208-216 - exec | 0 | 0 |