Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 2.9% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 2.9% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 157 - 202 |
-------------------------------------------------------------------------------- |
157: #pragma omp parallel for simd collapse(2) |
158: for (int j = (y_min + 1); j < (y_max + 2 + 2); j++) { |
159: for (int i = (x_min + 1); i < (x_max + 2); i++) |
160: ({ |
161: int upwind, donor, downwind, dif; |
162: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
163: if (vol_flux_y(i, j) > 0.0) { |
164: upwind = j - 2; |
165: donor = j - 1; |
166: downwind = j; |
167: dif = donor; |
168: } else { |
169: upwind = std::min(j + 1, y_max + 2); |
170: donor = j; |
171: downwind = j - 1; |
172: dif = upwind; |
173: } |
174: sigmat = std::fabs(vol_flux_y(i, j)) / pre_vol(i, donor); |
175: sigma3 = (1.0 + sigmat) * (vertexdy[j] / vertexdy[dif]); |
176: sigma4 = 2.0 - sigmat; |
177: sigmav = sigmat; |
178: diffuw = density1(i, donor) - density1(i, upwind); |
179: diffdw = density1(i, downwind) - density1(i, donor); |
180: wind = 1.0; |
181: if (diffdw <= 0.0) wind = -1.0; |
182: if (diffuw * diffdw > 0.0) { |
183: limiter = (1.0 - sigmav) * wind * |
184: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
185: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
186: } else { |
187: limiter = 0.0; |
188: } |
189: mass_flux_y(i, j) = vol_flux_y(i, j) * (density1(i, donor) + limiter); |
190: sigmam = std::fabs(mass_flux_y(i, j)) / (density1(i, donor) * pre_vol(i, donor)); |
191: diffuw = energy1(i, donor) - energy1(i, upwind); |
192: diffdw = energy1(i, downwind) - energy1(i, donor); |
193: wind = 1.0; |
194: if (diffdw <= 0.0) wind = -1.0; |
195: if (diffuw * diffdw > 0.0) { |
196: limiter = (1.0 - sigmam) * wind * |
197: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
198: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
199: } else { |
200: limiter = 0.0; |
201: } |
202: ener_flux(i, j) = mass_flux_y(i, j) * (energy1(i, donor) + limiter); |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42b830 PUSH %RBP |
0x42b831 MOV %RSP,%RBP |
0x42b834 PUSH %R15 |
0x42b836 PUSH %R14 |
0x42b838 PUSH %R13 |
0x42b83a PUSH %R12 |
0x42b83c PUSH %RBX |
0x42b83d AND $-0x40,%RSP |
0x42b841 SUB $0x180,%RSP |
0x42b848 MOV 0x50(%RBP),%RAX |
0x42b84c MOV 0x40(%RBP),%R14 |
0x42b850 MOV 0x38(%RBP),%RSI |
0x42b854 MOV 0x28(%RBP),%R13 |
0x42b858 MOV 0x20(%RBP),%RBX |
0x42b85c MOV 0x18(%RBP),%R12 |
0x42b860 MOV 0x10(%RBP),%R15 |
0x42b864 MOV 0x30(%RBP),%R10D |
0x42b868 MOV %R10D,0x14(%RSP) |
0x42b86d MOVL $0,0x44(%RSP) |
0x42b875 TEST %RAX,%RAX |
0x42b878 JS 42be00 |
0x42b87e MOV %RSI,0x20(%RSP) |
0x42b883 MOV %RDX,0x38(%RSP) |
0x42b888 MOV %RCX,0x18(%RSP) |
0x42b88d MOV %R8,0x30(%RSP) |
0x42b892 MOV %R9,0x28(%RSP) |
0x42b897 MOV (%RDI),%ESI |
0x42b899 MOVQ $0,0x68(%RSP) |
0x42b8a2 MOV %RAX,0x60(%RSP) |
0x42b8a7 MOVQ $0x1,0x98(%RSP) |
0x42b8b3 SUB $0x8,%RSP |
0x42b8b7 LEA 0xa0(%RSP),%RAX |
0x42b8bf LEA 0x4c(%RSP),%RCX |
0x42b8c4 LEA 0x70(%RSP),%R8 |
0x42b8c9 LEA 0x68(%RSP),%R9 |
0x42b8ce MOV $0x696460,%EDI |
0x42b8d3 MOV %ESI,0x48(%RSP) |
0x42b8d7 MOV $0x22,%EDX |
0x42b8dc PUSH $0x1 |
0x42b8de PUSH $0x1 |
0x42b8e0 PUSH %RAX |
0x42b8e1 CALL 403090 <__kmpc_for_static_init_8@plt> |
0x42b8e6 ADD $0x20,%RSP |
0x42b8ea MOV 0x68(%RSP),%RSI |
0x42b8ef MOV 0x60(%RSP),%RAX |
0x42b8f4 MOV %RAX,0x50(%RSP) |
0x42b8f9 CMP %RAX,%RSI |
0x42b8fc JA 42be4c |
0x42b902 MOV %R14,%RDX |
0x42b905 SUB 0x20(%RSP),%EDX |
0x42b909 MOV (%R12),%R10 |
0x42b90d MOV 0x10(%R12),%RAX |
0x42b912 MOV %RAX,0x58(%RSP) |
0x42b917 MOV (%RBX),%RAX |
0x42b91a MOV %RAX,0x48(%RSP) |
0x42b91f MOV 0x10(%RBX),%RAX |
0x42b923 MOV %RAX,0x8(%RSP) |
0x42b928 MOV 0x18(%RSP),%RAX |
0x42b92d MOV 0x8(%RAX),%R12 |
0x42b931 MOV 0x30(%RSP),%RAX |
0x42b936 MOV (%RAX),%RBX |
0x42b939 MOV 0x10(%RAX),%RDI |
0x42b93d MOV (%R15),%R8 |
0x42b940 MOV 0x10(%R15),%RAX |
0x42b944 MOV %RAX,0x30(%RSP) |
0x42b949 MOV 0x38(%RSP),%RCX |
0x42b94e ADD $0x2,%ECX |
0x42b951 LEA 0x1(%RSI),%RAX |
0x42b955 MOV 0x50(%RSP),%R9 |
0x42b95a LEA 0x1(%R9),%R14 |
0x42b95e CMP %R14,%RAX |
0x42b961 CMOVG %RAX,%R14 |
0x42b965 MOV 0x28(%RSP),%RAX |
0x42b96a MOV (%RAX),%R9 |
0x42b96d MOV 0x10(%RAX),%R15 |
0x42b971 MOV (%R13),%R11 |
0x42b975 MOV 0x10(%R13),%RAX |
0x42b979 MOV %RAX,0x28(%RSP) |
0x42b97e SUB %RSI,%R14 |
0x42b981 MOV $-0x8,%EAX |
0x42b986 MOV %R14,0x70(%RSP) |
0x42b98b AND %R14,%RAX |
0x42b98e MOV %RCX,0x38(%RSP) |
0x42b993 MOV %RDX,0x18(%RSP) |
0x42b998 MOV %R9,0x80(%RSP) |
0x42b9a0 MOV %R10,0x90(%RSP) |
0x42b9a8 MOV %R8,0x88(%RSP) |
0x42b9b0 MOV %R11,0x78(%RSP) |
0x42b9b5 JE 42be80 |
0x42b9bb MOV %RAX,%R14 |
0x42b9be VPBROADCASTQ %RDX,%ZMM16 |
0x42b9c4 MOV 0x14(%RSP),%EAX |
0x42b9c8 VPBROADCASTD %EAX,%YMM0 |
0x42b9ce VMOVDQU %YMM0,0xa0(%RSP) |
0x42b9d7 MOV 0x20(%RSP),%RAX |
0x42b9dc VPBROADCASTQ %RAX,%ZMM19 |
0x42b9e2 VPBROADCASTQ %R10,%ZMM20 |
0x42b9e8 VPBROADCASTD %ECX,%YMM21 |
0x42b9ee MOV 0x48(%RSP),%RAX |
0x42b9f3 VPBROADCASTQ %RAX,%ZMM22 |
0x42b9f9 VPBROADCASTQ %RBX,%ZMM23 |
0x42b9ff VPBROADCASTQ %R8,%ZMM24 |
0x42ba05 VPBROADCASTQ %R9,%ZMM25 |
0x42ba0b VPBROADCASTQ %R11,%ZMM26 |
0x42ba11 VPBROADCASTQ %RSI,%ZMM0 |
0x42ba17 VPADDQ 0x4cf5f(%RIP),%ZMM0,%ZMM17 |
0x42ba21 XOR %R13D,%R13D |
0x42ba24 VBROADCASTSD 0x4c03a(%RIP),%ZMM28 |
0x42ba2e MOV 0x8(%RSP),%RAX |
0x42ba33 VPBROADCASTQ %RAX,%ZMM0 |
0x42ba39 VMOVDQU64 %ZMM0,0x100(%RSP) |
0x42ba41 VBROADCASTSD 0x4c005(%RIP),%ZMM31 |
0x42ba4b VPBROADCASTQ %RDI,%ZMM0 |
0x42ba51 VMOVDQU64 %ZMM0,0xc0(%RSP) |
0x42ba59 VBROADCASTSD 0x4dd5d(%RIP),%ZMM29 |
0x42ba63 VBROADCASTSD 0x4dd5b(%RIP),%ZMM18 |
0x42ba6d VXORPD %XMM27,%XMM27,%XMM27 |
0x42ba73 JMP 42babf |
0x42ba75 NOPW %CS:(%RAX,%RAX,1) |
(109) 0x42ba80 VADDPD %ZMM10,%ZMM6,%ZMM6{%K1} |
(109) 0x42ba86 VMULPD %ZMM4,%ZMM6,%ZMM2 |
(109) 0x42ba8c VPMULLQ %ZMM1,%ZMM26,%ZMM1 |
(109) 0x42ba92 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
(109) 0x42ba98 KXNORW %K0,%K0,%K1 |
(109) 0x42ba9c MOV 0x28(%RSP),%RAX |
(109) 0x42baa1 VSCATTERQPD %ZMM2,(%RAX,%ZMM0,8){%K1} |
(109) 0x42baa8 VPADDQ 0x4ccc6(%RIP){1to8},%ZMM17,%ZMM17 |
(109) 0x42bab2 ADD $0x8,%R13 |
(109) 0x42bab6 CMP %R14,%R13 |
(109) 0x42bab9 JAE 42be40 |
(109) 0x42babf VMOVDQA64 %ZMM17,%ZMM0 |
(109) 0x42bac5 VMOVDQA64 %ZMM16,%ZMM1 |
(109) 0x42bacb MOV $0x466620,%RAX |
(109) 0x42bad2 CALL %RAX |
(109) 0x42bad4 VPMOVQD %ZMM0,%YMM0 |
(109) 0x42bada VPADDD 0xa0(%RSP),%YMM0,%YMM30 |
(109) 0x42bae2 VMOVDQA64 %ZMM17,%ZMM0 |
(109) 0x42bae8 VMOVDQA64 %ZMM16,%ZMM1 |
(109) 0x42baee CALL 4664a0 <__svml_i64rem8_z0> |
(109) 0x42baf4 VPADDQ %ZMM19,%ZMM0,%ZMM0 |
(109) 0x42bafa VPSLLQ $0x20,%ZMM0,%ZMM0 |
(109) 0x42bb01 VPSRAQ $0x20,%ZMM0,%ZMM0 |
(109) 0x42bb08 VPMOVSXDQ %YMM30,%ZMM1 |
(109) 0x42bb0e VPXOR %XMM2,%XMM2,%XMM2 |
(109) 0x42bb12 VPMULLQ %ZMM1,%ZMM20,%ZMM2 |
(109) 0x42bb18 VPADDQ %ZMM0,%ZMM2,%ZMM2 |
(109) 0x42bb1e KXNORW %K0,%K0,%K1 |
(109) 0x42bb22 VXORPD %XMM4,%XMM4,%XMM4 |
(109) 0x42bb26 MOV 0x58(%RSP),%RAX |
(109) 0x42bb2b VGATHERQPD (%RAX,%ZMM2,8),%ZMM4{%K1} |
(109) 0x42bb32 VCMPPD $0x1,%ZMM4,%ZMM27,%K1 |
(109) 0x42bb39 VPCMPEQD %YMM7,%YMM7,%YMM7 |
(109) 0x42bb3d VPADDD %YMM7,%YMM30,%YMM2 |
(109) 0x42bb43 VPMOVSXDQ %YMM2,%ZMM5 |
(109) 0x42bb49 VPBLENDMQ %ZMM5,%ZMM1,%ZMM6{%K1} |
(109) 0x42bb4f VPXOR %XMM2,%XMM2,%XMM2 |
(109) 0x42bb53 VPMULLQ %ZMM6,%ZMM22,%ZMM2 |
(109) 0x42bb59 VPADDQ %ZMM0,%ZMM2,%ZMM3 |
(109) 0x42bb5f KXNORW %K0,%K0,%K2 |
(109) 0x42bb63 VPXOR %XMM2,%XMM2,%XMM2 |
(109) 0x42bb67 MOV 0x8(%RSP),%RAX |
(109) 0x42bb6c VGATHERQPD (%RAX,%ZMM3,8),%ZMM2{%K2} |
(109) 0x42bb73 VPSUBD %YMM7,%YMM30,%YMM7 |
(109) 0x42bb79 VPMINSD %YMM7,%YMM21,%YMM7 |
(109) 0x42bb7f VPMOVSXDQ %YMM7,%ZMM7 |
(109) 0x42bb85 VMOVDQA64 %ZMM7,%ZMM8 |
(109) 0x42bb8b KXNORW %K0,%K0,%K2 |
(109) 0x42bb8f VXORPD %XMM9,%XMM9,%XMM9 |
(109) 0x42bb94 VGATHERDPD (%R12,%YMM30,8),%ZMM9{%K2} |
(109) 0x42bb9b VMOVDQA64 %ZMM5,%ZMM7{%K1} |
(109) 0x42bba1 VANDPD %ZMM28,%ZMM4,%ZMM10 |
(109) 0x42bba7 VDIVPD %ZMM2,%ZMM10,%ZMM10 |
(109) 0x42bbad KXNORW %K0,%K0,%K2 |
(109) 0x42bbb1 VXORPD %XMM2,%XMM2,%XMM2 |
(109) 0x42bbb5 VGATHERQPD (%R12,%ZMM7,8),%ZMM2{%K2} |
(109) 0x42bbbc VFMADD213PD %ZMM9,%ZMM10,%ZMM9 |
(109) 0x42bbc2 VDIVPD %ZMM2,%ZMM9,%ZMM2 |
(109) 0x42bbc8 VPADDD 0x4dc06(%RIP){1to8},%YMM30,%YMM9 |
(109) 0x42bbd2 VPXOR %XMM7,%XMM7,%XMM7 |
(109) 0x42bbd6 VPMULLQ %ZMM6,%ZMM23,%ZMM7 |
(109) 0x42bbdc VPADDQ %ZMM0,%ZMM7,%ZMM7 |
(109) 0x42bbe2 KXNORW %K0,%K0,%K2 |
(109) 0x42bbe6 VXORPD %XMM11,%XMM11,%XMM11 |
(109) 0x42bbeb VGATHERQPD (%RDI,%ZMM7,8),%ZMM11{%K2} |
(109) 0x42bbf2 VPMOVSXDQ %YMM9,%ZMM8{%K1} |
(109) 0x42bbf8 VPXOR %XMM9,%XMM9,%XMM9 |
(109) 0x42bbfd VPMULLQ %ZMM8,%ZMM23,%ZMM9 |
(109) 0x42bc03 VPADDQ %ZMM0,%ZMM9,%ZMM9 |
(109) 0x42bc09 KXNORW %K0,%K0,%K2 |
(109) 0x42bc0d VXORPD %XMM12,%XMM12,%XMM12 |
(109) 0x42bc12 VGATHERQPD (%RDI,%ZMM9,8),%ZMM12{%K2} |
(109) 0x42bc19 VPBLENDMQ %ZMM1,%ZMM5,%ZMM9{%K1} |
(109) 0x42bc1f VPXOR %XMM5,%XMM5,%XMM5 |
(109) 0x42bc23 VPMULLQ %ZMM9,%ZMM23,%ZMM5 |
(109) 0x42bc29 VPADDQ %ZMM0,%ZMM5,%ZMM5 |
(109) 0x42bc2f KXNORW %K0,%K0,%K1 |
(109) 0x42bc33 VXORPD %XMM13,%XMM13,%XMM13 |
(109) 0x42bc38 VGATHERQPD (%RDI,%ZMM5,8),%ZMM13{%K1} |
(109) 0x42bc3f VBROADCASTSD 0x4db6f(%RIP),%ZMM5 |
(109) 0x42bc49 VSUBPD %ZMM10,%ZMM5,%ZMM5 |
(109) 0x42bc4f VSUBPD %ZMM12,%ZMM11,%ZMM12 |
(109) 0x42bc55 VSUBPD %ZMM11,%ZMM13,%ZMM13 |
(109) 0x42bc5b VMULPD %ZMM12,%ZMM13,%ZMM14 |
(109) 0x42bc61 VCMPPD $0x1,%ZMM14,%ZMM27,%K1 |
(109) 0x42bc68 VCMPPD $0x1,%ZMM13,%ZMM27,%K2 |
(109) 0x42bc6f VSUBPD %ZMM10,%ZMM31,%ZMM10 |
(109) 0x42bc75 VXORPD %ZMM29,%ZMM10,%ZMM14 |
(109) 0x42bc7b VMOVAPD %ZMM10,%ZMM14{%K2} |
(109) 0x42bc81 VANDPD %ZMM28,%ZMM12,%ZMM10 |
(109) 0x42bc87 VANDPD %ZMM28,%ZMM13,%ZMM12 |
(109) 0x42bc8d VMINPD %ZMM12,%ZMM10,%ZMM13 |
(109) 0x42bc93 VMULPD %ZMM2,%ZMM10,%ZMM10 |
(109) 0x42bc99 VFMADD231PD %ZMM12,%ZMM5,%ZMM10 |
(109) 0x42bc9f VMULPD %ZMM18,%ZMM10,%ZMM10 |
(109) 0x42bca5 VMINPD %ZMM10,%ZMM13,%ZMM10 |
(109) 0x42bcab VFMADD231PD %ZMM14,%ZMM10,%ZMM11{%K1} |
(109) 0x42bcb1 VMULPD %ZMM4,%ZMM11,%ZMM4 |
(109) 0x42bcb7 VPXOR %XMM10,%XMM10,%XMM10 |
(109) 0x42bcbc VPMULLQ %ZMM1,%ZMM24,%ZMM10 |
(109) 0x42bcc2 VPADDQ %ZMM0,%ZMM10,%ZMM10 |
(109) 0x42bcc8 KXNORW %K0,%K0,%K1 |
(109) 0x42bccc MOV 0x30(%RSP),%RAX |
(109) 0x42bcd1 VSCATTERQPD %ZMM4,(%RAX,%ZMM10,8){%K1} |
(109) 0x42bcd8 VPMULLQ %ZMM6,%ZMM25,%ZMM6 |
(109) 0x42bcde VPADDQ %ZMM0,%ZMM6,%ZMM10 |
(109) 0x42bce4 KXNORW %K0,%K0,%K1 |
(109) 0x42bce8 VPXOR %XMM6,%XMM6,%XMM6 |
(109) 0x42bcec VGATHERQPD (%R15,%ZMM10,8),%ZMM6{%K1} |
(109) 0x42bcf3 VPMULLQ %ZMM8,%ZMM25,%ZMM8 |
(109) 0x42bcf9 VPADDQ %ZMM0,%ZMM8,%ZMM8 |
(109) 0x42bcff KXNORW %K0,%K0,%K1 |
(109) 0x42bd03 VXORPD %XMM10,%XMM10,%XMM10 |
(109) 0x42bd08 VGATHERQPD (%R15,%ZMM8,8),%ZMM10{%K1} |
(109) 0x42bd0f VPXOR %XMM8,%XMM8,%XMM8 |
(109) 0x42bd14 VPMULLQ %ZMM9,%ZMM25,%ZMM8 |
(109) 0x42bd1a VPADDQ %ZMM0,%ZMM8,%ZMM8 |
(109) 0x42bd20 KXNORW %K0,%K0,%K1 |
(109) 0x42bd24 VPXOR %XMM9,%XMM9,%XMM9 |
(109) 0x42bd29 VGATHERQPD (%R15,%ZMM8,8),%ZMM9{%K1} |
(109) 0x42bd30 VSUBPD %ZMM10,%ZMM6,%ZMM8 |
(109) 0x42bd36 VSUBPD %ZMM6,%ZMM9,%ZMM9 |
(109) 0x42bd3c VMULPD %ZMM8,%ZMM9,%ZMM10 |
(109) 0x42bd42 VCMPPD $0x1,%ZMM10,%ZMM27,%K1 |
(109) 0x42bd49 KORTESTB %K1,%K1 |
(109) 0x42bd4d VXORPD %XMM10,%XMM10,%XMM10 |
(109) 0x42bd52 JE 42ba80 |
(109) 0x42bd58 VPSLLQ $0x3,%ZMM3,%ZMM3 |
(109) 0x42bd5f VPADDQ 0x100(%RSP),%ZMM3,%ZMM3 |
(109) 0x42bd67 VPSLLQ $0x3,%ZMM7,%ZMM7 |
(109) 0x42bd6e VPADDQ 0xc0(%RSP),%ZMM7,%ZMM7 |
(109) 0x42bd76 KMOVQ %K1,%K2 |
(109) 0x42bd7b VGATHERQPD (,%ZMM7,1),%ZMM10{%K2} |
(109) 0x42bd86 KMOVQ %K1,%K2 |
(109) 0x42bd8b VXORPD %XMM7,%XMM7,%XMM7 |
(109) 0x42bd8f VGATHERQPD (,%ZMM3,1),%ZMM7{%K2} |
(109) 0x42bd9a VANDPD %ZMM28,%ZMM4,%ZMM3 |
(109) 0x42bda0 VMULPD %ZMM10,%ZMM7,%ZMM7 |
(109) 0x42bda6 VDIVPD %ZMM7,%ZMM3,%ZMM3 |
(109) 0x42bdac VCMPPD $0x1,%ZMM9,%ZMM27,%K2 |
(109) 0x42bdb3 VSUBPD %ZMM3,%ZMM31,%ZMM3 |
(109) 0x42bdb9 VXORPD %ZMM29,%ZMM3,%ZMM7 |
(109) 0x42bdbf VMOVAPD %ZMM3,%ZMM7{%K2} |
(109) 0x42bdc5 VANDPD %ZMM28,%ZMM8,%ZMM3 |
(109) 0x42bdcb VANDPD %ZMM28,%ZMM9,%ZMM8 |
(109) 0x42bdd1 VMINPD %ZMM8,%ZMM3,%ZMM9 |
(109) 0x42bdd7 VMULPD %ZMM2,%ZMM3,%ZMM2 |
(109) 0x42bddd VFMADD213PD %ZMM2,%ZMM8,%ZMM5 |
(109) 0x42bde3 VMULPD %ZMM18,%ZMM5,%ZMM2 |
(109) 0x42bde9 VMINPD %ZMM2,%ZMM9,%ZMM2 |
(109) 0x42bdef VMULPD %ZMM2,%ZMM7,%ZMM10 |
(109) 0x42bdf5 JMP 42ba80 |
0x42bdfa NOPW (%RAX,%RAX,1) |
0x42be00 LEA -0x28(%RBP),%RSP |
0x42be04 POP %RBX |
0x42be05 POP %R12 |
0x42be07 POP %R13 |
0x42be09 POP %R14 |
0x42be0b POP %R15 |
0x42be0d POP %RBP |
0x42be0e RET |
0x42be0f NOPW %CS:(%RAX,%RAX,1) |
0x42be1e NOPW %CS:(%RAX,%RAX,1) |
0x42be2d NOPW %CS:(%RAX,%RAX,1) |
0x42be3c NOPL (%RAX) |
0x42be40 CMP %R14,0x70(%RSP) |
0x42be45 MOV 0x8(%RSP),%R13 |
0x42be4a JNE 42bec0 |
0x42be4c MOV $0x696480,%EDI |
0x42be51 MOV 0x40(%RSP),%ESI |
0x42be55 LEA -0x28(%RBP),%RSP |
0x42be59 POP %RBX |
0x42be5a POP %R12 |
0x42be5c POP %R13 |
0x42be5e POP %R14 |
0x42be60 POP %R15 |
0x42be62 POP %RBP |
0x42be63 VZEROUPPER |
0x42be66 JMP 402ef0 |
0x42be6b NOPW %CS:(%RAX,%RAX,1) |
0x42be7a NOPW (%RAX,%RAX,1) |
0x42be80 MOV 0x8(%RSP),%R13 |
0x42be85 JMP 42bec3 |
0x42be87 NOPW %CS:(%RAX,%RAX,1) |
0x42be96 NOPW %CS:(%RAX,%RAX,1) |
0x42bea5 NOPW %CS:(%RAX,%RAX,1) |
0x42beb4 NOPW %CS:(%RAX,%RAX,1) |
0x42bec0 ADD %R14,%RSI |
0x42bec3 VPXOR %XMM0,%XMM0,%XMM0 |
0x42bec7 VMOVDDUP 0x4bb99(%RIP),%XMM1 |
0x42becf VMOVSD 0x4d8e1(%RIP),%XMM2 |
0x42bed7 VMOVSD 0x4bb71(%RIP),%XMM3 |
0x42bedf VMOVDDUP 0x4d8d9(%RIP),%XMM4 |
0x42bee7 VMOVDDUP 0x4bb79(%RIP),%XMM5 |
0x42beef VMOVSD 0x4d8d1(%RIP),%XMM6 |
0x42bef7 JMP 42bf2d |
0x42bef9 NOPL (%RAX) |
(108) 0x42bf00 VADDSD %XMM11,%XMM10,%XMM8 |
(108) 0x42bf05 VMULSD %XMM7,%XMM8,%XMM7 |
(108) 0x42bf09 IMUL 0x78(%RSP),%RDX |
(108) 0x42bf0f ADD %RAX,%RDX |
(108) 0x42bf12 MOV 0x28(%RSP),%RAX |
(108) 0x42bf17 VMOVSD %XMM7,(%RAX,%RDX,8) |
(108) 0x42bf1c INC %RSI |
(108) 0x42bf1f CMP 0x50(%RSP),%RSI |
(108) 0x42bf24 MOV %R14,%RBX |
(108) 0x42bf27 JG 42be4c |
(108) 0x42bf2d MOV %RSI,%R8 |
(108) 0x42bf30 SHR $0x20,%R8 |
(108) 0x42bf34 JE 42bf80 |
(108) 0x42bf36 MOV %RSI,%RAX |
(108) 0x42bf39 XOR %EDX,%EDX |
(108) 0x42bf3b MOV 0x18(%RSP),%R9 |
(108) 0x42bf40 DIV %R9 |
(108) 0x42bf43 MOV %RAX,%RCX |
(108) 0x42bf46 MOV 0x20(%RSP),%R10 |
(108) 0x42bf4b TEST %R8,%R8 |
(108) 0x42bf4e JE 42bf98 |
(108) 0x42bf50 MOV %RSI,%RAX |
(108) 0x42bf53 CQTO |
(108) 0x42bf55 IDIV %R9 |
(108) 0x42bf58 JMP 42bf9f |
0x42bf5a NOPW %CS:(%RAX,%RAX,1) |
0x42bf69 NOPW %CS:(%RAX,%RAX,1) |
0x42bf78 NOPL (%RAX,%RAX,1) |
(108) 0x42bf80 MOV %ESI,%EAX |
(108) 0x42bf82 XOR %EDX,%EDX |
(108) 0x42bf84 MOV 0x18(%RSP),%R9 |
(108) 0x42bf89 DIV %R9D |
(108) 0x42bf8c MOV %EAX,%ECX |
(108) 0x42bf8e MOV 0x20(%RSP),%R10 |
(108) 0x42bf93 TEST %R8,%R8 |
(108) 0x42bf96 JNE 42bf50 |
(108) 0x42bf98 MOV %ESI,%EAX |
(108) 0x42bf9a XOR %EDX,%EDX |
(108) 0x42bf9c DIV %R9D |
(108) 0x42bf9f ADD 0x14(%RSP),%ECX |
(108) 0x42bfa3 ADD %R10D,%EDX |
(108) 0x42bfa6 MOVSXD %EDX,%RAX |
(108) 0x42bfa9 MOVSXD %ECX,%RDX |
(108) 0x42bfac MOV 0x90(%RSP),%R8 |
(108) 0x42bfb4 IMUL %RDX,%R8 |
(108) 0x42bfb8 ADD %RAX,%R8 |
(108) 0x42bfbb MOV 0x58(%RSP),%R9 |
(108) 0x42bfc0 VMOVSD (%R9,%R8,8),%XMM7 |
(108) 0x42bfc6 LEA -0x1(%RDX),%R8D |
(108) 0x42bfca VUCOMISD %XMM7,%XMM0 |
(108) 0x42bfce JAE 42c000 |
(108) 0x42bfd0 ADD $-0x2,%ECX |
(108) 0x42bfd3 MOVSXD %R8D,%R10 |
(108) 0x42bfd6 MOVSXD %ECX,%RCX |
(108) 0x42bfd9 MOV %RDX,%R8 |
(108) 0x42bfdc MOV %R10,%R11 |
(108) 0x42bfdf JMP 42c01a |
0x42bfe1 NOPW %CS:(%RAX,%RAX,1) |
0x42bff0 NOPW %CS:(%RAX,%RAX,1) |
0x42bfff NOP |
(108) 0x42c000 MOVSXD %R8D,%R8 |
(108) 0x42c003 INC %ECX |
(108) 0x42c005 MOV 0x38(%RSP),%R9 |
(108) 0x42c00a CMP %ECX,%R9D |
(108) 0x42c00d CMOVL %R9D,%ECX |
(108) 0x42c011 MOVSXD %ECX,%RCX |
(108) 0x42c014 MOV %RCX,%R10 |
(108) 0x42c017 MOV %RDX,%R11 |
(108) 0x42c01a VANDPD %XMM1,%XMM7,%XMM8 |
(108) 0x42c01e MOV 0x48(%RSP),%R9 |
(108) 0x42c023 IMUL %R11,%R9 |
(108) 0x42c027 ADD %RAX,%R9 |
(108) 0x42c02a VDIVSD (%R13,%R9,8),%XMM8,%XMM12 |
(108) 0x42c031 VMOVSD (%R12,%RDX,8),%XMM8 |
(108) 0x42c037 VFMADD213SD %XMM8,%XMM12,%XMM8 |
(108) 0x42c03c VDIVSD (%R12,%R10,8),%XMM8,%XMM9 |
(108) 0x42c042 VSUBSD %XMM12,%XMM2,%XMM8 |
(108) 0x42c047 MOV %RBX,%R10 |
(108) 0x42c04a IMUL %R11,%R10 |
(108) 0x42c04e ADD %RAX,%R10 |
(108) 0x42c051 VMOVSD (%RDI,%R10,8),%XMM11 |
(108) 0x42c057 MOV %RBX,%R13 |
(108) 0x42c05a IMUL %RCX,%R13 |
(108) 0x42c05e ADD %RAX,%R13 |
(108) 0x42c061 VSUBSD (%RDI,%R13,8),%XMM11,%XMM13 |
(108) 0x42c067 MOV %RBX,%R14 |
(108) 0x42c06a MOV %RBX,%R13 |
(108) 0x42c06d IMUL %R8,%R13 |
(108) 0x42c071 ADD %RAX,%R13 |
(108) 0x42c074 VMOVSD (%RDI,%R13,8),%XMM10 |
(108) 0x42c07a VSUBSD %XMM11,%XMM10,%XMM14 |
(108) 0x42c07f VMULSD %XMM13,%XMM14,%XMM15 |
(108) 0x42c084 VXORPD %XMM10,%XMM10,%XMM10 |
(108) 0x42c089 VUCOMISD %XMM10,%XMM15 |
(108) 0x42c08e VXORPD %XMM15,%XMM15,%XMM15 |
(108) 0x42c093 JBE 42c0d0 |
(108) 0x42c095 VSUBSD %XMM12,%XMM3,%XMM12 |
(108) 0x42c09a VXORPD %XMM4,%XMM12,%XMM15 |
(108) 0x42c09e VCMPSD $0x1,%XMM14,%XMM0,%K1 |
(108) 0x42c0a5 VMOVSD %XMM12,%XMM15,%XMM15{%K1} |
(108) 0x42c0ab VANDPD %XMM5,%XMM13,%XMM12 |
(108) 0x42c0af VANDPD %XMM5,%XMM14,%XMM13 |
(108) 0x42c0b3 VMINSD %XMM13,%XMM12,%XMM14 |
(108) 0x42c0b8 VMULSD %XMM9,%XMM12,%XMM12 |
(108) 0x42c0bd VFMADD231SD %XMM13,%XMM8,%XMM12 |
(108) 0x42c0c2 VMULSD %XMM6,%XMM12,%XMM12 |
(108) 0x42c0c6 VMINSD %XMM12,%XMM14,%XMM12 |
(108) 0x42c0cb VMULSD %XMM15,%XMM12,%XMM15 |
(108) 0x42c0d0 VADDSD %XMM11,%XMM15,%XMM11 |
(108) 0x42c0d5 VMULSD %XMM7,%XMM11,%XMM7 |
(108) 0x42c0d9 MOV 0x88(%RSP),%R13 |
(108) 0x42c0e1 IMUL %RDX,%R13 |
(108) 0x42c0e5 ADD %RAX,%R13 |
(108) 0x42c0e8 MOV 0x30(%RSP),%RBX |
(108) 0x42c0ed VMOVSD %XMM7,(%RBX,%R13,8) |
(108) 0x42c0f3 MOV 0x80(%RSP),%RBX |
(108) 0x42c0fb IMUL %RBX,%R11 |
(108) 0x42c0ff ADD %RAX,%R11 |
(108) 0x42c102 VMOVSD (%R15,%R11,8),%XMM11 |
(108) 0x42c108 IMUL %RBX,%RCX |
(108) 0x42c10c ADD %RAX,%RCX |
(108) 0x42c10f VSUBSD (%R15,%RCX,8),%XMM11,%XMM12 |
(108) 0x42c115 IMUL %RBX,%R8 |
(108) 0x42c119 ADD %RAX,%R8 |
(108) 0x42c11c VMOVSD (%R15,%R8,8),%XMM13 |
(108) 0x42c122 VSUBSD %XMM11,%XMM13,%XMM13 |
(108) 0x42c127 VMULSD %XMM12,%XMM13,%XMM14 |
(108) 0x42c12c VUCOMISD %XMM10,%XMM14 |
(108) 0x42c131 MOV 0x8(%RSP),%R13 |
(108) 0x42c136 JBE 42bf00 |
(108) 0x42c13c VANDPD %XMM5,%XMM7,%XMM10 |
(108) 0x42c140 VMOVSD (%R13,%R9,8),%XMM14 |
(108) 0x42c147 VMULSD (%RDI,%R10,8),%XMM14,%XMM14 |
(108) 0x42c14d VDIVSD %XMM14,%XMM10,%XMM10 |
(108) 0x42c152 VSUBSD %XMM10,%XMM3,%XMM10 |
(108) 0x42c157 VXORPD %XMM4,%XMM10,%XMM14 |
(108) 0x42c15b VCMPSD $0x1,%XMM13,%XMM0,%K1 |
(108) 0x42c162 VMOVSD %XMM10,%XMM14,%XMM14{%K1} |
(108) 0x42c168 VANDPD %XMM5,%XMM12,%XMM10 |
(108) 0x42c16c VANDPD %XMM5,%XMM13,%XMM12 |
(108) 0x42c170 VMINSD %XMM12,%XMM10,%XMM13 |
(108) 0x42c175 VMULSD %XMM9,%XMM10,%XMM9 |
(108) 0x42c17a VFMADD213SD %XMM9,%XMM12,%XMM8 |
(108) 0x42c17f VMULSD %XMM6,%XMM8,%XMM8 |
(108) 0x42c183 VMINSD %XMM8,%XMM13,%XMM8 |
(108) 0x42c188 VMULSD %XMM8,%XMM14,%XMM10 |
(108) 0x42c18d JMP 42bf00 |
0x42c192 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 171 |
nb uops | 173 |
loop length | 937 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 2 |
used zmm registers | 14 |
nb stack references | 33 |
micro-operation queue | 28.83 cycles |
front end | 28.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.60 | 20.67 | 20.67 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 20.67 |
cycles | 5.50 | 5.60 | 20.67 | 20.67 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 20.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.65 |
Stall cycles | 0.00 |
Front-end | 28.83 |
Dispatch | 20.67 |
Overall L1 | 28.83 |
all | 10% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 9% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 10% |
load | 5% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 17% |
load | 21% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 16% |
load | 16% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x180,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 42be00 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5d0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x696460,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403090 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 42be4c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x61c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x20(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R9),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 42be80 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x650> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %ECX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x4cf5f(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x4c03a(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4c005(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RDI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4dd5d(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x4dd5b(%RIP),%ZMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VXORPD %XMM27,%XMM27,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42babf <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x28f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R14,0x70(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 42bec0 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x690> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x696480,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402ef0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 42bec3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x693> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R14,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x4bb99(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4d8e1(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4bb71(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4d8d9(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4bb79(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4d8d1(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 42bf2d <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x6fd> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 171 |
nb uops | 173 |
loop length | 937 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 2 |
used zmm registers | 14 |
nb stack references | 33 |
micro-operation queue | 28.83 cycles |
front end | 28.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.60 | 20.67 | 20.67 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 20.67 |
cycles | 5.50 | 5.60 | 20.67 | 20.67 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 20.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.65 |
Stall cycles | 0.00 |
Front-end | 28.83 |
Dispatch | 20.67 |
Overall L1 | 28.83 |
all | 10% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 9% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 10% |
load | 5% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 17% |
load | 21% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 16% |
load | 16% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x180,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 42be00 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5d0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x696460,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403090 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 42be4c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x61c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x20(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R9),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 42be80 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x650> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %ECX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x4cf5f(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x4c03a(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4c005(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RDI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4dd5d(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x4dd5b(%RIP),%ZMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VXORPD %XMM27,%XMM27,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42babf <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x28f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R14,0x70(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 42bec0 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x690> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x696480,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402ef0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 42bec3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x693> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R14,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x4bb99(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4d8e1(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4bb71(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4d8d9(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4bb79(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4d8d1(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 42bf2d <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x6fd> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 2.9 | 1.92 |
○Loop 109 - advec_cell.cpp:158-202 - exec | 2.9 | 1.91 |
○Loop 108 - advec_cell.cpp:158-202 - exec | 0 | 0 |