Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 2.31% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 2.31% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 157 - 202 |
-------------------------------------------------------------------------------- |
157: #pragma omp parallel for simd collapse(2) |
158: for (int j = (y_min + 1); j < (y_max + 2 + 2); j++) { |
159: for (int i = (x_min + 1); i < (x_max + 2); i++) |
160: ({ |
161: int upwind, donor, downwind, dif; |
162: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
163: if (vol_flux_y(i, j) > 0.0) { |
164: upwind = j - 2; |
165: donor = j - 1; |
166: downwind = j; |
167: dif = donor; |
168: } else { |
169: upwind = std::min(j + 1, y_max + 2); |
170: donor = j; |
171: downwind = j - 1; |
172: dif = upwind; |
173: } |
174: sigmat = std::fabs(vol_flux_y(i, j)) / pre_vol(i, donor); |
175: sigma3 = (1.0 + sigmat) * (vertexdy[j] / vertexdy[dif]); |
176: sigma4 = 2.0 - sigmat; |
177: sigmav = sigmat; |
178: diffuw = density1(i, donor) - density1(i, upwind); |
179: diffdw = density1(i, downwind) - density1(i, donor); |
180: wind = 1.0; |
181: if (diffdw <= 0.0) wind = -1.0; |
182: if (diffuw * diffdw > 0.0) { |
183: limiter = (1.0 - sigmav) * wind * |
184: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
185: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
186: } else { |
187: limiter = 0.0; |
188: } |
189: mass_flux_y(i, j) = vol_flux_y(i, j) * (density1(i, donor) + limiter); |
190: sigmam = std::fabs(mass_flux_y(i, j)) / (density1(i, donor) * pre_vol(i, donor)); |
191: diffuw = energy1(i, donor) - energy1(i, upwind); |
192: diffdw = energy1(i, downwind) - energy1(i, donor); |
193: wind = 1.0; |
194: if (diffdw <= 0.0) wind = -1.0; |
195: if (diffuw * diffdw > 0.0) { |
196: limiter = (1.0 - sigmam) * wind * |
197: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
198: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
199: } else { |
200: limiter = 0.0; |
201: } |
202: ener_flux(i, j) = mass_flux_y(i, j) * (energy1(i, donor) + limiter); |
0x41d550 PUSH %RBP |
0x41d551 MOV %RSP,%RBP |
0x41d554 PUSH %R15 |
0x41d556 PUSH %R14 |
0x41d558 PUSH %R13 |
0x41d55a PUSH %R12 |
0x41d55c PUSH %RBX |
0x41d55d AND $-0x40,%RSP |
0x41d561 SUB $0x180,%RSP |
0x41d568 MOV 0x50(%RBP),%RAX |
0x41d56c MOV 0x40(%RBP),%R14 |
0x41d570 MOV 0x38(%RBP),%RSI |
0x41d574 MOV 0x28(%RBP),%R13 |
0x41d578 MOV 0x20(%RBP),%RBX |
0x41d57c MOV 0x18(%RBP),%R12 |
0x41d580 MOV 0x10(%RBP),%R15 |
0x41d584 MOV 0x30(%RBP),%R10D |
0x41d588 MOV %R10D,0x14(%RSP) |
0x41d58d MOVL $0,0x44(%RSP) |
0x41d595 TEST %RAX,%RAX |
0x41d598 JS 41db1a |
0x41d59e MOV %RSI,0x20(%RSP) |
0x41d5a3 MOV %RDX,0x38(%RSP) |
0x41d5a8 MOV %RCX,0x18(%RSP) |
0x41d5ad MOV %R8,0x30(%RSP) |
0x41d5b2 MOV %R9,0x28(%RSP) |
0x41d5b7 MOV (%RDI),%ESI |
0x41d5b9 MOVQ $0,0x68(%RSP) |
0x41d5c2 MOV %RAX,0x60(%RSP) |
0x41d5c7 MOVQ $0x1,0x98(%RSP) |
0x41d5d3 SUB $0x8,%RSP |
0x41d5d7 LEA 0xa0(%RSP),%RAX |
0x41d5df LEA 0x4c(%RSP),%RCX |
0x41d5e4 LEA 0x70(%RSP),%R8 |
0x41d5e9 LEA 0x68(%RSP),%R9 |
0x41d5ee MOV $0x680470,%EDI |
0x41d5f3 MOV %ESI,0x48(%RSP) |
0x41d5f7 MOV $0x22,%EDX |
0x41d5fc PUSH $0x1 |
0x41d5fe PUSH $0x1 |
0x41d600 PUSH %RAX |
0x41d601 CALL 403180 <__kmpc_for_static_init_8@plt> |
0x41d606 ADD $0x20,%RSP |
0x41d60a MOV 0x68(%RSP),%RSI |
0x41d60f MOV 0x60(%RSP),%RAX |
0x41d614 MOV %RAX,0x50(%RSP) |
0x41d619 CMP %RAX,%RSI |
0x41d61c JA 41db35 |
0x41d622 MOV %R14,%RDX |
0x41d625 SUB 0x20(%RSP),%EDX |
0x41d629 MOV (%R12),%RAX |
0x41d62d MOV %RAX,0x48(%RSP) |
0x41d632 MOV 0x10(%R12),%RAX |
0x41d637 MOV %RAX,0x58(%RSP) |
0x41d63c MOV (%RBX),%RAX |
0x41d63f MOV (%RAX),%R11 |
0x41d642 MOV 0x10(%RAX),%RAX |
0x41d646 MOV %RAX,0x8(%RSP) |
0x41d64b MOV 0x18(%RSP),%RAX |
0x41d650 MOV 0x8(%RAX),%R12 |
0x41d654 MOV 0x30(%RSP),%RAX |
0x41d659 MOV (%RAX),%RAX |
0x41d65c MOV (%RAX),%RBX |
0x41d65f MOV 0x10(%RAX),%RDI |
0x41d663 MOV (%R15),%R8 |
0x41d666 MOV 0x10(%R15),%RAX |
0x41d66a MOV %RAX,0x30(%RSP) |
0x41d66f MOV 0x38(%RSP),%RCX |
0x41d674 ADD $0x2,%ECX |
0x41d677 LEA 0x1(%RSI),%RAX |
0x41d67b MOV 0x50(%RSP),%R9 |
0x41d680 LEA 0x1(%R9),%R14 |
0x41d684 CMP %R14,%RAX |
0x41d687 CMOVG %RAX,%R14 |
0x41d68b MOV 0x28(%RSP),%RAX |
0x41d690 MOV (%RAX),%R9 |
0x41d693 MOV 0x10(%RAX),%R15 |
0x41d697 MOV (%R13),%R10 |
0x41d69b MOV 0x10(%R13),%RAX |
0x41d69f MOV %RAX,0x28(%RSP) |
0x41d6a4 SUB %RSI,%R14 |
0x41d6a7 MOV $-0x8,%EAX |
0x41d6ac MOV %R14,0x70(%RSP) |
0x41d6b1 AND %R14,%RAX |
0x41d6b4 MOV %RCX,0x38(%RSP) |
0x41d6b9 MOV %RDX,0x18(%RSP) |
0x41d6be MOV %R9,0x80(%RSP) |
0x41d6c6 MOV %R11,0x90(%RSP) |
0x41d6ce MOV %R8,0x88(%RSP) |
0x41d6d6 MOV %R10,0x78(%RSP) |
0x41d6db JE 41db54 |
0x41d6e1 MOV %RAX,%R14 |
0x41d6e4 VPBROADCASTQ %RDX,%ZMM16 |
0x41d6ea MOV 0x14(%RSP),%EAX |
0x41d6ee VPBROADCASTD %EAX,%YMM0 |
0x41d6f4 VMOVDQU %YMM0,0xa0(%RSP) |
0x41d6fd MOV 0x20(%RSP),%RAX |
0x41d702 VPBROADCASTQ %RAX,%ZMM19 |
0x41d708 MOV 0x48(%RSP),%RAX |
0x41d70d VPBROADCASTQ %RAX,%ZMM20 |
0x41d713 VPBROADCASTD %ECX,%YMM21 |
0x41d719 VPBROADCASTQ %R11,%ZMM22 |
0x41d71f VPBROADCASTQ %RBX,%ZMM23 |
0x41d725 VPBROADCASTQ %R8,%ZMM24 |
0x41d72b VPBROADCASTQ %R9,%ZMM25 |
0x41d731 VPBROADCASTQ %R10,%ZMM26 |
0x41d737 VPBROADCASTQ %RSI,%ZMM0 |
0x41d73d VPADDQ 0x46139(%RIP),%ZMM0,%ZMM17 |
0x41d747 XOR %R13D,%R13D |
0x41d74a VBROADCASTSD 0x45214(%RIP),%ZMM28 |
0x41d754 MOV 0x8(%RSP),%RAX |
0x41d759 VPBROADCASTQ %RAX,%ZMM0 |
0x41d75f VMOVDQU64 %ZMM0,0x100(%RSP) |
0x41d767 VBROADCASTSD 0x451df(%RIP),%ZMM31 |
0x41d771 VPBROADCASTQ %RDI,%ZMM0 |
0x41d777 VMOVDQU64 %ZMM0,0xc0(%RSP) |
0x41d77f VBROADCASTSD 0x46f37(%RIP),%ZMM29 |
0x41d789 VBROADCASTSD 0x46f35(%RIP),%ZMM18 |
0x41d793 VXORPD %XMM27,%XMM27,%XMM27 |
0x41d799 JMP 41d7df |
0x41d79b NOPL (%RAX,%RAX,1) |
(99) 0x41d7a0 VADDPD %ZMM10,%ZMM6,%ZMM6{%K1} |
(99) 0x41d7a6 VMULPD %ZMM4,%ZMM6,%ZMM2 |
(99) 0x41d7ac VPMULLQ %ZMM1,%ZMM26,%ZMM1 |
(99) 0x41d7b2 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
(99) 0x41d7b8 KXNORW %K0,%K0,%K1 |
(99) 0x41d7bc MOV 0x28(%RSP),%RAX |
(99) 0x41d7c1 VSCATTERQPD %ZMM2,(%RAX,%ZMM0,8){%K1} |
(99) 0x41d7c8 VPADDQ 0x45e86(%RIP){1to8},%ZMM17,%ZMM17 |
(99) 0x41d7d2 ADD $0x8,%R13 |
(99) 0x41d7d6 CMP %R14,%R13 |
(99) 0x41d7d9 JAE 41db29 |
(99) 0x41d7df VMOVDQA64 %ZMM17,%ZMM0 |
(99) 0x41d7e5 VMOVDQA64 %ZMM16,%ZMM1 |
(99) 0x41d7eb MOV $0x451520,%RAX |
(99) 0x41d7f2 CALL %RAX |
(99) 0x41d7f4 VPMOVQD %ZMM0,%YMM0 |
(99) 0x41d7fa VPADDD 0xa0(%RSP),%YMM0,%YMM30 |
(99) 0x41d802 VMOVDQA64 %ZMM17,%ZMM0 |
(99) 0x41d808 VMOVDQA64 %ZMM16,%ZMM1 |
(99) 0x41d80e CALL 4513a0 <__svml_i64rem8_z0> |
(99) 0x41d814 VPADDQ %ZMM19,%ZMM0,%ZMM0 |
(99) 0x41d81a VPSLLQ $0x20,%ZMM0,%ZMM0 |
(99) 0x41d821 VPSRAQ $0x20,%ZMM0,%ZMM0 |
(99) 0x41d828 VPMOVSXDQ %YMM30,%ZMM1 |
(99) 0x41d82e VPXOR %XMM2,%XMM2,%XMM2 |
(99) 0x41d832 VPMULLQ %ZMM1,%ZMM20,%ZMM2 |
(99) 0x41d838 VPADDQ %ZMM0,%ZMM2,%ZMM2 |
(99) 0x41d83e VXORPD %XMM4,%XMM4,%XMM4 |
(99) 0x41d842 KXNORW %K0,%K0,%K1 |
(99) 0x41d846 MOV 0x58(%RSP),%RAX |
(99) 0x41d84b VGATHERQPD (%RAX,%ZMM2,8),%ZMM4{%K1} |
(99) 0x41d852 VCMPPD $0x1,%ZMM4,%ZMM27,%K1 |
(99) 0x41d859 VPCMPEQD %YMM7,%YMM7,%YMM7 |
(99) 0x41d85d VPADDD %YMM7,%YMM30,%YMM2 |
(99) 0x41d863 VPMOVSXDQ %YMM2,%ZMM5 |
(99) 0x41d869 VPBLENDMQ %ZMM5,%ZMM1,%ZMM6{%K1} |
(99) 0x41d86f VPXOR %XMM2,%XMM2,%XMM2 |
(99) 0x41d873 VPMULLQ %ZMM6,%ZMM22,%ZMM2 |
(99) 0x41d879 VPADDQ %ZMM0,%ZMM2,%ZMM3 |
(99) 0x41d87f VPXOR %XMM2,%XMM2,%XMM2 |
(99) 0x41d883 KXNORW %K0,%K0,%K2 |
(99) 0x41d887 MOV 0x8(%RSP),%RAX |
(99) 0x41d88c VGATHERQPD (%RAX,%ZMM3,8),%ZMM2{%K2} |
(99) 0x41d893 VPSUBD %YMM7,%YMM30,%YMM7 |
(99) 0x41d899 VPMINSD %YMM7,%YMM21,%YMM7 |
(99) 0x41d89f VPMOVSXDQ %YMM7,%ZMM7 |
(99) 0x41d8a5 VMOVDQA64 %ZMM7,%ZMM8 |
(99) 0x41d8ab VXORPD %XMM9,%XMM9,%XMM9 |
(99) 0x41d8b0 KXNORW %K0,%K0,%K2 |
(99) 0x41d8b4 VGATHERDPD (%R12,%YMM30,8),%ZMM9{%K2} |
(99) 0x41d8bb VMOVDQA64 %ZMM5,%ZMM7{%K1} |
(99) 0x41d8c1 VANDPD %ZMM28,%ZMM4,%ZMM10 |
(99) 0x41d8c7 VDIVPD %ZMM2,%ZMM10,%ZMM10 |
(99) 0x41d8cd VXORPD %XMM2,%XMM2,%XMM2 |
(99) 0x41d8d1 KXNORW %K0,%K0,%K2 |
(99) 0x41d8d5 VGATHERQPD (%R12,%ZMM7,8),%ZMM2{%K2} |
(99) 0x41d8dc VFMADD213PD %ZMM9,%ZMM10,%ZMM9 |
(99) 0x41d8e2 VDIVPD %ZMM2,%ZMM9,%ZMM2 |
(99) 0x41d8e8 VPADDD 0x46de6(%RIP){1to8},%YMM30,%YMM9 |
(99) 0x41d8f2 VPXOR %XMM7,%XMM7,%XMM7 |
(99) 0x41d8f6 VPMULLQ %ZMM6,%ZMM23,%ZMM7 |
(99) 0x41d8fc VPADDQ %ZMM0,%ZMM7,%ZMM7 |
(99) 0x41d902 VXORPD %XMM11,%XMM11,%XMM11 |
(99) 0x41d907 KXNORW %K0,%K0,%K2 |
(99) 0x41d90b VGATHERQPD (%RDI,%ZMM7,8),%ZMM11{%K2} |
(99) 0x41d912 VPMOVSXDQ %YMM9,%ZMM8{%K1} |
(99) 0x41d918 VPXOR %XMM9,%XMM9,%XMM9 |
(99) 0x41d91d VPMULLQ %ZMM8,%ZMM23,%ZMM9 |
(99) 0x41d923 VPADDQ %ZMM0,%ZMM9,%ZMM9 |
(99) 0x41d929 VXORPD %XMM12,%XMM12,%XMM12 |
(99) 0x41d92e KXNORW %K0,%K0,%K2 |
(99) 0x41d932 VGATHERQPD (%RDI,%ZMM9,8),%ZMM12{%K2} |
(99) 0x41d939 VPBLENDMQ %ZMM1,%ZMM5,%ZMM9{%K1} |
(99) 0x41d93f VPXOR %XMM5,%XMM5,%XMM5 |
(99) 0x41d943 VPMULLQ %ZMM9,%ZMM23,%ZMM5 |
(99) 0x41d949 VPADDQ %ZMM0,%ZMM5,%ZMM5 |
(99) 0x41d94f VXORPD %XMM13,%XMM13,%XMM13 |
(99) 0x41d954 KXNORW %K0,%K0,%K1 |
(99) 0x41d958 VGATHERQPD (%RDI,%ZMM5,8),%ZMM13{%K1} |
(99) 0x41d95f VBROADCASTSD 0x46d4f(%RIP),%ZMM5 |
(99) 0x41d969 VSUBPD %ZMM10,%ZMM5,%ZMM5 |
(99) 0x41d96f VSUBPD %ZMM12,%ZMM11,%ZMM12 |
(99) 0x41d975 VSUBPD %ZMM11,%ZMM13,%ZMM13 |
(99) 0x41d97b VMULPD %ZMM12,%ZMM13,%ZMM14 |
(99) 0x41d981 VCMPPD $0x1,%ZMM14,%ZMM27,%K1 |
(99) 0x41d988 VCMPPD $0x1,%ZMM13,%ZMM27,%K2 |
(99) 0x41d98f VSUBPD %ZMM10,%ZMM31,%ZMM10 |
(99) 0x41d995 VXORPD %ZMM29,%ZMM10,%ZMM14 |
(99) 0x41d99b VMOVAPD %ZMM10,%ZMM14{%K2} |
(99) 0x41d9a1 VANDPD %ZMM28,%ZMM12,%ZMM10 |
(99) 0x41d9a7 VANDPD %ZMM28,%ZMM13,%ZMM12 |
(99) 0x41d9ad VMINPD %ZMM12,%ZMM10,%ZMM13 |
(99) 0x41d9b3 VMULPD %ZMM2,%ZMM10,%ZMM10 |
(99) 0x41d9b9 VFMADD231PD %ZMM12,%ZMM5,%ZMM10 |
(99) 0x41d9bf VMULPD %ZMM18,%ZMM10,%ZMM10 |
(99) 0x41d9c5 VMINPD %ZMM10,%ZMM13,%ZMM10 |
(99) 0x41d9cb VFMADD231PD %ZMM14,%ZMM10,%ZMM11{%K1} |
(99) 0x41d9d1 VMULPD %ZMM4,%ZMM11,%ZMM4 |
(99) 0x41d9d7 VPXOR %XMM10,%XMM10,%XMM10 |
(99) 0x41d9dc VPMULLQ %ZMM1,%ZMM24,%ZMM10 |
(99) 0x41d9e2 VPADDQ %ZMM0,%ZMM10,%ZMM10 |
(99) 0x41d9e8 KXNORW %K0,%K0,%K1 |
(99) 0x41d9ec MOV 0x30(%RSP),%RAX |
(99) 0x41d9f1 VSCATTERQPD %ZMM4,(%RAX,%ZMM10,8){%K1} |
(99) 0x41d9f8 VPMULLQ %ZMM6,%ZMM25,%ZMM6 |
(99) 0x41d9fe VPADDQ %ZMM0,%ZMM6,%ZMM10 |
(99) 0x41da04 VPXOR %XMM6,%XMM6,%XMM6 |
(99) 0x41da08 KXNORW %K0,%K0,%K1 |
(99) 0x41da0c VGATHERQPD (%R15,%ZMM10,8),%ZMM6{%K1} |
(99) 0x41da13 VPMULLQ %ZMM8,%ZMM25,%ZMM8 |
(99) 0x41da19 VPADDQ %ZMM0,%ZMM8,%ZMM8 |
(99) 0x41da1f VXORPD %XMM10,%XMM10,%XMM10 |
(99) 0x41da24 KXNORW %K0,%K0,%K1 |
(99) 0x41da28 VGATHERQPD (%R15,%ZMM8,8),%ZMM10{%K1} |
(99) 0x41da2f VPXOR %XMM8,%XMM8,%XMM8 |
(99) 0x41da34 VPMULLQ %ZMM9,%ZMM25,%ZMM8 |
(99) 0x41da3a VPADDQ %ZMM0,%ZMM8,%ZMM8 |
(99) 0x41da40 VPXOR %XMM9,%XMM9,%XMM9 |
(99) 0x41da45 KXNORW %K0,%K0,%K1 |
(99) 0x41da49 VGATHERQPD (%R15,%ZMM8,8),%ZMM9{%K1} |
(99) 0x41da50 VSUBPD %ZMM10,%ZMM6,%ZMM8 |
(99) 0x41da56 VSUBPD %ZMM6,%ZMM9,%ZMM9 |
(99) 0x41da5c VMULPD %ZMM8,%ZMM9,%ZMM10 |
(99) 0x41da62 VCMPPD $0x1,%ZMM10,%ZMM27,%K1 |
(99) 0x41da69 KORTESTB %K1,%K1 |
(99) 0x41da6d VXORPD %XMM10,%XMM10,%XMM10 |
(99) 0x41da72 JE 41d7a0 |
(99) 0x41da78 VPSLLQ $0x3,%ZMM3,%ZMM3 |
(99) 0x41da7f VPADDQ 0x100(%RSP),%ZMM3,%ZMM3 |
(99) 0x41da87 VPSLLQ $0x3,%ZMM7,%ZMM7 |
(99) 0x41da8e VPADDQ 0xc0(%RSP),%ZMM7,%ZMM7 |
(99) 0x41da96 KMOVQ %K1,%K2 |
(99) 0x41da9b VGATHERQPD (,%ZMM7,1),%ZMM10{%K2} |
(99) 0x41daa6 VXORPD %XMM7,%XMM7,%XMM7 |
(99) 0x41daaa KMOVQ %K1,%K2 |
(99) 0x41daaf VGATHERQPD (,%ZMM3,1),%ZMM7{%K2} |
(99) 0x41daba VANDPD %ZMM28,%ZMM4,%ZMM3 |
(99) 0x41dac0 VMULPD %ZMM10,%ZMM7,%ZMM7 |
(99) 0x41dac6 VDIVPD %ZMM7,%ZMM3,%ZMM3 |
(99) 0x41dacc VCMPPD $0x1,%ZMM9,%ZMM27,%K2 |
(99) 0x41dad3 VSUBPD %ZMM3,%ZMM31,%ZMM3 |
(99) 0x41dad9 VXORPD %ZMM29,%ZMM3,%ZMM7 |
(99) 0x41dadf VMOVAPD %ZMM3,%ZMM7{%K2} |
(99) 0x41dae5 VANDPD %ZMM28,%ZMM8,%ZMM3 |
(99) 0x41daeb VANDPD %ZMM28,%ZMM9,%ZMM8 |
(99) 0x41daf1 VMINPD %ZMM8,%ZMM3,%ZMM9 |
(99) 0x41daf7 VMULPD %ZMM2,%ZMM3,%ZMM2 |
(99) 0x41dafd VFMADD213PD %ZMM2,%ZMM8,%ZMM5 |
(99) 0x41db03 VMULPD %ZMM18,%ZMM5,%ZMM2 |
(99) 0x41db09 VMINPD %ZMM2,%ZMM9,%ZMM2 |
(99) 0x41db0f VMULPD %ZMM2,%ZMM7,%ZMM10 |
(99) 0x41db15 JMP 41d7a0 |
0x41db1a LEA -0x28(%RBP),%RSP |
0x41db1e POP %RBX |
0x41db1f POP %R12 |
0x41db21 POP %R13 |
0x41db23 POP %R14 |
0x41db25 POP %R15 |
0x41db27 POP %RBP |
0x41db28 RET |
0x41db29 CMP %R14,0x70(%RSP) |
0x41db2e MOV 0x8(%RSP),%R13 |
0x41db33 JNE 41db5b |
0x41db35 MOV $0x680490,%EDI |
0x41db3a MOV 0x40(%RSP),%ESI |
0x41db3e LEA -0x28(%RBP),%RSP |
0x41db42 POP %RBX |
0x41db43 POP %R12 |
0x41db45 POP %R13 |
0x41db47 POP %R14 |
0x41db49 POP %R15 |
0x41db4b POP %RBP |
0x41db4c VZEROUPPER |
0x41db4f JMP 402fe0 |
0x41db54 MOV 0x8(%RSP),%R13 |
0x41db59 JMP 41db5e |
0x41db5b ADD %R14,%RSI |
0x41db5e VPXOR %XMM0,%XMM0,%XMM0 |
0x41db62 VMOVDDUP 0x44dfe(%RIP),%XMM1 |
0x41db6a VMOVSD 0x46b46(%RIP),%XMM2 |
0x41db72 VMOVSD 0x44dd6(%RIP),%XMM3 |
0x41db7a VMOVDDUP 0x46b3e(%RIP),%XMM4 |
0x41db82 VMOVDDUP 0x44dde(%RIP),%XMM5 |
0x41db8a VMOVSD 0x46b36(%RIP),%XMM6 |
0x41db92 JMP 41dbcd |
0x41db94 NOPW %CS:(%RAX,%RAX,1) |
(98) 0x41dba0 VADDSD %XMM11,%XMM10,%XMM8 |
(98) 0x41dba5 VMULSD %XMM7,%XMM8,%XMM7 |
(98) 0x41dba9 IMUL 0x78(%RSP),%RDX |
(98) 0x41dbaf ADD %RAX,%RDX |
(98) 0x41dbb2 MOV 0x28(%RSP),%RAX |
(98) 0x41dbb7 VMOVSD %XMM7,(%RAX,%RDX,8) |
(98) 0x41dbbc INC %RSI |
(98) 0x41dbbf CMP 0x50(%RSP),%RSI |
(98) 0x41dbc4 MOV %R14,%RBX |
(98) 0x41dbc7 JG 41db35 |
(98) 0x41dbcd MOV %RSI,%R8 |
(98) 0x41dbd0 SHR $0x20,%R8 |
(98) 0x41dbd4 JE 41dbf0 |
(98) 0x41dbd6 MOV %RSI,%RAX |
(98) 0x41dbd9 XOR %EDX,%EDX |
(98) 0x41dbdb MOV 0x18(%RSP),%R9 |
(98) 0x41dbe0 DIV %R9 |
(98) 0x41dbe3 MOV %RAX,%RCX |
(98) 0x41dbe6 JMP 41dbfe |
0x41dbe8 NOPL (%RAX,%RAX,1) |
(98) 0x41dbf0 MOV %ESI,%EAX |
(98) 0x41dbf2 XOR %EDX,%EDX |
(98) 0x41dbf4 MOV 0x18(%RSP),%R9 |
(98) 0x41dbf9 DIV %R9D |
(98) 0x41dbfc MOV %EAX,%ECX |
(98) 0x41dbfe MOV 0x20(%RSP),%R10 |
(98) 0x41dc03 TEST %R8,%R8 |
(98) 0x41dc06 JE 41dc20 |
(98) 0x41dc08 MOV %RSI,%RAX |
(98) 0x41dc0b CQTO |
(98) 0x41dc0d IDIV %R9 |
(98) 0x41dc10 JMP 41dc27 |
0x41dc12 NOPW %CS:(%RAX,%RAX,1) |
(98) 0x41dc20 MOV %ESI,%EAX |
(98) 0x41dc22 XOR %EDX,%EDX |
(98) 0x41dc24 DIV %R9D |
(98) 0x41dc27 ADD 0x14(%RSP),%ECX |
(98) 0x41dc2b ADD %R10D,%EDX |
(98) 0x41dc2e MOVSXD %EDX,%RAX |
(98) 0x41dc31 MOVSXD %ECX,%RDX |
(98) 0x41dc34 MOV 0x48(%RSP),%R8 |
(98) 0x41dc39 IMUL %RDX,%R8 |
(98) 0x41dc3d ADD %RAX,%R8 |
(98) 0x41dc40 MOV 0x58(%RSP),%R9 |
(98) 0x41dc45 VMOVSD (%R9,%R8,8),%XMM7 |
(98) 0x41dc4b VUCOMISD %XMM7,%XMM0 |
(98) 0x41dc4f JAE 41dc70 |
(98) 0x41dc51 LEA -0x2(%RCX),%R8D |
(98) 0x41dc55 DEC %ECX |
(98) 0x41dc57 MOVSXD %ECX,%R10 |
(98) 0x41dc5a MOVSXD %R8D,%R8 |
(98) 0x41dc5d MOV %RDX,%RCX |
(98) 0x41dc60 MOV %R10,%R11 |
(98) 0x41dc63 JMP 41dc8e |
0x41dc65 NOPW %CS:(%RAX,%RAX,1) |
(98) 0x41dc70 LEA 0x1(%RCX),%R8D |
(98) 0x41dc74 MOV 0x38(%RSP),%R9 |
(98) 0x41dc79 CMP %R8D,%R9D |
(98) 0x41dc7c CMOVL %R9D,%R8D |
(98) 0x41dc80 DEC %ECX |
(98) 0x41dc82 MOVSXD %ECX,%RCX |
(98) 0x41dc85 MOVSXD %R8D,%R8 |
(98) 0x41dc88 MOV %R8,%R10 |
(98) 0x41dc8b MOV %RDX,%R11 |
(98) 0x41dc8e VANDPD %XMM1,%XMM7,%XMM8 |
(98) 0x41dc92 MOV 0x90(%RSP),%R9 |
(98) 0x41dc9a IMUL %R11,%R9 |
(98) 0x41dc9e ADD %RAX,%R9 |
(98) 0x41dca1 VDIVSD (%R13,%R9,8),%XMM8,%XMM12 |
(98) 0x41dca8 VMOVSD (%R12,%RDX,8),%XMM8 |
(98) 0x41dcae VFMADD213SD %XMM8,%XMM12,%XMM8 |
(98) 0x41dcb3 VDIVSD (%R12,%R10,8),%XMM8,%XMM9 |
(98) 0x41dcb9 VSUBSD %XMM12,%XMM2,%XMM8 |
(98) 0x41dcbe MOV %RBX,%R10 |
(98) 0x41dcc1 IMUL %R11,%R10 |
(98) 0x41dcc5 ADD %RAX,%R10 |
(98) 0x41dcc8 VMOVSD (%RDI,%R10,8),%XMM11 |
(98) 0x41dcce MOV %RBX,%R13 |
(98) 0x41dcd1 IMUL %R8,%R13 |
(98) 0x41dcd5 ADD %RAX,%R13 |
(98) 0x41dcd8 VSUBSD (%RDI,%R13,8),%XMM11,%XMM13 |
(98) 0x41dcde MOV %RBX,%R14 |
(98) 0x41dce1 MOV %RBX,%R13 |
(98) 0x41dce4 IMUL %RCX,%R13 |
(98) 0x41dce8 ADD %RAX,%R13 |
(98) 0x41dceb VMOVSD (%RDI,%R13,8),%XMM10 |
(98) 0x41dcf1 VSUBSD %XMM11,%XMM10,%XMM14 |
(98) 0x41dcf6 VMULSD %XMM13,%XMM14,%XMM15 |
(98) 0x41dcfb VXORPD %XMM10,%XMM10,%XMM10 |
(98) 0x41dd00 VUCOMISD %XMM10,%XMM15 |
(98) 0x41dd05 VXORPD %XMM15,%XMM15,%XMM15 |
(98) 0x41dd0a JBE 41dd47 |
(98) 0x41dd0c VSUBSD %XMM12,%XMM3,%XMM12 |
(98) 0x41dd11 VXORPD %XMM4,%XMM12,%XMM15 |
(98) 0x41dd15 VCMPSD $0x1,%XMM14,%XMM0,%K1 |
(98) 0x41dd1c VMOVSD %XMM12,%XMM15,%XMM15{%K1} |
(98) 0x41dd22 VANDPD %XMM5,%XMM13,%XMM12 |
(98) 0x41dd26 VANDPD %XMM5,%XMM14,%XMM13 |
(98) 0x41dd2a VMINSD %XMM13,%XMM12,%XMM14 |
(98) 0x41dd2f VMULSD %XMM9,%XMM12,%XMM12 |
(98) 0x41dd34 VFMADD231SD %XMM13,%XMM8,%XMM12 |
(98) 0x41dd39 VMULSD %XMM6,%XMM12,%XMM12 |
(98) 0x41dd3d VMINSD %XMM12,%XMM14,%XMM12 |
(98) 0x41dd42 VMULSD %XMM15,%XMM12,%XMM15 |
(98) 0x41dd47 VADDSD %XMM11,%XMM15,%XMM11 |
(98) 0x41dd4c VMULSD %XMM7,%XMM11,%XMM7 |
(98) 0x41dd50 MOV 0x88(%RSP),%R13 |
(98) 0x41dd58 IMUL %RDX,%R13 |
(98) 0x41dd5c ADD %RAX,%R13 |
(98) 0x41dd5f MOV 0x30(%RSP),%RBX |
(98) 0x41dd64 VMOVSD %XMM7,(%RBX,%R13,8) |
(98) 0x41dd6a MOV 0x80(%RSP),%RBX |
(98) 0x41dd72 IMUL %RBX,%R11 |
(98) 0x41dd76 ADD %RAX,%R11 |
(98) 0x41dd79 VMOVSD (%R15,%R11,8),%XMM11 |
(98) 0x41dd7f IMUL %RBX,%R8 |
(98) 0x41dd83 ADD %RAX,%R8 |
(98) 0x41dd86 VSUBSD (%R15,%R8,8),%XMM11,%XMM12 |
(98) 0x41dd8c IMUL %RBX,%RCX |
(98) 0x41dd90 ADD %RAX,%RCX |
(98) 0x41dd93 VMOVSD (%R15,%RCX,8),%XMM13 |
(98) 0x41dd99 VSUBSD %XMM11,%XMM13,%XMM13 |
(98) 0x41dd9e VMULSD %XMM12,%XMM13,%XMM14 |
(98) 0x41dda3 VUCOMISD %XMM10,%XMM14 |
(98) 0x41dda8 MOV 0x8(%RSP),%R13 |
(98) 0x41ddad JBE 41dba0 |
(98) 0x41ddb3 VANDPD %XMM5,%XMM7,%XMM10 |
(98) 0x41ddb7 VMOVSD (%R13,%R9,8),%XMM14 |
(98) 0x41ddbe VMULSD (%RDI,%R10,8),%XMM14,%XMM14 |
(98) 0x41ddc4 VDIVSD %XMM14,%XMM10,%XMM10 |
(98) 0x41ddc9 VSUBSD %XMM10,%XMM3,%XMM10 |
(98) 0x41ddce VXORPD %XMM4,%XMM10,%XMM14 |
(98) 0x41ddd2 VCMPSD $0x1,%XMM13,%XMM0,%K1 |
(98) 0x41ddd9 VMOVSD %XMM10,%XMM14,%XMM14{%K1} |
(98) 0x41dddf VANDPD %XMM5,%XMM12,%XMM10 |
(98) 0x41dde3 VANDPD %XMM5,%XMM13,%XMM12 |
(98) 0x41dde7 VMINSD %XMM12,%XMM10,%XMM13 |
(98) 0x41ddec VMULSD %XMM9,%XMM10,%XMM9 |
(98) 0x41ddf1 VFMADD213SD %XMM9,%XMM12,%XMM8 |
(98) 0x41ddf6 VMULSD %XMM6,%XMM8,%XMM8 |
(98) 0x41ddfa VMINSD %XMM8,%XMM13,%XMM8 |
(98) 0x41ddff VMULSD %XMM8,%XMM14,%XMM10 |
(98) 0x41de04 JMP 41dba0 |
0x41de09 NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 159 |
nb uops | 161 |
loop length | 766 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 2 |
used zmm registers | 14 |
nb stack references | 33 |
micro-operation queue | 26.83 cycles |
front end | 26.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.60 | 21.33 | 21.33 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.33 |
cycles | 5.50 | 5.60 | 21.33 | 21.33 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.67 |
Stall cycles | 0.00 |
Front-end | 26.83 |
Dispatch | 21.33 |
Overall L1 | 26.83 |
all | 10% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 9% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 10% |
load | 5% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 17% |
load | 21% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 16% |
load | 16% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x180,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41db1a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5ca> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x680470,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403180 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41db35 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5e5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x20(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R9),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41db54 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %ECX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x46139(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x45214(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x451df(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RDI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x46f37(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x46f35(%RIP),%ZMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VXORPD %XMM27,%XMM27,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41d7df <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x28f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %R14,0x70(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41db5b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x680490,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41db5e <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x60e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
ADD %R14,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x44dfe(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x46b46(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x44dd6(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x46b3e(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x44dde(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x46b36(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41dbcd <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x67d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 159 |
nb uops | 161 |
loop length | 766 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 2 |
used zmm registers | 14 |
nb stack references | 33 |
micro-operation queue | 26.83 cycles |
front end | 26.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.60 | 21.33 | 21.33 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.33 |
cycles | 5.50 | 5.60 | 21.33 | 21.33 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.67 |
Stall cycles | 0.00 |
Front-end | 26.83 |
Dispatch | 21.33 |
Overall L1 | 26.83 |
all | 10% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 9% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 10% |
load | 5% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 17% |
load | 21% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 16% |
load | 16% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x180,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41db1a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5ca> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x680470,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403180 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41db35 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5e5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x20(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R9),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41db54 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %ECX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x46139(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x45214(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x451df(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RDI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x46f37(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x46f35(%RIP),%ZMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VXORPD %XMM27,%XMM27,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41d7df <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x28f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %R14,0x70(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41db5b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x680490,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x8(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41db5e <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x60e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
ADD %R14,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x44dfe(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x46b46(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x44dd6(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x46b3e(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x44dde(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x46b36(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41dbcd <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x67d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 2.31 | 1.98 |
○Loop 99 - advec_cell.cpp:158-202 - exec | 2.31 | 1.97 |
○Loop 98 - advec_cell.cpp:158-202 - exec | 0 | 0 |