Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 1.36% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 1.36% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 65 - 110 |
-------------------------------------------------------------------------------- |
65: #pragma omp parallel for simd collapse(2) |
66: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
67: for (int i = (x_min + 1); i < (x_max + 2 + 2); i++) |
68: ({ |
69: int upwind, donor, downwind, dif; |
70: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
71: if (vol_flux_x(i, j) > 0.0) { |
72: upwind = i - 2; |
73: donor = i - 1; |
74: downwind = i; |
75: dif = donor; |
76: } else { |
77: upwind = std::min(i + 1, x_max + 2); |
78: donor = i; |
79: downwind = i - 1; |
80: dif = upwind; |
81: } |
82: sigmat = std::fabs(vol_flux_x(i, j)) / pre_vol(donor, j); |
83: sigma3 = (1.0 + sigmat) * (vertexdx[i] / vertexdx[dif]); |
84: sigma4 = 2.0 - sigmat; |
85: sigmav = sigmat; |
86: diffuw = density1(donor, j) - density1(upwind, j); |
87: diffdw = density1(downwind, j) - density1(donor, j); |
88: wind = 1.0; |
89: if (diffdw <= 0.0) wind = -1.0; |
90: if (diffuw * diffdw > 0.0) { |
91: limiter = (1.0 - sigmav) * wind * |
92: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
93: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
94: } else { |
95: limiter = 0.0; |
96: } |
97: mass_flux_x(i, j) = vol_flux_x(i, j) * (density1(donor, j) + limiter); |
98: sigmam = std::fabs(mass_flux_x(i, j)) / (density1(donor, j) * pre_vol(donor, j)); |
99: diffuw = energy1(donor, j) - energy1(upwind, j); |
100: diffdw = energy1(downwind, j) - energy1(donor, j); |
101: wind = 1.0; |
102: if (diffdw <= 0.0) wind = -1.0; |
103: if (diffuw * diffdw > 0.0) { |
104: limiter = (1.0 - sigmam) * wind * |
105: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
106: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
107: } else { |
108: limiter = 0.0; |
109: } |
110: ener_flux(i, j) = mass_flux_x(i, j) * (energy1(donor, j) + limiter); |
0x420500 PUSH %RBP |
0x420501 MOV %RSP,%RBP |
0x420504 PUSH %R15 |
0x420506 PUSH %R14 |
0x420508 PUSH %R13 |
0x42050a PUSH %R12 |
0x42050c PUSH %RBX |
0x42050d AND $-0x20,%RSP |
0x420511 SUB $0x280,%RSP |
0x420518 MOV %R8,%R12 |
0x42051b MOV 0x50(%RBP),%RAX |
0x42051f MOV 0x40(%RBP),%R10 |
0x420523 MOV 0x38(%RBP),%RSI |
0x420527 MOV 0x30(%RBP),%R8 |
0x42052b MOV %R8,0x78(%RSP) |
0x420530 MOV 0x28(%RBP),%RBX |
0x420534 MOV 0x20(%RBP),%R15 |
0x420538 MOV 0x18(%RBP),%R13 |
0x42053c MOV 0x10(%RBP),%R14 |
0x420540 MOVL $0,0x4c(%RSP) |
0x420548 TEST %RAX,%RAX |
0x42054b JS 420e4e |
0x420551 MOV %RCX,0x28(%RSP) |
0x420556 MOV %R9,0x30(%RSP) |
0x42055b MOV %RSI,0x18(%RSP) |
0x420560 MOV %R10,0x20(%RSP) |
0x420565 MOV %RDX,0x38(%RSP) |
0x42056a MOV (%RDI),%ESI |
0x42056c MOVQ $0,0x90(%RSP) |
0x420578 MOV %RAX,0x88(%RSP) |
0x420580 MOVQ $0x1,0xb8(%RSP) |
0x42058c SUB $0x8,%RSP |
0x420590 LEA 0xc0(%RSP),%RAX |
0x420598 LEA 0x54(%RSP),%RCX |
0x42059d LEA 0x98(%RSP),%R8 |
0x4205a5 LEA 0x90(%RSP),%R9 |
0x4205ad MOV $0x6825d0,%EDI |
0x4205b2 MOV %ESI,0x50(%RSP) |
0x4205b6 MOV $0x22,%EDX |
0x4205bb PUSH $0x1 |
0x4205bd PUSH $0x1 |
0x4205bf PUSH %RAX |
0x4205c0 CALL 403020 <__kmpc_for_static_init_8@plt> |
0x4205c5 ADD $0x20,%RSP |
0x4205c9 MOV 0x90(%RSP),%RSI |
0x4205d1 MOV 0x88(%RSP),%RAX |
0x4205d9 MOV %RAX,0x70(%RSP) |
0x4205de CMP %RAX,%RSI |
0x4205e1 JA 420e2f |
0x4205e7 MOV 0x20(%RSP),%RDX |
0x4205ec SUB 0x18(%RSP),%EDX |
0x4205f0 MOV (%R13),%R9 |
0x4205f4 MOV 0x10(%R13),%R11 |
0x4205f8 MOV (%R15),%RAX |
0x4205fb MOV %RAX,0x68(%RSP) |
0x420600 MOV 0x10(%R15),%RAX |
0x420604 MOV %RAX,0x40(%RSP) |
0x420609 MOV 0x28(%RSP),%RAX |
0x42060e MOV 0x8(%RAX),%RAX |
0x420612 MOV %RAX,0x80(%RSP) |
0x42061a MOV (%R12),%RDI |
0x42061e MOV 0x10(%R12),%R12 |
0x420623 MOV (%R14),%RAX |
0x420626 MOV %RAX,0x50(%RSP) |
0x42062b MOV 0x10(%R14),%RAX |
0x42062f MOV %RAX,0x28(%RSP) |
0x420634 MOV 0x38(%RSP),%RCX |
0x420639 ADD $0x2,%ECX |
0x42063c LEA 0x1(%RSI),%RAX |
0x420640 MOV 0x70(%RSP),%R8 |
0x420645 LEA 0x1(%R8),%R10 |
0x420649 CMP %R10,%RAX |
0x42064c CMOVG %RAX,%R10 |
0x420650 MOV 0x30(%RSP),%RAX |
0x420655 MOV (%RAX),%R8 |
0x420658 MOV 0x10(%RAX),%R14 |
0x42065c MOV (%RBX),%R15 |
0x42065f MOV 0x10(%RBX),%R13 |
0x420663 SUB %RSI,%R10 |
0x420666 MOV $-0x8,%EAX |
0x42066b MOV %R10,0x98(%RSP) |
0x420673 AND %R10,%RAX |
0x420676 MOV %RCX,0x38(%RSP) |
0x42067b MOV %RDX,0x20(%RSP) |
0x420680 MOV %R9,0xb0(%RSP) |
0x420688 MOV %R11,0x30(%RSP) |
0x42068d MOV %RDI,0x60(%RSP) |
0x420692 MOV %R15,0xa0(%RSP) |
0x42069a MOV %R8,0xa8(%RSP) |
0x4206a2 JE 420e5d |
0x4206a8 MOV %RAX,%R10 |
0x4206ab VPBROADCASTQ %RDX,%YMM0 |
0x4206b1 VMOVDQU %YMM0,0x1e0(%RSP) |
0x4206ba MOV %R13,0x58(%RSP) |
0x4206bf MOV 0x78(%RSP),%RAX |
0x4206c4 VPBROADCASTQ %RAX,%YMM0 |
0x4206ca VMOVDQU %YMM0,0x1c0(%RSP) |
0x4206d3 MOV 0x18(%RSP),%RAX |
0x4206d8 VPBROADCASTD %EAX,%YMM0 |
0x4206de VMOVDQU %YMM0,0x1a0(%RSP) |
0x4206e7 VPBROADCASTQ %R9,%YMM0 |
0x4206ed VMOVDQU %YMM0,0x180(%RSP) |
0x4206f6 VPBROADCASTD %ECX,%YMM0 |
0x4206fc VMOVDQU %YMM0,0x160(%RSP) |
0x420705 MOV 0x68(%RSP),%RAX |
0x42070a VPBROADCASTQ %RAX,%YMM0 |
0x420710 VMOVDQU %YMM0,0x140(%RSP) |
0x420719 VPBROADCASTQ %RDI,%YMM0 |
0x42071f VMOVDQU %YMM0,0x120(%RSP) |
0x420728 MOV 0x50(%RSP),%RDI |
0x42072d VPBROADCASTQ %RDI,%YMM0 |
0x420733 VMOVDQU %YMM0,0x100(%RSP) |
0x42073c VPBROADCASTQ %R8,%YMM0 |
0x420742 VMOVDQU %YMM0,0xe0(%RSP) |
0x42074b VPBROADCASTQ %R15,%YMM0 |
0x420751 VMOVDQU %YMM0,0xc0(%RSP) |
0x42075a VPBROADCASTQ %RSI,%YMM0 |
0x420760 VPADDQ 0x45b18(%RIP),%YMM0,%YMM9 |
0x420768 VPADDQ 0x45990(%RIP),%YMM0,%YMM10 |
0x420770 XOR %EBX,%EBX |
0x420772 NOPW %CS:(%RAX,%RAX,1) |
(113) 0x420780 VMOVDQU %YMM9,0x200(%RSP) |
(113) 0x420789 VMOVDQA %YMM10,%YMM0 |
(113) 0x42078d VMOVUPS 0x1e0(%RSP),%YMM8 |
(113) 0x420796 VMOVAPS %YMM8,%YMM1 |
(113) 0x42079a MOV %R10,%R15 |
(113) 0x42079d MOV $0x454690,%R13 |
(113) 0x4207a4 CALL %R13 |
(113) 0x4207a7 VMOVDQA %YMM0,%YMM11 |
(113) 0x4207ab VMOVDQA %YMM9,%YMM0 |
(113) 0x4207af VMOVAPS %YMM8,%YMM1 |
(113) 0x4207b3 CALL %R13 |
(113) 0x4207b6 VMOVDQU 0x1c0(%RSP),%YMM1 |
(113) 0x4207bf VPADDQ %YMM1,%YMM11,%YMM12 |
(113) 0x4207c3 VPADDQ %YMM1,%YMM0,%YMM13 |
(113) 0x4207c7 VMOVDQA %YMM10,%YMM0 |
(113) 0x4207cb VMOVAPS %YMM8,%YMM1 |
(113) 0x4207cf MOV $0x454460,%R13 |
(113) 0x4207d6 CALL %R13 |
(113) 0x4207d9 VMOVDQA %YMM0,%YMM11 |
(113) 0x4207dd VMOVDQA %YMM9,%YMM0 |
(113) 0x4207e1 VMOVAPS %YMM8,%YMM1 |
(113) 0x4207e5 CALL %R13 |
(113) 0x4207e8 MOV %R15,%R10 |
(113) 0x4207eb MOV 0x30(%RSP),%R11 |
(113) 0x4207f0 VPMOVQD %YMM11,%XMM1 |
(113) 0x4207f6 VPMOVQD %YMM0,%XMM3 |
(113) 0x4207fc VPSLLQ $0x20,%YMM12,%YMM0 |
(113) 0x420802 VPSRAQ $0x20,%YMM0,%YMM2 |
(113) 0x420809 VMOVDQU 0x180(%RSP),%YMM5 |
(113) 0x420812 VXORPS %XMM4,%XMM4,%XMM4 |
(113) 0x420816 VPMULLQ %YMM2,%YMM5,%YMM4 |
(113) 0x42081c VPSLLQ $0x20,%YMM13,%YMM0 |
(113) 0x420822 VPSRAQ $0x20,%YMM0,%YMM0 |
(113) 0x420829 VPMULLQ %YMM0,%YMM5,%YMM5 |
(113) 0x42082f VINSERTI128 $0x1,%XMM3,%YMM1,%YMM1 |
(113) 0x420835 VPADDD 0x1a0(%RSP),%YMM1,%YMM11 |
(113) 0x42083e VPMOVSXDQ %XMM11,%YMM3 |
(113) 0x420843 VPADDQ %YMM3,%YMM4,%YMM1 |
(113) 0x420847 KXNORW %K0,%K0,%K1 |
(113) 0x42084b VXORPD %XMM7,%XMM7,%XMM7 |
(113) 0x42084f VGATHERQPD (%R11,%YMM1,8),%YMM7{%K1} |
(113) 0x420856 VEXTRACTI128 $0x1,%YMM11,%XMM12 |
(113) 0x42085c VPMOVSXDQ %XMM12,%YMM1 |
(113) 0x420861 VPADDQ %YMM1,%YMM5,%YMM4 |
(113) 0x420865 VMOVDQA64 %YMM1,%YMM16 |
(113) 0x42086b KXNORW %K0,%K0,%K1 |
(113) 0x42086f VXORPD %XMM6,%XMM6,%XMM6 |
(113) 0x420873 VGATHERQPD (%R11,%YMM4,8),%YMM6{%K1} |
(113) 0x42087a VMOVDQU 0x140(%RSP),%YMM8 |
(113) 0x420883 VXORPS %XMM4,%XMM4,%XMM4 |
(113) 0x420887 VPMULLQ %YMM2,%YMM8,%YMM4 |
(113) 0x42088d VPXOR %XMM1,%XMM1,%XMM1 |
(113) 0x420891 VCMPPD $0x1,%YMM6,%YMM1,%K1 |
(113) 0x420898 VPCMPEQD %YMM9,%YMM9,%YMM9 |
(113) 0x42089d VPADDD %YMM9,%YMM11,%YMM5 |
(113) 0x4208a2 VPMOVSXDQ %XMM5,%YMM19 |
(113) 0x4208a8 VPMULLQ %YMM0,%YMM8,%YMM15 |
(113) 0x4208ae VCMPPD $0x1,%YMM7,%YMM1,%K2 |
(113) 0x4208b5 VEXTRACTI128 $0x1,%YMM5,%XMM5 |
(113) 0x4208bb VPBLENDMQ %YMM19,%YMM3,%YMM30{%K2} |
(113) 0x4208c1 VPADDQ %YMM30,%YMM4,%YMM4 |
(113) 0x4208c7 VMOVDQU %YMM4,0x240(%RSP) |
(113) 0x4208d0 KXNORW %K0,%K0,%K3 |
(113) 0x4208d4 VPXOR %XMM13,%XMM13,%XMM13 |
(113) 0x4208d9 MOV 0x40(%RSP),%RAX |
(113) 0x4208de VGATHERQPD (%RAX,%YMM4,8),%YMM13{%K3} |
(113) 0x4208e5 VPMOVSXDQ %XMM5,%YMM14 |
(113) 0x4208ea VPBLENDMQ %YMM14,%YMM16,%YMM31{%K1} |
(113) 0x4208f0 VMOVDQA64 %YMM16,%YMM21 |
(113) 0x4208f6 VPADDQ %YMM31,%YMM15,%YMM4 |
(113) 0x4208fc VMOVDQU %YMM4,0x220(%RSP) |
(113) 0x420905 KXNORW %K0,%K0,%K3 |
(113) 0x420909 VPXORD %XMM16,%XMM16,%XMM16 |
(113) 0x42090f VGATHERQPD (%RAX,%YMM4,8),%YMM16{%K3} |
(113) 0x420916 KXNORW %K0,%K0,%K3 |
(113) 0x42091a VXORPD %XMM26,%XMM26,%XMM26 |
(113) 0x420920 MOV 0x80(%RSP),%RCX |
(113) 0x420928 VGATHERDPD (%RCX,%XMM11,8),%YMM26{%K3} |
(113) 0x42092f VPSUBD %YMM9,%YMM11,%YMM15 |
(113) 0x420934 KXNORW %K0,%K0,%K3 |
(113) 0x420938 VXORPD %XMM29,%XMM29,%XMM29 |
(113) 0x42093e VGATHERDPD (%RCX,%XMM12,8),%YMM29{%K3} |
(113) 0x420945 VPMINSD 0x160(%RSP),%YMM15,%YMM12 |
(113) 0x42094f VEXTRACTI128 $0x1,%YMM12,%XMM15 |
(113) 0x420955 VPMOVSXDQ %XMM15,%YMM17 |
(113) 0x42095b VPMOVSXDQ %XMM12,%YMM12 |
(113) 0x420960 VMOVDQA %YMM12,%YMM15 |
(113) 0x420965 VMOVDQA64 %YMM17,%YMM22 |
(113) 0x42096b VMOVDQA64 %YMM14,%YMM17{%K1} |
(113) 0x420971 VMOVDQA64 %YMM19,%YMM12{%K2} |
(113) 0x420977 KXNORW %K0,%K0,%K3 |
(113) 0x42097b VXORPD %XMM18,%XMM18,%XMM18 |
(113) 0x420981 VMOVDQU 0x120(%RSP),%YMM8 |
(113) 0x42098a VPMULLQ %YMM0,%YMM8,%YMM20 |
(113) 0x420990 VGATHERQPD (%RCX,%YMM12,8),%YMM18{%K3} |
(113) 0x420997 KXNORW %K0,%K0,%K3 |
(113) 0x42099b VXORPD %XMM25,%XMM25,%XMM25 |
(113) 0x4209a1 VPMULLQ %YMM2,%YMM8,%YMM27 |
(113) 0x4209a7 VGATHERQPD (%RCX,%YMM17,8),%YMM25{%K3} |
(113) 0x4209ae VPADDQ %YMM30,%YMM27,%YMM5 |
(113) 0x4209b4 KXNORW %K0,%K0,%K3 |
(113) 0x4209b8 VXORPD %XMM17,%XMM17,%XMM17 |
(113) 0x4209be VGATHERQPD (%R12,%YMM5,8),%YMM17{%K3} |
(113) 0x4209c5 VPBLENDMQ %YMM3,%YMM19,%YMM12{%K2} |
(113) 0x4209cb VPADDQ %YMM12,%YMM27,%YMM19 |
(113) 0x4209d1 KXNORW %K0,%K0,%K3 |
(113) 0x4209d5 VXORPD %XMM28,%XMM28,%XMM28 |
(113) 0x4209db VGATHERQPD (%R12,%YMM19,8),%YMM28{%K3} |
(113) 0x4209e2 VPBLENDMQ %YMM21,%YMM14,%YMM14{%K1} |
(113) 0x4209e8 VMOVDQA64 %YMM21,%YMM4 |
(113) 0x4209ee VPADDQ %YMM14,%YMM20,%YMM19 |
(113) 0x4209f4 KXNORW %K0,%K0,%K3 |
(113) 0x4209f8 VPXOR %XMM8,%XMM8,%XMM8 |
(113) 0x4209fd VGATHERQPD (%R12,%YMM19,8),%YMM8{%K3} |
(113) 0x420a04 VPADDD 0x458b2(%RIP){1to8},%YMM11,%YMM11 |
(113) 0x420a0e VPMOVSXDQ %XMM11,%YMM15{%K2} |
(113) 0x420a14 VPADDQ %YMM15,%YMM27,%YMM19 |
(113) 0x420a1a KXNORW %K0,%K0,%K2 |
(113) 0x420a1e VPXORD %XMM27,%XMM27,%XMM27 |
(113) 0x420a24 VGATHERQPD (%R12,%YMM19,8),%YMM27{%K2} |
(113) 0x420a2b VEXTRACTI128 $0x1,%YMM11,%XMM11 |
(113) 0x420a31 VPMOVSXDQ %XMM11,%YMM22{%K1} |
(113) 0x420a37 VPADDQ %YMM22,%YMM20,%YMM11 |
(113) 0x420a3d KXNORW %K0,%K0,%K1 |
(113) 0x420a41 VXORPD %XMM19,%XMM19,%XMM19 |
(113) 0x420a47 VGATHERQPD (%R12,%YMM11,8),%YMM19{%K1} |
(113) 0x420a4e VBROADCASTSD 0x44c91(%RIP),%YMM9 |
(113) 0x420a57 VANDPD %YMM7,%YMM9,%YMM11 |
(113) 0x420a5b VDIVPD %YMM13,%YMM11,%YMM21 |
(113) 0x420a61 VANDPD %YMM6,%YMM9,%YMM11 |
(113) 0x420a65 VDIVPD %YMM16,%YMM11,%YMM24 |
(113) 0x420a6b VFMADD213PD %YMM26,%YMM21,%YMM26 |
(113) 0x420a71 VDIVPD %YMM18,%YMM26,%YMM13 |
(113) 0x420a77 VFMADD213PD %YMM29,%YMM24,%YMM29 |
(113) 0x420a7d VDIVPD %YMM25,%YMM29,%YMM11 |
(113) 0x420a83 VPADDQ %YMM31,%YMM20,%YMM16 |
(113) 0x420a89 KXNORW %K0,%K0,%K1 |
(113) 0x420a8d VSUBPD %YMM27,%YMM17,%YMM18 |
(113) 0x420a93 VSUBPD %YMM17,%YMM28,%YMM20 |
(113) 0x420a99 VMULPD %YMM18,%YMM20,%YMM25 |
(113) 0x420a9f VBROADCASTSD 0x457ff(%RIP),%YMM23 |
(113) 0x420aa9 VSUBPD %YMM21,%YMM23,%YMM26 |
(113) 0x420aaf VCMPPD $0x1,%YMM25,%YMM1,%K2 |
(113) 0x420ab6 VCMPPD $0x1,%YMM20,%YMM1,%K3 |
(113) 0x420abd VBROADCASTSD 0x44c09(%RIP),%YMM28 |
(113) 0x420ac7 VSUBPD %YMM21,%YMM28,%YMM21 |
(113) 0x420acd VBROADCASTSD 0x457d9(%RIP),%YMM29 |
(113) 0x420ad7 VXORPD %YMM29,%YMM21,%YMM25 |
(113) 0x420add VANDPD %YMM9,%YMM18,%YMM18 |
(113) 0x420ae3 VMOVAPD %YMM21,%YMM25{%K3} |
(113) 0x420ae9 VANDPD %YMM9,%YMM20,%YMM20 |
(113) 0x420aef VMINPD %YMM20,%YMM18,%YMM21 |
(113) 0x420af5 VMULPD %YMM13,%YMM18,%YMM18 |
(113) 0x420afb VFMADD231PD %YMM20,%YMM26,%YMM18 |
(113) 0x420b01 VXORPD %XMM20,%XMM20,%XMM20 |
(113) 0x420b07 VGATHERQPD (%R12,%YMM16,8),%YMM20{%K1} |
(113) 0x420b0e VBROADCASTSD 0x457a0(%RIP),%YMM27 |
(113) 0x420b18 VMULPD %YMM27,%YMM18,%YMM18 |
(113) 0x420b1e VMINPD %YMM18,%YMM21,%YMM18 |
(113) 0x420b24 VSUBPD %YMM19,%YMM20,%YMM19 |
(113) 0x420b2a VSUBPD %YMM20,%YMM8,%YMM8 |
(113) 0x420b30 VFMADD231PD %YMM25,%YMM18,%YMM17{%K2} |
(113) 0x420b36 VSUBPD %YMM24,%YMM23,%YMM25 |
(113) 0x420b3c VMULPD %YMM19,%YMM8,%YMM18 |
(113) 0x420b42 VCMPPD $0x1,%YMM18,%YMM1,%K1 |
(113) 0x420b49 VCMPPD $0x1,%YMM8,%YMM1,%K2 |
(113) 0x420b50 VSUBPD %YMM24,%YMM28,%YMM18 |
(113) 0x420b56 VXORPD %YMM29,%YMM18,%YMM21 |
(113) 0x420b5c VMOVAPD %YMM18,%YMM21{%K2} |
(113) 0x420b62 VANDPD %YMM9,%YMM19,%YMM18 |
(113) 0x420b68 VANDPD %YMM9,%YMM8,%YMM8 |
(113) 0x420b6d VMINPD %YMM8,%YMM18,%YMM19 |
(113) 0x420b73 VMULPD %YMM11,%YMM18,%YMM18 |
(113) 0x420b79 VFMADD231PD %YMM8,%YMM25,%YMM18 |
(113) 0x420b7f VMULPD %YMM27,%YMM18,%YMM8 |
(113) 0x420b85 VMINPD %YMM8,%YMM19,%YMM8 |
(113) 0x420b8b VMOVDQU64 0x100(%RSP),%YMM19 |
(113) 0x420b93 VPXORD %XMM18,%XMM18,%XMM18 |
(113) 0x420b99 VPMULLQ %YMM2,%YMM19,%YMM18 |
(113) 0x420b9f VPADDQ %YMM3,%YMM18,%YMM18 |
(113) 0x420ba5 VMULPD %YMM7,%YMM17,%YMM17 |
(113) 0x420bab KXNORW %K0,%K0,%K2 |
(113) 0x420baf MOV 0x28(%RSP),%RCX |
(113) 0x420bb4 VSCATTERQPD %YMM17,(%RCX,%YMM18,8){%K2} |
(113) 0x420bbb VPMULLQ %YMM0,%YMM19,%YMM7 |
(113) 0x420bc1 VMOVDQU64 0xe0(%RSP),%YMM19 |
(113) 0x420bc9 VPXORD %XMM18,%XMM18,%XMM18 |
(113) 0x420bcf VPMULLQ %YMM0,%YMM19,%YMM18 |
(113) 0x420bd5 VFMADD231PD %YMM21,%YMM8,%YMM20{%K1} |
(113) 0x420bdb VMULPD %YMM6,%YMM20,%YMM6 |
(113) 0x420be1 VXORPS %XMM8,%XMM8,%XMM8 |
(113) 0x420be6 VPMULLQ %YMM2,%YMM19,%YMM8 |
(113) 0x420bec VMOVDQA64 %YMM4,%YMM21 |
(113) 0x420bf2 VPADDQ %YMM4,%YMM7,%YMM7 |
(113) 0x420bf6 KXNORW %K0,%K0,%K1 |
(113) 0x420bfa VSCATTERQPD %YMM6,(%RCX,%YMM7,8){%K1} |
(113) 0x420c01 VPADDQ %YMM31,%YMM18,%YMM19 |
(113) 0x420c07 KXNORW %K0,%K0,%K1 |
(113) 0x420c0b VXORPD %XMM7,%XMM7,%XMM7 |
(113) 0x420c0f VGATHERQPD (%R14,%YMM19,8),%YMM7{%K1} |
(113) 0x420c16 VPADDQ %YMM30,%YMM8,%YMM19 |
(113) 0x420c1c KXNORW %K0,%K0,%K1 |
(113) 0x420c20 VPXORD %XMM30,%XMM30,%XMM30 |
(113) 0x420c26 VGATHERQPD (%R14,%YMM19,8),%YMM30{%K1} |
(113) 0x420c2d VPADDQ %YMM15,%YMM8,%YMM15 |
(113) 0x420c32 KXNORW %K0,%K0,%K1 |
(113) 0x420c36 VXORPD %XMM19,%XMM19,%XMM19 |
(113) 0x420c3c VGATHERQPD (%R14,%YMM15,8),%YMM19{%K1} |
(113) 0x420c43 VPADDQ %YMM22,%YMM18,%YMM15 |
(113) 0x420c49 KXNORW %K0,%K0,%K1 |
(113) 0x420c4d VXORPD %XMM20,%XMM20,%XMM20 |
(113) 0x420c53 VGATHERQPD (%R14,%YMM15,8),%YMM20{%K1} |
(113) 0x420c5a VPADDQ %YMM12,%YMM8,%YMM8 |
(113) 0x420c5f KXNORW %K0,%K0,%K1 |
(113) 0x420c63 VPXOR %XMM12,%XMM12,%XMM12 |
(113) 0x420c68 VGATHERQPD (%R14,%YMM8,8),%YMM12{%K1} |
(113) 0x420c6f VPADDQ %YMM14,%YMM18,%YMM8 |
(113) 0x420c75 KXNORW %K0,%K0,%K1 |
(113) 0x420c79 VXORPD %XMM15,%XMM15,%XMM15 |
(113) 0x420c7e VGATHERQPD (%R14,%YMM8,8),%YMM15{%K1} |
(113) 0x420c85 VSUBPD %YMM19,%YMM30,%YMM8 |
(113) 0x420c8b VSUBPD %YMM30,%YMM12,%YMM18 |
(113) 0x420c91 VMULPD %YMM8,%YMM18,%YMM12 |
(113) 0x420c97 VCMPPD $0x1,%YMM12,%YMM1,%K2 |
(113) 0x420c9e VXORPD %XMM19,%XMM19,%XMM19 |
(113) 0x420ca4 KMOVQ %K2,%K1 |
(113) 0x420ca9 VGATHERQPD (%R12,%YMM5,8),%YMM19{%K1} |
(113) 0x420cb0 VSUBPD %YMM20,%YMM7,%YMM14 |
(113) 0x420cb6 VSUBPD %YMM7,%YMM15,%YMM12 |
(113) 0x420cba VMULPD %YMM14,%YMM12,%YMM15 |
(113) 0x420cbf VCMPPD $0x1,%YMM15,%YMM1,%K1 |
(113) 0x420cc6 KMOVQ %K1,%K3 |
(113) 0x420ccb VXORPD %XMM15,%XMM15,%XMM15 |
(113) 0x420cd0 VGATHERQPD (%R12,%YMM16,8),%YMM15{%K3} |
(113) 0x420cd7 VXORPD %XMM16,%XMM16,%XMM16 |
(113) 0x420cdd KMOVQ %K2,%K3 |
(113) 0x420ce2 VMOVUPD 0x240(%RSP),%YMM4 |
(113) 0x420ceb VGATHERQPD (%RAX,%YMM4,8),%YMM16{%K3} |
(113) 0x420cf2 KMOVQ %K1,%K3 |
(113) 0x420cf7 VXORPD %XMM4,%XMM4,%XMM4 |
(113) 0x420cfb VMOVUPD 0x220(%RSP),%YMM5 |
(113) 0x420d04 VGATHERQPD (%RAX,%YMM5,8),%YMM4{%K3} |
(113) 0x420d0b VMULPD %YMM19,%YMM16,%YMM5 |
(113) 0x420d11 VCMPPD $0x1,%YMM18,%YMM1,%K3 |
(113) 0x420d18 VANDPD %YMM9,%YMM8,%YMM8 |
(113) 0x420d1d VANDPD %YMM9,%YMM18,%YMM16 |
(113) 0x420d23 VMULPD %YMM13,%YMM8,%YMM13 |
(113) 0x420d28 VFMADD231PD %YMM26,%YMM16,%YMM13 |
(113) 0x420d2e VANDPD %YMM9,%YMM17,%YMM18 |
(113) 0x420d34 VDIVPD %YMM5,%YMM18,%YMM5 |
(113) 0x420d3a VMINPD %YMM16,%YMM8,%YMM8 |
(113) 0x420d40 VSUBPD %YMM5,%YMM28,%YMM5 |
(113) 0x420d46 VMULPD %YMM27,%YMM13,%YMM13 |
(113) 0x420d4c VMINPD %YMM13,%YMM8,%YMM8 |
(113) 0x420d51 VXORPD %YMM29,%YMM5,%YMM13 |
(113) 0x420d57 VMOVAPD %YMM5,%YMM13{%K3} |
(113) 0x420d5d VFMADD231PD %YMM8,%YMM13,%YMM30{%K2} |
(113) 0x420d63 VMOVDQU 0xc0(%RSP),%YMM8 |
(113) 0x420d6c VPMULLQ %YMM2,%YMM8,%YMM2 |
(113) 0x420d72 VPADDQ %YMM3,%YMM2,%YMM2 |
(113) 0x420d76 VMULPD %YMM17,%YMM30,%YMM3 |
(113) 0x420d7c KXNORW %K0,%K0,%K2 |
(113) 0x420d80 MOV 0x58(%RSP),%RAX |
(113) 0x420d85 VSCATTERQPD %YMM3,(%RAX,%YMM2,8){%K2} |
(113) 0x420d8c VMULPD %YMM4,%YMM15,%YMM2 |
(113) 0x420d90 VANDPD %YMM6,%YMM9,%YMM3 |
(113) 0x420d94 VDIVPD %YMM2,%YMM3,%YMM2 |
(113) 0x420d98 VANDPD %YMM9,%YMM14,%YMM3 |
(113) 0x420d9d VANDPD %YMM9,%YMM12,%YMM4 |
(113) 0x420da2 VMOVDQU 0x200(%RSP),%YMM9 |
(113) 0x420dab VMULPD %YMM3,%YMM11,%YMM5 |
(113) 0x420daf VFMADD231PD %YMM25,%YMM4,%YMM5 |
(113) 0x420db5 VCMPPD $0x1,%YMM12,%YMM1,%K2 |
(113) 0x420dbc VMINPD %YMM4,%YMM3,%YMM3 |
(113) 0x420dc0 VSUBPD %YMM2,%YMM28,%YMM2 |
(113) 0x420dc6 VMULPD %YMM27,%YMM5,%YMM4 |
(113) 0x420dcc VMINPD %YMM4,%YMM3,%YMM3 |
(113) 0x420dd0 VXORPD %YMM29,%YMM2,%YMM4 |
(113) 0x420dd6 VMOVAPD %YMM2,%YMM4{%K2} |
(113) 0x420ddc VFMADD231PD %YMM3,%YMM4,%YMM7{%K1} |
(113) 0x420de2 VMULPD %YMM6,%YMM7,%YMM2 |
(113) 0x420de6 VPMULLQ %YMM0,%YMM8,%YMM0 |
(113) 0x420dec VPADDQ %YMM21,%YMM0,%YMM0 |
(113) 0x420df2 KXNORW %K0,%K0,%K1 |
(113) 0x420df6 VSCATTERQPD %YMM2,(%RAX,%YMM0,8){%K1} |
(113) 0x420dfd VPBROADCASTQ 0x4549a(%RIP),%YMM0 |
(113) 0x420e06 VPADDQ %YMM0,%YMM10,%YMM10 |
(113) 0x420e0a VPADDQ %YMM0,%YMM9,%YMM9 |
(113) 0x420e0e ADD $0x8,%RBX |
(113) 0x420e12 CMP %R15,%RBX |
(113) 0x420e15 JB 420780 |
0x420e1b CMP %R10,0x98(%RSP) |
0x420e23 MOV 0x60(%RSP),%RBX |
0x420e28 MOV 0x58(%RSP),%R13 |
0x420e2d JNE 420e67 |
0x420e2f MOV $0x6825f0,%EDI |
0x420e34 MOV 0x48(%RSP),%ESI |
0x420e38 LEA -0x28(%RBP),%RSP |
0x420e3c POP %RBX |
0x420e3d POP %R12 |
0x420e3f POP %R13 |
0x420e41 POP %R14 |
0x420e43 POP %R15 |
0x420e45 POP %RBP |
0x420e46 VZEROUPPER |
0x420e49 JMP 402e90 |
0x420e4e LEA -0x28(%RBP),%RSP |
0x420e52 POP %RBX |
0x420e53 POP %R12 |
0x420e55 POP %R13 |
0x420e57 POP %R14 |
0x420e59 POP %R15 |
0x420e5b POP %RBP |
0x420e5c RET |
0x420e5d MOV %RDI,%RBX |
0x420e60 MOV 0x50(%RSP),%RDI |
0x420e65 JMP 420e6a |
0x420e67 ADD %R10,%RSI |
0x420e6a VPXOR %XMM0,%XMM0,%XMM0 |
0x420e6e VMOVDDUP 0x44872(%RIP),%XMM1 |
0x420e76 VMOVSD 0x4542a(%RIP),%XMM2 |
0x420e7e VMOVSD 0x4484a(%RIP),%XMM3 |
0x420e86 VMOVDDUP 0x45422(%RIP),%XMM4 |
0x420e8e VMOVDDUP 0x44852(%RIP),%XMM5 |
0x420e96 VMOVSD 0x4541a(%RIP),%XMM6 |
0x420e9e JMP 420ed7 |
(112) 0x420ea0 VADDSD %XMM11,%XMM10,%XMM8 |
(112) 0x420ea5 VMULSD %XMM7,%XMM8,%XMM7 |
(112) 0x420ea9 IMUL 0xa0(%RSP),%RCX |
(112) 0x420eb2 ADD %RAX,%RCX |
(112) 0x420eb5 VMOVSD %XMM7,(%R13,%RCX,8) |
(112) 0x420ebc INC %RSI |
(112) 0x420ebf CMP 0x70(%RSP),%RSI |
(112) 0x420ec4 MOV 0x30(%RSP),%R11 |
(112) 0x420ec9 MOV 0x60(%RSP),%RBX |
(112) 0x420ece MOV %R15,%RDI |
(112) 0x420ed1 JG 420e2f |
(112) 0x420ed7 MOV %RDI,%R15 |
(112) 0x420eda MOV %RSI,%R8 |
(112) 0x420edd SHR $0x20,%R8 |
(112) 0x420ee1 JE 420f10 |
(112) 0x420ee3 MOV %RSI,%RAX |
(112) 0x420ee6 XOR %EDX,%EDX |
(112) 0x420ee8 MOV 0x20(%RSP),%RDI |
(112) 0x420eed DIV %RDI |
(112) 0x420ef0 MOV %RAX,%RCX |
(112) 0x420ef3 MOV 0x18(%RSP),%R10 |
(112) 0x420ef8 TEST %R8,%R8 |
(112) 0x420efb MOV 0xb0(%RSP),%R9 |
(112) 0x420f03 JE 420f2f |
(112) 0x420f05 MOV %RSI,%RAX |
(112) 0x420f08 CQTO |
(112) 0x420f0a IDIV %RDI |
(112) 0x420f0d JMP 420f35 |
0x420f0f NOP |
(112) 0x420f10 MOV %ESI,%EAX |
(112) 0x420f12 XOR %EDX,%EDX |
(112) 0x420f14 MOV 0x20(%RSP),%RDI |
(112) 0x420f19 DIV %EDI |
(112) 0x420f1b MOV %EAX,%ECX |
(112) 0x420f1d MOV 0x18(%RSP),%R10 |
(112) 0x420f22 TEST %R8,%R8 |
(112) 0x420f25 MOV 0xb0(%RSP),%R9 |
(112) 0x420f2d JNE 420f05 |
(112) 0x420f2f MOV %ESI,%EAX |
(112) 0x420f31 XOR %EDX,%EDX |
(112) 0x420f33 DIV %EDI |
(112) 0x420f35 ADD 0x78(%RSP),%RCX |
(112) 0x420f3a LEA (%RDX,%R10,1),%R8D |
(112) 0x420f3e MOVSXD %R8D,%RAX |
(112) 0x420f41 MOVSXD %ECX,%RCX |
(112) 0x420f44 IMUL %RCX,%R9 |
(112) 0x420f48 ADD %RAX,%R9 |
(112) 0x420f4b VMOVSD (%R11,%R9,8),%XMM7 |
(112) 0x420f51 VUCOMISD %XMM7,%XMM0 |
(112) 0x420f55 LEA -0x1(%RDX,%R10,1),%EDX |
(112) 0x420f5a JAE 420f70 |
(112) 0x420f5c ADD $-0x2,%R8D |
(112) 0x420f60 MOVSXD %EDX,%R11 |
(112) 0x420f63 MOVSXD %R8D,%R8 |
(112) 0x420f66 MOV %RAX,%RDX |
(112) 0x420f69 MOV %R11,%R10 |
(112) 0x420f6c JMP 420f8b |
0x420f6e XCHG %AX,%AX |
(112) 0x420f70 MOVSXD %EDX,%RDX |
(112) 0x420f73 INC %R8D |
(112) 0x420f76 MOV 0x38(%RSP),%RDI |
(112) 0x420f7b CMP %R8D,%EDI |
(112) 0x420f7e CMOVL %EDI,%R8D |
(112) 0x420f82 MOVSXD %R8D,%R8 |
(112) 0x420f85 MOV %R8,%R11 |
(112) 0x420f88 MOV %RAX,%R10 |
(112) 0x420f8b VANDPD %XMM1,%XMM7,%XMM8 |
(112) 0x420f8f MOV 0x68(%RSP),%R9 |
(112) 0x420f94 IMUL %RCX,%R9 |
(112) 0x420f98 ADD %R10,%R9 |
(112) 0x420f9b MOV 0x40(%RSP),%RDI |
(112) 0x420fa0 VDIVSD (%RDI,%R9,8),%XMM8,%XMM12 |
(112) 0x420fa6 MOV 0x80(%RSP),%RDI |
(112) 0x420fae VMOVSD (%RDI,%RAX,8),%XMM8 |
(112) 0x420fb3 VFMADD213SD %XMM8,%XMM12,%XMM8 |
(112) 0x420fb8 VDIVSD (%RDI,%R11,8),%XMM8,%XMM8 |
(112) 0x420fbe IMUL %RCX,%RBX |
(112) 0x420fc2 LEA (%RBX,%R10,1),%R11 |
(112) 0x420fc6 VMOVSD (%R12,%R11,8),%XMM11 |
(112) 0x420fcc LEA (%RBX,%R8,1),%RDI |
(112) 0x420fd0 VSUBSD (%R12,%RDI,8),%XMM11,%XMM13 |
(112) 0x420fd6 ADD %RDX,%RBX |
(112) 0x420fd9 VMOVSD (%R12,%RBX,8),%XMM10 |
(112) 0x420fdf VSUBSD %XMM12,%XMM2,%XMM9 |
(112) 0x420fe4 VSUBSD %XMM11,%XMM10,%XMM14 |
(112) 0x420fe9 VMULSD %XMM13,%XMM14,%XMM15 |
(112) 0x420fee VXORPD %XMM10,%XMM10,%XMM10 |
(112) 0x420ff3 VUCOMISD %XMM10,%XMM15 |
(112) 0x420ff8 VXORPD %XMM15,%XMM15,%XMM15 |
(112) 0x420ffd JBE 42103a |
(112) 0x420fff VSUBSD %XMM12,%XMM3,%XMM12 |
(112) 0x421004 VXORPD %XMM4,%XMM12,%XMM15 |
(112) 0x421008 VCMPSD $0x1,%XMM14,%XMM0,%K1 |
(112) 0x42100f VMOVSD %XMM12,%XMM15,%XMM15{%K1} |
(112) 0x421015 VANDPD %XMM5,%XMM13,%XMM12 |
(112) 0x421019 VANDPD %XMM5,%XMM14,%XMM13 |
(112) 0x42101d VMINSD %XMM13,%XMM12,%XMM14 |
(112) 0x421022 VMULSD %XMM8,%XMM12,%XMM12 |
(112) 0x421027 VFMADD231SD %XMM13,%XMM9,%XMM12 |
(112) 0x42102c VMULSD %XMM6,%XMM12,%XMM12 |
(112) 0x421030 VMINSD %XMM12,%XMM14,%XMM12 |
(112) 0x421035 VMULSD %XMM15,%XMM12,%XMM15 |
(112) 0x42103a VADDSD %XMM11,%XMM15,%XMM11 |
(112) 0x42103f VMULSD %XMM7,%XMM11,%XMM7 |
(112) 0x421043 MOV %R15,%RDI |
(112) 0x421046 IMUL %RCX,%RDI |
(112) 0x42104a ADD %RAX,%RDI |
(112) 0x42104d MOV 0x28(%RSP),%RBX |
(112) 0x421052 VMOVSD %XMM7,(%RBX,%RDI,8) |
(112) 0x421057 MOV 0xa8(%RSP),%RDI |
(112) 0x42105f IMUL %RCX,%RDI |
(112) 0x421063 ADD %RDI,%R10 |
(112) 0x421066 VMOVSD (%R14,%R10,8),%XMM11 |
(112) 0x42106c ADD %RDI,%R8 |
(112) 0x42106f VSUBSD (%R14,%R8,8),%XMM11,%XMM12 |
(112) 0x421075 ADD %RDX,%RDI |
(112) 0x421078 VMOVSD (%R14,%RDI,8),%XMM13 |
(112) 0x42107e VSUBSD %XMM11,%XMM13,%XMM13 |
(112) 0x421083 VMULSD %XMM12,%XMM13,%XMM14 |
(112) 0x421088 VUCOMISD %XMM10,%XMM14 |
(112) 0x42108d JBE 420ea0 |
(112) 0x421093 VANDPD %XMM5,%XMM7,%XMM10 |
(112) 0x421097 MOV 0x40(%RSP),%RDX |
(112) 0x42109c VMOVSD (%RDX,%R9,8),%XMM14 |
(112) 0x4210a2 VMULSD (%R12,%R11,8),%XMM14,%XMM14 |
(112) 0x4210a8 VDIVSD %XMM14,%XMM10,%XMM10 |
(112) 0x4210ad VSUBSD %XMM10,%XMM3,%XMM10 |
(112) 0x4210b2 VXORPD %XMM4,%XMM10,%XMM14 |
(112) 0x4210b6 VCMPSD $0x1,%XMM13,%XMM0,%K1 |
(112) 0x4210bd VMOVSD %XMM10,%XMM14,%XMM14{%K1} |
(112) 0x4210c3 VANDPD %XMM5,%XMM12,%XMM10 |
(112) 0x4210c7 VANDPD %XMM5,%XMM13,%XMM12 |
(112) 0x4210cb VMINSD %XMM12,%XMM10,%XMM13 |
(112) 0x4210d0 VMULSD %XMM8,%XMM10,%XMM8 |
(112) 0x4210d5 VFMADD213SD %XMM8,%XMM12,%XMM9 |
(112) 0x4210da VMULSD %XMM6,%XMM9,%XMM8 |
(112) 0x4210de VMINSD %XMM8,%XMM13,%XMM8 |
(112) 0x4210e3 VMULSD %XMM8,%XMM14,%XMM10 |
(112) 0x4210e8 JMP 420ea0 |
0x4210ed NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 159 |
nb uops | 161 |
loop length | 779 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 42 |
micro-operation queue | 26.83 cycles |
front end | 26.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.80 | 5.80 | 20.00 | 20.00 | 23.00 | 11.00 | 5.80 | 23.00 | 23.00 | 23.00 | 5.60 | 20.00 |
cycles | 5.80 | 5.80 | 20.00 | 20.00 | 23.00 | 11.00 | 5.80 | 23.00 | 23.00 | 23.00 | 5.60 | 20.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.64 |
Stall cycles | 0.00 |
Front-end | 26.83 |
Dispatch | 23.00 |
Overall L1 | 26.83 |
all | 22% |
load | 25% |
store | 27% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 20% |
load | 14% |
store | 27% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 19% |
load | 21% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 18% |
load | 17% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x280,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 420e4e <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x94e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x54(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x98(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x90(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6825d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x90(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 420e2f <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x92f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB 0x18(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x70(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 420e5d <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x95d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %R13,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R9,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %ECX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x50(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RDI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R8,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x45b18(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x45990(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R10,0x98(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 420e67 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x967> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6825f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x48(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 420e6a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x96a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
ADD %R10,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x44872(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4542a(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4484a(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x45422(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x44852(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4541a(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 420ed7 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x9d7> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 159 |
nb uops | 161 |
loop length | 779 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 42 |
micro-operation queue | 26.83 cycles |
front end | 26.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.80 | 5.80 | 20.00 | 20.00 | 23.00 | 11.00 | 5.80 | 23.00 | 23.00 | 23.00 | 5.60 | 20.00 |
cycles | 5.80 | 5.80 | 20.00 | 20.00 | 23.00 | 11.00 | 5.80 | 23.00 | 23.00 | 23.00 | 5.60 | 20.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.64 |
Stall cycles | 0.00 |
Front-end | 26.83 |
Dispatch | 23.00 |
Overall L1 | 26.83 |
all | 22% |
load | 25% |
store | 27% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 20% |
load | 14% |
store | 27% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 19% |
load | 21% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 18% |
load | 17% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x280,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 420e4e <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x94e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x54(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x98(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x90(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6825d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x90(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 420e2f <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x92f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB 0x18(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x70(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 420e5d <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x95d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %R13,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R9,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %ECX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x50(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RDI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R8,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x45b18(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x45990(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R10,0x98(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 420e67 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x967> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6825f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x48(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 420e6a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x96a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
ADD %R10,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x44872(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4542a(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4484a(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x45422(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x44852(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4541a(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 420ed7 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x9d7> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 1.36 | 1.86 |
○Loop 113 - advec_cell.cpp:65-110 - exec | 1.36 | 1.85 |
○Loop 112 - advec_cell.cpp:66-110 - exec | 0 | 0 |