Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 2.43% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 2.43% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 108 - 139 |
-------------------------------------------------------------------------------- |
108: #pragma omp parallel for simd collapse(2) |
109: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
110: for (int i = (x_min - 1 + 1); i < (x_max + 1 + 2); i++) |
111: ({ |
112: int upwind, donor, downwind, dif; |
113: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
114: if (node_flux(i, j) < 0.0) { |
115: upwind = i + 2; |
116: donor = i + 1; |
117: downwind = i; |
118: dif = donor; |
119: } else { |
120: upwind = i - 1; |
121: donor = i; |
122: downwind = i + 1; |
123: dif = upwind; |
124: } |
125: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(donor, j)); |
126: width = celldx[i]; |
127: vdiffuw = vel1(donor, j) - vel1(upwind, j); |
128: vdiffdw = vel1(downwind, j) - vel1(donor, j); |
129: limiter = 0.0; |
130: if (vdiffuw * vdiffdw > 0.0) { |
131: auw = std::fabs(vdiffuw); |
132: adw = std::fabs(vdiffdw); |
133: wind = 1.0; |
134: if (vdiffdw <= 0.0) wind = -1.0; |
135: limiter = |
136: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldx[dif]) / 6.0, auw), adw); |
137: } |
138: advec_vel_s = vel1(donor, j) + (1.0 - sigma) * limiter; |
139: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
0x423e80 PUSH %RBP |
0x423e81 MOV %RSP,%RBP |
0x423e84 PUSH %R15 |
0x423e86 PUSH %R14 |
0x423e88 PUSH %R13 |
0x423e8a PUSH %R12 |
0x423e8c PUSH %RBX |
0x423e8d AND $-0x20,%RSP |
0x423e91 SUB $0xc0,%RSP |
0x423e98 MOV %RCX,%R13 |
0x423e9b MOV 0x38(%RBP),%RAX |
0x423e9f MOV 0x28(%RBP),%RBX |
0x423ea3 MOV 0x20(%RBP),%R14 |
0x423ea7 MOV 0x18(%RBP),%RCX |
0x423eab MOV %RCX,0x30(%RSP) |
0x423eb0 MOV 0x10(%RBP),%RCX |
0x423eb4 MOV %RCX,0x38(%RSP) |
0x423eb9 MOVL $0,0x14(%RSP) |
0x423ec1 TEST %RAX,%RAX |
0x423ec4 JS 424445 |
0x423eca MOV %R8,%R12 |
0x423ecd MOV %RDX,%R15 |
0x423ed0 MOV %R9,0x8(%RSP) |
0x423ed5 MOV (%RDI),%ESI |
0x423ed7 MOVQ $0,0x48(%RSP) |
0x423ee0 MOV %RAX,0x40(%RSP) |
0x423ee5 MOVQ $0x1,0x78(%RSP) |
0x423eee SUB $0x8,%RSP |
0x423ef2 LEA 0x80(%RSP),%RAX |
0x423efa LEA 0x1c(%RSP),%RCX |
0x423eff LEA 0x50(%RSP),%R8 |
0x423f04 LEA 0x48(%RSP),%R9 |
0x423f09 MOV $0x6828d0,%EDI |
0x423f0e MOV %ESI,0x18(%RSP) |
0x423f12 MOV $0x22,%EDX |
0x423f17 PUSH $0x1 |
0x423f19 PUSH $0x1 |
0x423f1b PUSH %RAX |
0x423f1c CALL 403020 <__kmpc_for_static_init_8@plt> |
0x423f21 ADD $0x20,%RSP |
0x423f25 MOV 0x48(%RSP),%RSI |
0x423f2a MOV 0x40(%RSP),%RAX |
0x423f2f MOV %RAX,0x28(%RSP) |
0x423f34 CMP %RAX,%RSI |
0x423f37 JA 424465 |
0x423f3d MOV %RBX,%R11 |
0x423f40 SUB %R14D,%R11D |
0x423f43 MOV (%R13),%RDX |
0x423f47 MOV 0x10(%R13),%R13 |
0x423f4b MOV (%R12),%R8 |
0x423f4f MOV 0x10(%R12),%R12 |
0x423f54 MOV (%R15),%R9 |
0x423f57 MOV 0x10(%R15),%R15 |
0x423f5b MOV 0x8(%RSP),%RAX |
0x423f60 MOV (%RAX),%R10 |
0x423f63 MOV 0x10(%RAX),%RDI |
0x423f67 LEA 0x1(%RSI),%RAX |
0x423f6b MOV 0x28(%RSP),%RCX |
0x423f70 INC %RCX |
0x423f73 CMP %RCX,%RAX |
0x423f76 CMOVG %RAX,%RCX |
0x423f7a SUB %RSI,%RCX |
0x423f7d MOV $-0x8,%EBX |
0x423f82 AND %RCX,%RBX |
0x423f85 MOV %RDX,0x8(%RSP) |
0x423f8a MOV %R8,0x70(%RSP) |
0x423f8f MOV %R10,0x60(%RSP) |
0x423f94 MOV %R11,0x20(%RSP) |
0x423f99 MOV %R9,0x68(%RSP) |
0x423f9e JE 424487 |
0x423fa4 MOV %R13,0x18(%RSP) |
0x423fa9 MOV %RCX,0x50(%RSP) |
0x423fae VPBROADCASTQ %R11,%YMM8 |
0x423fb4 MOV 0x30(%RSP),%RAX |
0x423fb9 VPBROADCASTQ %RAX,%YMM24 |
0x423fbf MOV %R14,0x58(%RSP) |
0x423fc4 VPBROADCASTD %R14D,%YMM0 |
0x423fca VMOVDQU %YMM0,0x80(%RSP) |
0x423fd3 VPBROADCASTQ %RDX,%YMM14 |
0x423fd9 VPBROADCASTQ %R8,%YMM15 |
0x423fdf VPBROADCASTQ %R9,%YMM16 |
0x423fe5 VPBROADCASTQ %R10,%YMM17 |
0x423feb VPBROADCASTQ %RSI,%YMM0 |
0x423ff1 VPADDQ 0x42287(%RIP),%YMM0,%YMM9 |
0x423ff9 VPADDQ 0x420ff(%RIP),%YMM0,%YMM10 |
0x424001 XOR %R14D,%R14D |
0x424004 VXORPD %XMM18,%XMM18,%XMM18 |
0x42400a VBROADCASTSD 0x416d4(%RIP),%YMM21 |
0x424014 VBROADCASTSD 0x42292(%RIP),%YMM25 |
0x42401e VPBROADCASTQ 0x42278(%RIP),%YMM26 |
0x424028 JMP 4241d8 |
0x42402d NOPL (%RAX) |
(129) 0x424030 VMOVQ %XMM31,%RAX |
(129) 0x424036 KMOVQ %K2,%K3 |
(129) 0x42403b VXORPD %XMM13,%XMM13,%XMM13 |
(129) 0x424040 VGATHERQPD (%RAX,%YMM4,8),%YMM13{%K3} |
(129) 0x424047 VEXTRACTI32X4 $0x1,%YMM22,%XMM23 |
(129) 0x42404e VPMOVSXDQ %XMM23,%YMM23 |
(129) 0x424054 KMOVQ %K1,%K3 |
(129) 0x424059 VPXORD %XMM31,%XMM31,%XMM31 |
(129) 0x42405f VGATHERQPD (%RAX,%YMM23,8),%YMM31{%K3} |
(129) 0x424066 VANDPD %YMM21,%YMM5,%YMM23 |
(129) 0x42406c VDIVPD %YMM29,%YMM23,%YMM23 |
(129) 0x424072 VANDPD %YMM21,%YMM19,%YMM19 |
(129) 0x424078 VPMOVSXDQ %XMM22,%YMM22 |
(129) 0x42407e KMOVQ %K2,%K3 |
(129) 0x424083 VXORPD %XMM29,%XMM29,%XMM29 |
(129) 0x424089 VGATHERQPD (%RAX,%YMM22,8),%YMM29{%K3} |
(129) 0x424090 VANDPD %YMM21,%YMM20,%YMM22 |
(129) 0x424096 VCMPPD $0x1,%YMM20,%YMM18,%K3 |
(129) 0x42409d VBROADCASTSD 0x42202(%RIP),%YMM12 |
(129) 0x4240a6 VSUBPD %YMM23,%YMM12,%YMM20 |
(129) 0x4240ac VMULPD %YMM20,%YMM22,%YMM20 |
(129) 0x4240b2 VMINPD %YMM22,%YMM19,%YMM22 |
(129) 0x4240b8 VFMADD213PD %YMM19,%YMM23,%YMM19 |
(129) 0x4240be VDIVPD %YMM29,%YMM19,%YMM19 |
(129) 0x4240c4 VDIVPD %YMM13,%YMM20,%YMM20 |
(129) 0x4240ca VADDPD %YMM20,%YMM19,%YMM19 |
(129) 0x4240d0 VBROADCASTSD 0x421de(%RIP),%YMM29 |
(129) 0x4240da VMULPD %YMM29,%YMM13,%YMM13 |
(129) 0x4240e0 VMULPD %YMM19,%YMM13,%YMM13 |
(129) 0x4240e6 VMINPD %YMM22,%YMM13,%YMM13 |
(129) 0x4240ec VXORPD %YMM25,%YMM13,%YMM19 |
(129) 0x4240f2 VMOVAPD %YMM13,%YMM19{%K3} |
(129) 0x4240f8 VMOVAPD %YMM19,%YMM13{%K2}{z} |
(129) 0x4240fe VBROADCASTSD 0x415c8(%RIP),%YMM20 |
(129) 0x424108 VSUBPD %YMM23,%YMM20,%YMM19 |
(129) 0x42410e VFMADD213PD %YMM11,%YMM13,%YMM19 |
(129) 0x424114 VMULPD %YMM5,%YMM19,%YMM5 |
(129) 0x42411a KMOVQ %K1,%K2 |
(129) 0x42411f VPMULLQ %YMM3,%YMM17,%YMM3 |
(129) 0x424125 VPADDQ %YMM4,%YMM3,%YMM3 |
(129) 0x424129 VXORPD %XMM4,%XMM4,%XMM4 |
(129) 0x42412d VGATHERQPD (%RAX,%YMM1,8),%YMM4{%K2} |
(129) 0x424134 KXNORW %K0,%K0,%K2 |
(129) 0x424138 VSCATTERQPD %YMM5,(%RDI,%YMM3,8){%K2} |
(129) 0x42413f VANDPD %YMM21,%YMM2,%YMM3 |
(129) 0x424145 VDIVPD %YMM7,%YMM3,%YMM3 |
(129) 0x424149 VANDPD %YMM21,%YMM27,%YMM5 |
(129) 0x42414f VANDPD %YMM21,%YMM28,%YMM7 |
(129) 0x424155 VCMPPD $0x1,%YMM28,%YMM18,%K2 |
(129) 0x42415c VSUBPD %YMM3,%YMM12,%YMM11 |
(129) 0x424160 VMULPD %YMM7,%YMM11,%YMM11 |
(129) 0x424164 VMINPD %YMM7,%YMM5,%YMM7 |
(129) 0x424168 VFMADD213PD %YMM5,%YMM3,%YMM5 |
(129) 0x42416d VDIVPD %YMM31,%YMM5,%YMM5 |
(129) 0x424173 VDIVPD %YMM4,%YMM11,%YMM11 |
(129) 0x424177 VADDPD %YMM5,%YMM11,%YMM5 |
(129) 0x42417b VMULPD %YMM29,%YMM4,%YMM4 |
(129) 0x424181 VMULPD %YMM5,%YMM4,%YMM4 |
(129) 0x424185 VMINPD %YMM7,%YMM4,%YMM4 |
(129) 0x424189 VXORPD %YMM25,%YMM4,%YMM5 |
(129) 0x42418f VMOVAPD %YMM4,%YMM5{%K2} |
(129) 0x424195 VMOVAPD %YMM5,%YMM4{%K1}{z} |
(129) 0x42419b VSUBPD %YMM3,%YMM20,%YMM3 |
(129) 0x4241a1 VFMADD213PD %YMM6,%YMM4,%YMM3 |
(129) 0x4241a6 VMULPD %YMM2,%YMM3,%YMM2 |
(129) 0x4241aa VPMULLQ %YMM0,%YMM17,%YMM0 |
(129) 0x4241b0 VPADDQ %YMM1,%YMM0,%YMM0 |
(129) 0x4241b4 KXNORW %K0,%K0,%K1 |
(129) 0x4241b8 VSCATTERQPD %YMM2,(%RDI,%YMM0,8){%K1} |
(129) 0x4241bf VPADDQ %YMM26,%YMM10,%YMM10 |
(129) 0x4241c5 VPADDQ %YMM26,%YMM9,%YMM9 |
(129) 0x4241cb ADD $0x8,%R14 |
(129) 0x4241cf CMP %RBX,%R14 |
(129) 0x4241d2 JAE 424454 |
(129) 0x4241d8 VMOVDQA %YMM10,%YMM0 |
(129) 0x4241dc VMOVDQA %YMM8,%YMM1 |
(129) 0x4241e0 MOV $0x454690,%R13 |
(129) 0x4241e7 CALL %R13 |
(129) 0x4241ea VMOVDQA %YMM0,%YMM11 |
(129) 0x4241ee VMOVDQA %YMM9,%YMM0 |
(129) 0x4241f2 VMOVDQA %YMM8,%YMM1 |
(129) 0x4241f6 CALL %R13 |
(129) 0x4241f9 VPADDQ %YMM24,%YMM0,%YMM19 |
(129) 0x4241ff VPADDQ %YMM24,%YMM11,%YMM20 |
(129) 0x424205 VMOVDQA %YMM10,%YMM0 |
(129) 0x424209 VMOVDQA %YMM8,%YMM1 |
(129) 0x42420d MOV $0x454460,%R13 |
(129) 0x424214 CALL %R13 |
(129) 0x424217 VMOVDQA %YMM0,%YMM11 |
(129) 0x42421b VMOVDQA %YMM9,%YMM0 |
(129) 0x42421f VMOVDQA %YMM8,%YMM1 |
(129) 0x424223 CALL %R13 |
(129) 0x424226 VPMOVQD %YMM11,%XMM1 |
(129) 0x42422c VPMOVQD %YMM0,%XMM0 |
(129) 0x424232 VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(129) 0x424238 VPADDD 0x80(%RSP),%YMM0,%YMM11 |
(129) 0x424241 VPSLLQ $0x20,%YMM20,%YMM0 |
(129) 0x424248 VPSRAQ $0x20,%YMM0,%YMM3 |
(129) 0x42424f VPSLLQ $0x20,%YMM19,%YMM0 |
(129) 0x424256 VPSRAQ $0x20,%YMM0,%YMM0 |
(129) 0x42425d VXORPS %XMM2,%XMM2,%XMM2 |
(129) 0x424261 VPMULLQ %YMM0,%YMM14,%YMM2 |
(129) 0x424267 VXORPS %XMM5,%XMM5,%XMM5 |
(129) 0x42426b VPMULLQ %YMM3,%YMM14,%YMM5 |
(129) 0x424271 VEXTRACTI128 $0x1,%YMM11,%XMM1 |
(129) 0x424277 VPMOVSXDQ %XMM1,%YMM1 |
(129) 0x42427c VPADDQ %YMM1,%YMM2,%YMM4 |
(129) 0x424280 KXNORW %K0,%K0,%K1 |
(129) 0x424284 VPXOR %XMM2,%XMM2,%XMM2 |
(129) 0x424288 MOV 0x18(%RSP),%RAX |
(129) 0x42428d VGATHERQPD (%RAX,%YMM4,8),%YMM2{%K1} |
(129) 0x424294 VPMOVSXDQ %XMM11,%YMM4 |
(129) 0x424299 VPADDQ %YMM4,%YMM5,%YMM6 |
(129) 0x42429d KXNORW %K0,%K0,%K1 |
(129) 0x4242a1 VPXOR %XMM5,%XMM5,%XMM5 |
(129) 0x4242a5 VGATHERQPD (%RAX,%YMM6,8),%YMM5{%K1} |
(129) 0x4242ac VCMPPD $0x1,%YMM18,%YMM5,%K1 |
(129) 0x4242b3 VCMPPD $0x1,%YMM18,%YMM2,%K2 |
(129) 0x4242ba VPCMPEQD %YMM12,%YMM12,%YMM12 |
(129) 0x4242bf VPSUBD %YMM12,%YMM11,%YMM27 |
(129) 0x4242c5 VPMOVSXDQ %XMM27,%YMM19 |
(129) 0x4242cb VXORPS %XMM6,%XMM6,%XMM6 |
(129) 0x4242cf VPMULLQ %YMM3,%YMM15,%YMM6 |
(129) 0x4242d5 VPMULLQ %YMM0,%YMM15,%YMM7 |
(129) 0x4242db VEXTRACTI32X4 $0x1,%YMM27,%XMM20 |
(129) 0x4242e2 VPBLENDMQ %YMM19,%YMM4,%YMM28{%K1} |
(129) 0x4242e8 VPADDQ %YMM28,%YMM6,%YMM6 |
(129) 0x4242ee KXNORW %K0,%K0,%K3 |
(129) 0x4242f2 VXORPD %XMM29,%XMM29,%XMM29 |
(129) 0x4242f8 VGATHERQPD (%R12,%YMM6,8),%YMM29{%K3} |
(129) 0x4242ff VPMOVSXDQ %XMM20,%YMM20 |
(129) 0x424305 VPBLENDMQ %YMM20,%YMM1,%YMM6{%K2} |
(129) 0x42430b VPADDQ %YMM6,%YMM7,%YMM30 |
(129) 0x424311 KXNORW %K0,%K0,%K3 |
(129) 0x424315 VPXOR %XMM7,%XMM7,%XMM7 |
(129) 0x424319 VGATHERQPD (%R12,%YMM30,8),%YMM7{%K3} |
(129) 0x424320 VPMULLQ %YMM0,%YMM16,%YMM31 |
(129) 0x424326 VPMULLQ %YMM3,%YMM16,%YMM13 |
(129) 0x42432c KSHIFTLB $0x4,%K2,%K0 |
(129) 0x424332 KORB %K0,%K1,%K3 |
(129) 0x424336 VPADDQ %YMM6,%YMM31,%YMM30 |
(129) 0x42433c KXNORW %K0,%K0,%K4 |
(129) 0x424340 VPXOR %XMM6,%XMM6,%XMM6 |
(129) 0x424344 VGATHERQPD (%R15,%YMM30,8),%YMM6{%K4} |
(129) 0x42434b VPADDD %YMM12,%YMM11,%YMM22 |
(129) 0x424351 VMOVDQA64 %YMM22,%YMM30 |
(129) 0x424357 VPADDD 0x43e4f(%RIP){1to8},%YMM11,%YMM30{%K3} |
(129) 0x424361 VPADDQ %YMM28,%YMM13,%YMM28 |
(129) 0x424367 KXNORW %K0,%K0,%K4 |
(129) 0x42436b VPXOR %XMM11,%XMM11,%XMM11 |
(129) 0x424370 VGATHERQPD (%R15,%YMM28,8),%YMM11{%K4} |
(129) 0x424377 VEXTRACTI32X4 $0x1,%YMM30,%XMM28 |
(129) 0x42437e VPMOVSXDQ %XMM28,%YMM28 |
(129) 0x424384 VPADDQ %YMM28,%YMM31,%YMM28 |
(129) 0x42438a KXNORW %K0,%K0,%K4 |
(129) 0x42438e VXORPD %XMM23,%XMM23,%XMM23 |
(129) 0x424394 VGATHERQPD (%R15,%YMM28,8),%YMM23{%K4} |
(129) 0x42439b VPMOVSXDQ %XMM30,%YMM28 |
(129) 0x4243a1 VPADDQ %YMM28,%YMM13,%YMM28 |
(129) 0x4243a7 KXNORW %K0,%K0,%K4 |
(129) 0x4243ab VPXORD %XMM30,%XMM30,%XMM30 |
(129) 0x4243b1 VGATHERQPD (%R15,%YMM28,8),%YMM30{%K4} |
(129) 0x4243b8 VPBLENDMQ %YMM1,%YMM20,%YMM20{%K2} |
(129) 0x4243be VPADDQ %YMM20,%YMM31,%YMM20 |
(129) 0x4243c4 KXNORW %K0,%K0,%K2 |
(129) 0x4243c8 VXORPD %XMM28,%XMM28,%XMM28 |
(129) 0x4243ce VGATHERQPD (%R15,%YMM20,8),%YMM28{%K2} |
(129) 0x4243d5 VPBLENDMQ %YMM4,%YMM19,%YMM19{%K1} |
(129) 0x4243db VPADDQ %YMM19,%YMM13,%YMM13 |
(129) 0x4243e1 KXNORW %K0,%K0,%K1 |
(129) 0x4243e5 VXORPD %XMM20,%XMM20,%XMM20 |
(129) 0x4243eb VGATHERQPD (%R15,%YMM13,8),%YMM20{%K1} |
(129) 0x4243f2 VMOVDQA32 %YMM27,%YMM22{%K3} |
(129) 0x4243f8 VSUBPD %YMM30,%YMM11,%YMM19 |
(129) 0x4243fe VSUBPD %YMM23,%YMM6,%YMM27 |
(129) 0x424404 VSUBPD %YMM11,%YMM20,%YMM20 |
(129) 0x42440a VSUBPD %YMM6,%YMM28,%YMM28 |
(129) 0x424410 VMULPD %YMM27,%YMM28,%YMM13 |
(129) 0x424416 VMULPD %YMM19,%YMM20,%YMM23 |
(129) 0x42441c VCMPPD $0x1,%YMM23,%YMM18,%K2 |
(129) 0x424423 VCMPPD $0x1,%YMM13,%YMM18,%K1 |
(129) 0x42442a KORTESTB %K1,%K2 |
(129) 0x42442e JE 424030 |
(129) 0x424434 MOV 0x38(%RSP),%RAX |
(129) 0x424439 VMOVQ 0x8(%RAX),%XMM31 |
(129) 0x424440 JMP 424030 |
0x424445 LEA -0x28(%RBP),%RSP |
0x424449 POP %RBX |
0x42444a POP %R12 |
0x42444c POP %R13 |
0x42444e POP %R14 |
0x424450 POP %R15 |
0x424452 POP %RBP |
0x424453 RET |
0x424454 CMP %RBX,0x50(%RSP) |
0x424459 MOV 0x58(%RSP),%R14 |
0x42445e MOV 0x18(%RSP),%R13 |
0x424463 JNE 424484 |
0x424465 MOV $0x6828f0,%EDI |
0x42446a MOV 0x10(%RSP),%ESI |
0x42446e LEA -0x28(%RBP),%RSP |
0x424472 POP %RBX |
0x424473 POP %R12 |
0x424475 POP %R13 |
0x424477 POP %R14 |
0x424479 POP %R15 |
0x42447b POP %RBP |
0x42447c VZEROUPPER |
0x42447f JMP 402e90 |
0x424484 ADD %RBX,%RSI |
0x424487 VPXOR %XMM0,%XMM0,%XMM0 |
0x42448b VMOVDDUP 0x41255(%RIP),%XMM1 |
0x424493 VMOVSD 0x41e0d(%RIP),%XMM2 |
0x42449b VMOVSD 0x4122d(%RIP),%XMM3 |
0x4244a3 VMOVSD 0x41e0d(%RIP),%XMM4 |
0x4244ab VMOVDDUP 0x41dfd(%RIP),%XMM5 |
0x4244b3 JMP 4244e5 |
0x4244b5 NOPW %CS:(%RAX,%RAX,1) |
(128) 0x4244c0 VSUBSD %XMM7,%XMM3,%XMM7 |
(128) 0x4244c4 VFMADD213SD %XMM8,%XMM10,%XMM7 |
(128) 0x4244c9 VMULSD %XMM6,%XMM7,%XMM6 |
(128) 0x4244cd IMUL 0x60(%RSP),%RCX |
(128) 0x4244d3 ADD %RAX,%RCX |
(128) 0x4244d6 VMOVSD %XMM6,(%RDI,%RCX,8) |
(128) 0x4244db INC %RSI |
(128) 0x4244de CMP 0x28(%RSP),%RSI |
(128) 0x4244e3 JG 424465 |
(128) 0x4244e5 MOV %RSI,%R8 |
(128) 0x4244e8 SHR $0x20,%R8 |
(128) 0x4244ec JE 424520 |
(128) 0x4244ee MOV %RSI,%RAX |
(128) 0x4244f1 XOR %EDX,%EDX |
(128) 0x4244f3 MOV 0x20(%RSP),%R10 |
(128) 0x4244f8 DIV %R10 |
(128) 0x4244fb MOV %RAX,%RCX |
(128) 0x4244fe TEST %R8,%R8 |
(128) 0x424501 MOV 0x8(%RSP),%R9 |
(128) 0x424506 MOV 0x70(%RSP),%R11 |
(128) 0x42450b JE 42453d |
(128) 0x42450d MOV %RSI,%RAX |
(128) 0x424510 CQTO |
(128) 0x424512 IDIV %R10 |
(128) 0x424515 JMP 424544 |
0x424517 NOPW (%RAX,%RAX,1) |
(128) 0x424520 MOV %ESI,%EAX |
(128) 0x424522 XOR %EDX,%EDX |
(128) 0x424524 MOV 0x20(%RSP),%R10 |
(128) 0x424529 DIV %R10D |
(128) 0x42452c MOV %EAX,%ECX |
(128) 0x42452e TEST %R8,%R8 |
(128) 0x424531 MOV 0x8(%RSP),%R9 |
(128) 0x424536 MOV 0x70(%RSP),%R11 |
(128) 0x42453b JNE 42450d |
(128) 0x42453d MOV %ESI,%EAX |
(128) 0x42453f XOR %EDX,%EDX |
(128) 0x424541 DIV %R10D |
(128) 0x424544 ADD 0x30(%RSP),%RCX |
(128) 0x424549 LEA (%RDX,%R14,1),%R8D |
(128) 0x42454d MOVSXD %R8D,%RAX |
(128) 0x424550 MOVSXD %ECX,%RCX |
(128) 0x424553 IMUL %RCX,%R9 |
(128) 0x424557 ADD %RAX,%R9 |
(128) 0x42455a VMOVSD (%R13,%R9,8),%XMM6 |
(128) 0x424561 VUCOMISD %XMM6,%XMM0 |
(128) 0x424565 LEA 0x1(%RDX,%R14,1),%EDX |
(128) 0x42456a MOVSXD %EDX,%R9 |
(128) 0x42456d JBE 424580 |
(128) 0x42456f ADD $0x2,%R8D |
(128) 0x424573 MOV %RAX,%R10 |
(128) 0x424576 JMP 42458c |
0x424578 NOPL (%RAX,%RAX,1) |
(128) 0x424580 DEC %R8D |
(128) 0x424583 MOV %R9,%R10 |
(128) 0x424586 MOV %RAX,%R9 |
(128) 0x424589 MOV %R8D,%EDX |
(128) 0x42458c VANDPD %XMM1,%XMM6,%XMM7 |
(128) 0x424590 IMUL %RCX,%R11 |
(128) 0x424594 ADD %R9,%R11 |
(128) 0x424597 VDIVSD (%R12,%R11,8),%XMM7,%XMM7 |
(128) 0x42459d MOV 0x68(%RSP),%R11 |
(128) 0x4245a2 IMUL %RCX,%R11 |
(128) 0x4245a6 ADD %R11,%R9 |
(128) 0x4245a9 VMOVSD (%R15,%R9,8),%XMM8 |
(128) 0x4245af MOVSXD %R8D,%R8 |
(128) 0x4245b2 ADD %R11,%R8 |
(128) 0x4245b5 ADD %R10,%R11 |
(128) 0x4245b8 VMOVHPD (%R15,%R11,8),%XMM8,%XMM9 |
(128) 0x4245be VMOVSD (%R15,%R8,8),%XMM10 |
(128) 0x4245c4 VPUNPCKLQDQ %XMM8,%XMM10,%XMM10 |
(128) 0x4245c9 VSUBPD %XMM10,%XMM9,%XMM11 |
(128) 0x4245ce VSHUFPD $0x1,%XMM11,%XMM11,%XMM9 |
(128) 0x4245d4 VMULSD %XMM11,%XMM9,%XMM12 |
(128) 0x4245d9 VXORPD %XMM10,%XMM10,%XMM10 |
(128) 0x4245de VUCOMISD %XMM10,%XMM12 |
(128) 0x4245e3 JBE 4244c0 |
(128) 0x4245e9 MOV 0x38(%RSP),%R8 |
(128) 0x4245ee MOV 0x8(%R8),%R8 |
(128) 0x4245f2 VMOVSD (%R8,%RAX,8),%XMM10 |
(128) 0x4245f8 VANDPD %XMM1,%XMM11,%XMM11 |
(128) 0x4245fc VSUBSD %XMM7,%XMM2,%XMM12 |
(128) 0x424600 VADDSD %XMM3,%XMM7,%XMM13 |
(128) 0x424604 VPUNPCKLQDQ %XMM12,%XMM13,%XMM12 |
(128) 0x424609 MOVSXD %EDX,%RDX |
(128) 0x42460c VMULPD %XMM12,%XMM11,%XMM12 |
(128) 0x424611 VMOVSD (%R8,%RDX,8),%XMM13 |
(128) 0x424617 VPUNPCKLQDQ %XMM10,%XMM13,%XMM13 |
(128) 0x42461c VDIVPD %XMM13,%XMM12,%XMM12 |
(128) 0x424621 VSHUFPD $0x1,%XMM12,%XMM12,%XMM13 |
(128) 0x424627 VADDSD %XMM13,%XMM12,%XMM12 |
(128) 0x42462c VMULSD %XMM4,%XMM10,%XMM10 |
(128) 0x424630 VMULSD %XMM12,%XMM10,%XMM10 |
(128) 0x424635 VSHUFPS $0x4e,%XMM11,%XMM11,%XMM12 |
(128) 0x42463b VMINSD %XMM12,%XMM11,%XMM11 |
(128) 0x424640 VMINSD %XMM11,%XMM10,%XMM11 |
(128) 0x424645 VXORPD %XMM5,%XMM11,%XMM10 |
(128) 0x424649 VCMPSD $0x1,%XMM9,%XMM0,%K1 |
(128) 0x424650 VMOVSD %XMM11,%XMM10,%XMM10{%K1} |
(128) 0x424656 JMP 4244c0 |
0x42465b NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 127 |
nb uops | 129 |
loop length | 577 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 12 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 21.50 cycles |
front end | 21.50 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 15.33 | 15.33 | 14.00 | 8.00 | 5.40 | 14.00 | 14.00 | 14.00 | 5.40 | 15.33 |
cycles | 5.60 | 5.60 | 15.33 | 15.33 | 14.00 | 8.00 | 5.40 | 14.00 | 14.00 | 14.00 | 5.40 | 15.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 21.30-21.29 |
Stall cycles | 0.00 |
Front-end | 21.50 |
Dispatch | 15.33 |
Overall L1 | 21.50 |
all | 9% |
load | 15% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 10% |
load | 10% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
all | 14% |
load | 18% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 14% |
load | 16% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xc0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 424445 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x5c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x80(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x1c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x50(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x48(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6828d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 424465 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x5e5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RBX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R14D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x28(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RCX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 424487 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x607> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R14,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD %R14D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDX,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x42287(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x420ff(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x416d4(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x42292(%RIP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x42278(%RIP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 4241d8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x358> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %RBX,0x50(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 424484 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6828f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x41255(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x41e0d(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4122d(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x41e0d(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x41dfd(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4244e5 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x665> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 127 |
nb uops | 129 |
loop length | 577 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 12 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 21.50 cycles |
front end | 21.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 15.33 | 15.33 | 14.00 | 8.00 | 5.40 | 14.00 | 14.00 | 14.00 | 5.40 | 15.33 |
cycles | 5.60 | 5.60 | 15.33 | 15.33 | 14.00 | 8.00 | 5.40 | 14.00 | 14.00 | 14.00 | 5.40 | 15.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 21.30-21.29 |
Stall cycles | 0.00 |
Front-end | 21.50 |
Dispatch | 15.33 |
Overall L1 | 21.50 |
all | 9% |
load | 15% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 10% |
load | 10% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
all | 14% |
load | 18% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 14% |
load | 16% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xc0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 424445 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x5c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x80(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x1c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x50(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x48(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6828d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 424465 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x5e5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RBX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R14D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x28(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RCX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 424487 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x607> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R14,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD %R14D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDX,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x42287(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x420ff(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x416d4(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x42292(%RIP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x42278(%RIP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 4241d8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x358> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %RBX,0x50(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 424484 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6828f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x41255(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x41e0d(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4122d(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x41e0d(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x41dfd(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4244e5 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x665> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 2.43 | 3.3 |
○Loop 129 - advec_mom.cpp:109-139 - exec | 2.43 | 3.29 |
○Loop 128 - advec_mom.cpp:109-139 - exec | 0 | 0 |