Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 2.76% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 2.76% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 108 - 139 |
-------------------------------------------------------------------------------- |
108: #pragma omp parallel for simd collapse(2) |
109: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
110: for (int i = (x_min - 1 + 1); i < (x_max + 1 + 2); i++) |
111: ({ |
112: int upwind, donor, downwind, dif; |
113: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
114: if (node_flux(i, j) < 0.0) { |
115: upwind = i + 2; |
116: donor = i + 1; |
117: downwind = i; |
118: dif = donor; |
119: } else { |
120: upwind = i - 1; |
121: donor = i; |
122: downwind = i + 1; |
123: dif = upwind; |
124: } |
125: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(donor, j)); |
126: width = celldx[i]; |
127: vdiffuw = vel1(donor, j) - vel1(upwind, j); |
128: vdiffdw = vel1(downwind, j) - vel1(donor, j); |
129: limiter = 0.0; |
130: if (vdiffuw * vdiffdw > 0.0) { |
131: auw = std::fabs(vdiffuw); |
132: adw = std::fabs(vdiffdw); |
133: wind = 1.0; |
134: if (vdiffdw <= 0.0) wind = -1.0; |
135: limiter = |
136: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldx[dif]) / 6.0, auw), adw); |
137: } |
138: advec_vel_s = vel1(donor, j) + (1.0 - sigma) * limiter; |
139: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
0x422570 PUSH %RBP |
0x422571 MOV %RSP,%RBP |
0x422574 PUSH %R15 |
0x422576 PUSH %R14 |
0x422578 PUSH %R13 |
0x42257a PUSH %R12 |
0x42257c PUSH %RBX |
0x42257d AND $-0x20,%RSP |
0x422581 SUB $0xc0,%RSP |
0x422588 MOV %RCX,%R13 |
0x42258b MOV 0x38(%RBP),%RAX |
0x42258f MOV 0x28(%RBP),%RBX |
0x422593 MOV 0x20(%RBP),%R14 |
0x422597 MOV 0x18(%RBP),%RCX |
0x42259b MOV %RCX,0x40(%RSP) |
0x4225a0 MOV 0x10(%RBP),%RCX |
0x4225a4 MOV %RCX,0x48(%RSP) |
0x4225a9 MOVL $0,0x14(%RSP) |
0x4225b1 TEST %RAX,%RAX |
0x4225b4 JS 422b35 |
0x4225ba MOV %R8,%R12 |
0x4225bd MOV %RDX,%R15 |
0x4225c0 MOV %R9,0x8(%RSP) |
0x4225c5 MOV (%RDI),%ESI |
0x4225c7 MOVQ $0,0x58(%RSP) |
0x4225d0 MOV %RAX,0x50(%RSP) |
0x4225d5 MOVQ $0x1,0x78(%RSP) |
0x4225de SUB $0x8,%RSP |
0x4225e2 LEA 0x80(%RSP),%RAX |
0x4225ea LEA 0x1c(%RSP),%RCX |
0x4225ef LEA 0x60(%RSP),%R8 |
0x4225f4 LEA 0x58(%RSP),%R9 |
0x4225f9 MOV $0x4808e0,%EDI |
0x4225fe MOV %ESI,0x18(%RSP) |
0x422602 MOV $0x22,%EDX |
0x422607 PUSH $0x1 |
0x422609 PUSH $0x1 |
0x42260b PUSH %RAX |
0x42260c CALL 4031e0 <__kmpc_for_static_init_8@plt> |
0x422611 ADD $0x20,%RSP |
0x422615 MOV 0x58(%RSP),%RSI |
0x42261a MOV 0x50(%RSP),%RAX |
0x42261f MOV %RAX,0x38(%RSP) |
0x422624 CMP %RAX,%RSI |
0x422627 JA 422b55 |
0x42262d MOV %RBX,%R11 |
0x422630 SUB %R14D,%R11D |
0x422633 MOV (%R13),%RDX |
0x422637 MOV 0x10(%R13),%R13 |
0x42263b MOV (%R12),%R8 |
0x42263f MOV 0x10(%R12),%R12 |
0x422644 MOV (%R15),%R9 |
0x422647 MOV 0x10(%R15),%R15 |
0x42264b MOV 0x8(%RSP),%RAX |
0x422650 MOV (%RAX),%R10 |
0x422653 MOV 0x10(%RAX),%RDI |
0x422657 LEA 0x1(%RSI),%RAX |
0x42265b MOV 0x38(%RSP),%RCX |
0x422660 INC %RCX |
0x422663 CMP %RCX,%RAX |
0x422666 CMOVG %RAX,%RCX |
0x42266a SUB %RSI,%RCX |
0x42266d MOV $-0x8,%EBX |
0x422672 AND %RCX,%RBX |
0x422675 MOV %RDX,0x30(%RSP) |
0x42267a MOV %R8,0x28(%RSP) |
0x42267f MOV %R10,0x8(%RSP) |
0x422684 MOV %R11,0x20(%RSP) |
0x422689 MOV %R9,0x70(%RSP) |
0x42268e JE 422b77 |
0x422694 MOV %R13,0x18(%RSP) |
0x422699 MOV %RCX,0x60(%RSP) |
0x42269e VPBROADCASTQ %R11,%YMM8 |
0x4226a4 MOV 0x40(%RSP),%RAX |
0x4226a9 VPBROADCASTQ %RAX,%YMM24 |
0x4226af MOV %R14,0x68(%RSP) |
0x4226b4 VPBROADCASTD %R14D,%YMM0 |
0x4226ba VMOVDQU %YMM0,0x80(%RSP) |
0x4226c3 VPBROADCASTQ %RDX,%YMM14 |
0x4226c9 VPBROADCASTQ %R8,%YMM15 |
0x4226cf VPBROADCASTQ %R9,%YMM16 |
0x4226d5 VPBROADCASTQ %R10,%YMM17 |
0x4226db VPBROADCASTQ %RSI,%YMM0 |
0x4226e1 VPADDQ 0x41f97(%RIP),%YMM0,%YMM9 |
0x4226e9 VPADDQ 0x41e0f(%RIP),%YMM0,%YMM10 |
0x4226f1 XOR %R14D,%R14D |
0x4226f4 VXORPD %XMM18,%XMM18,%XMM18 |
0x4226fa VBROADCASTSD 0x413dc(%RIP),%YMM21 |
0x422704 VBROADCASTSD 0x41fa2(%RIP),%YMM25 |
0x42270e VPBROADCASTQ 0x41f88(%RIP),%YMM26 |
0x422718 JMP 4228c8 |
0x42271d NOPL (%RAX) |
(178) 0x422720 VMOVQ %XMM19,%RAX |
(178) 0x422726 KMOVQ %K2,%K3 |
(178) 0x42272b VXORPD %XMM13,%XMM13,%XMM13 |
(178) 0x422730 VGATHERQPD (%RAX,%YMM4,8),%YMM13{%K3} |
(178) 0x422737 VEXTRACTI32X4 $0x1,%YMM22,%XMM19 |
(178) 0x42273e VPMOVSXDQ %XMM19,%YMM19 |
(178) 0x422744 KMOVQ %K1,%K3 |
(178) 0x422749 VXORPD %XMM23,%XMM23,%XMM23 |
(178) 0x42274f VGATHERQPD (%RAX,%YMM19,8),%YMM23{%K3} |
(178) 0x422756 VANDPD %YMM21,%YMM5,%YMM19 |
(178) 0x42275c VDIVPD %YMM29,%YMM19,%YMM19 |
(178) 0x422762 VANDPD %YMM21,%YMM20,%YMM20 |
(178) 0x422768 VPMOVSXDQ %XMM22,%YMM22 |
(178) 0x42276e KMOVQ %K2,%K3 |
(178) 0x422773 VXORPD %XMM29,%XMM29,%XMM29 |
(178) 0x422779 VGATHERQPD (%RAX,%YMM22,8),%YMM29{%K3} |
(178) 0x422780 VANDPD %YMM21,%YMM31,%YMM22 |
(178) 0x422786 VCMPPD $0x1,%YMM31,%YMM18,%K3 |
(178) 0x42278d VBROADCASTSD 0x41f12(%RIP),%YMM12 |
(178) 0x422796 VSUBPD %YMM19,%YMM12,%YMM30 |
(178) 0x42279c VMULPD %YMM30,%YMM22,%YMM30 |
(178) 0x4227a2 VMINPD %YMM22,%YMM20,%YMM22 |
(178) 0x4227a8 VFMADD213PD %YMM20,%YMM19,%YMM20 |
(178) 0x4227ae VDIVPD %YMM29,%YMM20,%YMM20 |
(178) 0x4227b4 VDIVPD %YMM13,%YMM30,%YMM29 |
(178) 0x4227ba VADDPD %YMM29,%YMM20,%YMM20 |
(178) 0x4227c0 VBROADCASTSD 0x41eee(%RIP),%YMM29 |
(178) 0x4227ca VMULPD %YMM29,%YMM13,%YMM13 |
(178) 0x4227d0 VMULPD %YMM20,%YMM13,%YMM13 |
(178) 0x4227d6 VMINPD %YMM22,%YMM13,%YMM13 |
(178) 0x4227dc VXORPD %YMM25,%YMM13,%YMM20 |
(178) 0x4227e2 VMOVAPD %YMM13,%YMM20{%K3} |
(178) 0x4227e8 VMOVAPD %YMM20,%YMM13{%K2}{z} |
(178) 0x4227ee VBROADCASTSD 0x412d0(%RIP),%YMM20 |
(178) 0x4227f8 VSUBPD %YMM19,%YMM20,%YMM19 |
(178) 0x4227fe VFMADD213PD %YMM11,%YMM13,%YMM19 |
(178) 0x422804 VMULPD %YMM5,%YMM19,%YMM5 |
(178) 0x42280a KMOVQ %K1,%K2 |
(178) 0x42280f VPMULLQ %YMM3,%YMM17,%YMM3 |
(178) 0x422815 VPADDQ %YMM4,%YMM3,%YMM3 |
(178) 0x422819 VXORPD %XMM4,%XMM4,%XMM4 |
(178) 0x42281d VGATHERQPD (%RAX,%YMM1,8),%YMM4{%K2} |
(178) 0x422824 KXNORW %K0,%K0,%K2 |
(178) 0x422828 VSCATTERQPD %YMM5,(%RDI,%YMM3,8){%K2} |
(178) 0x42282f VANDPD %YMM21,%YMM2,%YMM3 |
(178) 0x422835 VDIVPD %YMM7,%YMM3,%YMM3 |
(178) 0x422839 VANDPD %YMM21,%YMM27,%YMM5 |
(178) 0x42283f VANDPD %YMM21,%YMM28,%YMM7 |
(178) 0x422845 VCMPPD $0x1,%YMM28,%YMM18,%K2 |
(178) 0x42284c VSUBPD %YMM3,%YMM12,%YMM11 |
(178) 0x422850 VMULPD %YMM7,%YMM11,%YMM11 |
(178) 0x422854 VMINPD %YMM7,%YMM5,%YMM7 |
(178) 0x422858 VFMADD213PD %YMM5,%YMM3,%YMM5 |
(178) 0x42285d VDIVPD %YMM23,%YMM5,%YMM5 |
(178) 0x422863 VDIVPD %YMM4,%YMM11,%YMM11 |
(178) 0x422867 VADDPD %YMM5,%YMM11,%YMM5 |
(178) 0x42286b VMULPD %YMM29,%YMM4,%YMM4 |
(178) 0x422871 VMULPD %YMM5,%YMM4,%YMM4 |
(178) 0x422875 VMINPD %YMM7,%YMM4,%YMM4 |
(178) 0x422879 VXORPD %YMM25,%YMM4,%YMM5 |
(178) 0x42287f VMOVAPD %YMM4,%YMM5{%K2} |
(178) 0x422885 VMOVAPD %YMM5,%YMM4{%K1}{z} |
(178) 0x42288b VSUBPD %YMM3,%YMM20,%YMM3 |
(178) 0x422891 VFMADD213PD %YMM6,%YMM4,%YMM3 |
(178) 0x422896 VMULPD %YMM2,%YMM3,%YMM2 |
(178) 0x42289a VPMULLQ %YMM0,%YMM17,%YMM0 |
(178) 0x4228a0 VPADDQ %YMM1,%YMM0,%YMM0 |
(178) 0x4228a4 KXNORW %K0,%K0,%K1 |
(178) 0x4228a8 VSCATTERQPD %YMM2,(%RDI,%YMM0,8){%K1} |
(178) 0x4228af VPADDQ %YMM26,%YMM10,%YMM10 |
(178) 0x4228b5 VPADDQ %YMM26,%YMM9,%YMM9 |
(178) 0x4228bb ADD $0x8,%R14 |
(178) 0x4228bf CMP %RBX,%R14 |
(178) 0x4228c2 JAE 422b44 |
(178) 0x4228c8 VMOVDQA %YMM10,%YMM0 |
(178) 0x4228cc VMOVDQA %YMM8,%YMM1 |
(178) 0x4228d0 MOV $0x452aa0,%R13 |
(178) 0x4228d7 CALL %R13 |
(178) 0x4228da VMOVDQA %YMM0,%YMM11 |
(178) 0x4228de VMOVDQA %YMM9,%YMM0 |
(178) 0x4228e2 VMOVDQA %YMM8,%YMM1 |
(178) 0x4228e6 CALL %R13 |
(178) 0x4228e9 VPADDQ %YMM24,%YMM0,%YMM19 |
(178) 0x4228ef VPADDQ %YMM24,%YMM11,%YMM20 |
(178) 0x4228f5 VMOVDQA %YMM10,%YMM0 |
(178) 0x4228f9 VMOVDQA %YMM8,%YMM1 |
(178) 0x4228fd MOV $0x452870,%R13 |
(178) 0x422904 CALL %R13 |
(178) 0x422907 VMOVDQA %YMM0,%YMM11 |
(178) 0x42290b VMOVDQA %YMM9,%YMM0 |
(178) 0x42290f VMOVDQA %YMM8,%YMM1 |
(178) 0x422913 CALL %R13 |
(178) 0x422916 VPMOVQD %YMM11,%XMM1 |
(178) 0x42291c VPMOVQD %YMM0,%XMM0 |
(178) 0x422922 VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(178) 0x422928 VPADDD 0x80(%RSP),%YMM0,%YMM11 |
(178) 0x422931 VPSLLQ $0x20,%YMM20,%YMM0 |
(178) 0x422938 VPSRAQ $0x20,%YMM0,%YMM3 |
(178) 0x42293f VPSLLQ $0x20,%YMM19,%YMM0 |
(178) 0x422946 VPSRAQ $0x20,%YMM0,%YMM0 |
(178) 0x42294d VXORPS %XMM2,%XMM2,%XMM2 |
(178) 0x422951 VPMULLQ %YMM0,%YMM14,%YMM2 |
(178) 0x422957 VXORPS %XMM5,%XMM5,%XMM5 |
(178) 0x42295b VPMULLQ %YMM3,%YMM14,%YMM5 |
(178) 0x422961 VEXTRACTI128 $0x1,%YMM11,%XMM1 |
(178) 0x422967 VPMOVSXDQ %XMM1,%YMM1 |
(178) 0x42296c VPADDQ %YMM1,%YMM2,%YMM4 |
(178) 0x422970 KXNORW %K0,%K0,%K1 |
(178) 0x422974 VPXOR %XMM2,%XMM2,%XMM2 |
(178) 0x422978 MOV 0x18(%RSP),%RAX |
(178) 0x42297d VGATHERQPD (%RAX,%YMM4,8),%YMM2{%K1} |
(178) 0x422984 VPMOVSXDQ %XMM11,%YMM4 |
(178) 0x422989 VPADDQ %YMM4,%YMM5,%YMM6 |
(178) 0x42298d KXNORW %K0,%K0,%K1 |
(178) 0x422991 VPXOR %XMM5,%XMM5,%XMM5 |
(178) 0x422995 VGATHERQPD (%RAX,%YMM6,8),%YMM5{%K1} |
(178) 0x42299c VCMPPD $0x1,%YMM18,%YMM5,%K1 |
(178) 0x4229a3 VCMPPD $0x1,%YMM18,%YMM2,%K2 |
(178) 0x4229aa VPCMPEQD %YMM12,%YMM12,%YMM12 |
(178) 0x4229af VPSUBD %YMM12,%YMM11,%YMM19 |
(178) 0x4229b5 VPMOVSXDQ %XMM19,%YMM20 |
(178) 0x4229bb VXORPS %XMM6,%XMM6,%XMM6 |
(178) 0x4229bf VPMULLQ %YMM3,%YMM15,%YMM6 |
(178) 0x4229c5 VPMULLQ %YMM0,%YMM15,%YMM7 |
(178) 0x4229cb VEXTRACTI32X4 $0x1,%YMM19,%XMM27 |
(178) 0x4229d2 VPBLENDMQ %YMM20,%YMM4,%YMM28{%K1} |
(178) 0x4229d8 VPADDQ %YMM28,%YMM6,%YMM6 |
(178) 0x4229de KXNORW %K0,%K0,%K3 |
(178) 0x4229e2 VXORPD %XMM29,%XMM29,%XMM29 |
(178) 0x4229e8 VGATHERQPD (%R12,%YMM6,8),%YMM29{%K3} |
(178) 0x4229ef VPMOVSXDQ %XMM27,%YMM27 |
(178) 0x4229f5 VPBLENDMQ %YMM27,%YMM1,%YMM6{%K2} |
(178) 0x4229fb VPADDQ %YMM6,%YMM7,%YMM30 |
(178) 0x422a01 KXNORW %K0,%K0,%K3 |
(178) 0x422a05 VPXOR %XMM7,%XMM7,%XMM7 |
(178) 0x422a09 VGATHERQPD (%R12,%YMM30,8),%YMM7{%K3} |
(178) 0x422a10 VPMULLQ %YMM0,%YMM16,%YMM31 |
(178) 0x422a16 VPMULLQ %YMM3,%YMM16,%YMM13 |
(178) 0x422a1c KSHIFTLB $0x4,%K2,%K0 |
(178) 0x422a22 KORB %K0,%K1,%K3 |
(178) 0x422a26 VPADDQ %YMM6,%YMM31,%YMM30 |
(178) 0x422a2c KXNORW %K0,%K0,%K4 |
(178) 0x422a30 VPXOR %XMM6,%XMM6,%XMM6 |
(178) 0x422a34 VGATHERQPD (%R15,%YMM30,8),%YMM6{%K4} |
(178) 0x422a3b VPADDD %YMM12,%YMM11,%YMM22 |
(178) 0x422a41 VMOVDQA64 %YMM22,%YMM30 |
(178) 0x422a47 VPADDD 0x43acb(%RIP){1to8},%YMM11,%YMM30{%K3} |
(178) 0x422a51 VPADDQ %YMM28,%YMM13,%YMM28 |
(178) 0x422a57 KXNORW %K0,%K0,%K4 |
(178) 0x422a5b VPXOR %XMM11,%XMM11,%XMM11 |
(178) 0x422a60 VGATHERQPD (%R15,%YMM28,8),%YMM11{%K4} |
(178) 0x422a67 VEXTRACTI32X4 $0x1,%YMM30,%XMM28 |
(178) 0x422a6e VPMOVSXDQ %XMM28,%YMM28 |
(178) 0x422a74 VPADDQ %YMM28,%YMM31,%YMM28 |
(178) 0x422a7a KXNORW %K0,%K0,%K4 |
(178) 0x422a7e VXORPD %XMM23,%XMM23,%XMM23 |
(178) 0x422a84 VGATHERQPD (%R15,%YMM28,8),%YMM23{%K4} |
(178) 0x422a8b VPMOVSXDQ %XMM30,%YMM28 |
(178) 0x422a91 VPADDQ %YMM28,%YMM13,%YMM28 |
(178) 0x422a97 KXNORW %K0,%K0,%K4 |
(178) 0x422a9b VPXORD %XMM30,%XMM30,%XMM30 |
(178) 0x422aa1 VGATHERQPD (%R15,%YMM28,8),%YMM30{%K4} |
(178) 0x422aa8 VPBLENDMQ %YMM1,%YMM27,%YMM27{%K2} |
(178) 0x422aae VPADDQ %YMM27,%YMM31,%YMM27 |
(178) 0x422ab4 KXNORW %K0,%K0,%K2 |
(178) 0x422ab8 VXORPD %XMM28,%XMM28,%XMM28 |
(178) 0x422abe VGATHERQPD (%R15,%YMM27,8),%YMM28{%K2} |
(178) 0x422ac5 VPBLENDMQ %YMM4,%YMM20,%YMM20{%K1} |
(178) 0x422acb VPADDQ %YMM20,%YMM13,%YMM13 |
(178) 0x422ad1 KXNORW %K0,%K0,%K1 |
(178) 0x422ad5 VPXORD %XMM31,%XMM31,%XMM31 |
(178) 0x422adb VGATHERQPD (%R15,%YMM13,8),%YMM31{%K1} |
(178) 0x422ae2 VMOVDQA32 %YMM19,%YMM22{%K3} |
(178) 0x422ae8 VSUBPD %YMM30,%YMM11,%YMM20 |
(178) 0x422aee VSUBPD %YMM23,%YMM6,%YMM27 |
(178) 0x422af4 VSUBPD %YMM11,%YMM31,%YMM31 |
(178) 0x422afa VSUBPD %YMM6,%YMM28,%YMM28 |
(178) 0x422b00 VMULPD %YMM27,%YMM28,%YMM13 |
(178) 0x422b06 VMULPD %YMM20,%YMM31,%YMM19 |
(178) 0x422b0c VCMPPD $0x1,%YMM19,%YMM18,%K2 |
(178) 0x422b13 VCMPPD $0x1,%YMM13,%YMM18,%K1 |
(178) 0x422b1a KORTESTB %K1,%K2 |
(178) 0x422b1e JE 422720 |
(178) 0x422b24 MOV 0x48(%RSP),%RAX |
(178) 0x422b29 VMOVQ 0x8(%RAX),%XMM19 |
(178) 0x422b30 JMP 422720 |
0x422b35 LEA -0x28(%RBP),%RSP |
0x422b39 POP %RBX |
0x422b3a POP %R12 |
0x422b3c POP %R13 |
0x422b3e POP %R14 |
0x422b40 POP %R15 |
0x422b42 POP %RBP |
0x422b43 RET |
0x422b44 CMP %RBX,0x60(%RSP) |
0x422b49 MOV 0x68(%RSP),%R14 |
0x422b4e MOV 0x18(%RSP),%R13 |
0x422b53 JNE 422b74 |
0x422b55 MOV $0x480900,%EDI |
0x422b5a MOV 0x10(%RSP),%ESI |
0x422b5e LEA -0x28(%RBP),%RSP |
0x422b62 POP %RBX |
0x422b63 POP %R12 |
0x422b65 POP %R13 |
0x422b67 POP %R14 |
0x422b69 POP %R15 |
0x422b6b POP %RBP |
0x422b6c VZEROUPPER |
0x422b6f JMP 403050 |
0x422b74 ADD %RBX,%RSI |
0x422b77 VPXOR %XMM0,%XMM0,%XMM0 |
0x422b7b VMOVDDUP 0x40f5d(%RIP),%XMM1 |
0x422b83 VMOVSD 0x41b1d(%RIP),%XMM2 |
0x422b8b VMOVSD 0x40f35(%RIP),%XMM3 |
0x422b93 VMOVSD 0x41b1d(%RIP),%XMM4 |
0x422b9b VMOVDDUP 0x41b0d(%RIP),%XMM5 |
0x422ba3 JMP 422bd5 |
0x422ba5 NOPW %CS:(%RAX,%RAX,1) |
(177) 0x422bb0 VSUBSD %XMM7,%XMM3,%XMM7 |
(177) 0x422bb4 VFMADD213SD %XMM8,%XMM10,%XMM7 |
(177) 0x422bb9 VMULSD %XMM6,%XMM7,%XMM6 |
(177) 0x422bbd IMUL 0x8(%RSP),%RCX |
(177) 0x422bc3 ADD %RAX,%RCX |
(177) 0x422bc6 VMOVSD %XMM6,(%RDI,%RCX,8) |
(177) 0x422bcb INC %RSI |
(177) 0x422bce CMP 0x38(%RSP),%RSI |
(177) 0x422bd3 JG 422b55 |
(177) 0x422bd5 MOV %RSI,%R8 |
(177) 0x422bd8 SHR $0x20,%R8 |
(177) 0x422bdc JE 422c10 |
(177) 0x422bde MOV %RSI,%RAX |
(177) 0x422be1 XOR %EDX,%EDX |
(177) 0x422be3 MOV 0x20(%RSP),%R10 |
(177) 0x422be8 DIV %R10 |
(177) 0x422beb MOV %RAX,%RCX |
(177) 0x422bee MOV 0x30(%RSP),%R9 |
(177) 0x422bf3 MOV 0x28(%RSP),%R11 |
(177) 0x422bf8 TEST %R8,%R8 |
(177) 0x422bfb JE 422c2d |
(177) 0x422bfd MOV %RSI,%RAX |
(177) 0x422c00 CQTO |
(177) 0x422c02 IDIV %R10 |
(177) 0x422c05 JMP 422c34 |
0x422c07 NOPW (%RAX,%RAX,1) |
(177) 0x422c10 MOV %ESI,%EAX |
(177) 0x422c12 XOR %EDX,%EDX |
(177) 0x422c14 MOV 0x20(%RSP),%R10 |
(177) 0x422c19 DIV %R10D |
(177) 0x422c1c MOV %EAX,%ECX |
(177) 0x422c1e MOV 0x30(%RSP),%R9 |
(177) 0x422c23 MOV 0x28(%RSP),%R11 |
(177) 0x422c28 TEST %R8,%R8 |
(177) 0x422c2b JNE 422bfd |
(177) 0x422c2d MOV %ESI,%EAX |
(177) 0x422c2f XOR %EDX,%EDX |
(177) 0x422c31 DIV %R10D |
(177) 0x422c34 ADD 0x40(%RSP),%RCX |
(177) 0x422c39 LEA (%RDX,%R14,1),%R8D |
(177) 0x422c3d MOVSXD %R8D,%RAX |
(177) 0x422c40 MOVSXD %ECX,%RCX |
(177) 0x422c43 IMUL %RCX,%R9 |
(177) 0x422c47 ADD %RAX,%R9 |
(177) 0x422c4a VMOVSD (%R13,%R9,8),%XMM6 |
(177) 0x422c51 VUCOMISD %XMM6,%XMM0 |
(177) 0x422c55 LEA 0x1(%RDX,%R14,1),%EDX |
(177) 0x422c5a MOVSXD %EDX,%R9 |
(177) 0x422c5d JBE 422c70 |
(177) 0x422c5f ADD $0x2,%R8D |
(177) 0x422c63 MOV %RAX,%R10 |
(177) 0x422c66 JMP 422c7c |
0x422c68 NOPL (%RAX,%RAX,1) |
(177) 0x422c70 DEC %R8D |
(177) 0x422c73 MOV %R9,%R10 |
(177) 0x422c76 MOV %RAX,%R9 |
(177) 0x422c79 MOV %R8D,%EDX |
(177) 0x422c7c VANDPD %XMM1,%XMM6,%XMM7 |
(177) 0x422c80 IMUL %RCX,%R11 |
(177) 0x422c84 ADD %R9,%R11 |
(177) 0x422c87 VDIVSD (%R12,%R11,8),%XMM7,%XMM7 |
(177) 0x422c8d MOV 0x70(%RSP),%R11 |
(177) 0x422c92 IMUL %RCX,%R11 |
(177) 0x422c96 ADD %R11,%R9 |
(177) 0x422c99 MOVSXD %R8D,%R8 |
(177) 0x422c9c ADD %R11,%R8 |
(177) 0x422c9f ADD %R10,%R11 |
(177) 0x422ca2 VMOVSD (%R15,%R9,8),%XMM8 |
(177) 0x422ca8 VMOVHPD (%R15,%R11,8),%XMM8,%XMM9 |
(177) 0x422cae VMOVSD (%R15,%R8,8),%XMM10 |
(177) 0x422cb4 VPUNPCKLQDQ %XMM8,%XMM10,%XMM10 |
(177) 0x422cb9 VSUBPD %XMM10,%XMM9,%XMM11 |
(177) 0x422cbe VSHUFPD $0x1,%XMM11,%XMM11,%XMM9 |
(177) 0x422cc4 VMULSD %XMM11,%XMM9,%XMM12 |
(177) 0x422cc9 VXORPD %XMM10,%XMM10,%XMM10 |
(177) 0x422cce VUCOMISD %XMM10,%XMM12 |
(177) 0x422cd3 JBE 422bb0 |
(177) 0x422cd9 MOV 0x48(%RSP),%R8 |
(177) 0x422cde MOV 0x8(%R8),%R8 |
(177) 0x422ce2 VANDPD %XMM1,%XMM11,%XMM10 |
(177) 0x422ce6 VSUBSD %XMM7,%XMM2,%XMM11 |
(177) 0x422cea VADDSD %XMM3,%XMM7,%XMM12 |
(177) 0x422cee VPUNPCKLQDQ %XMM11,%XMM12,%XMM11 |
(177) 0x422cf3 MOVSXD %EDX,%RDX |
(177) 0x422cf6 VMOVSD (%R8,%RAX,8),%XMM12 |
(177) 0x422cfc VMULPD %XMM11,%XMM10,%XMM11 |
(177) 0x422d01 VMOVSD (%R8,%RDX,8),%XMM13 |
(177) 0x422d07 VPUNPCKLQDQ %XMM12,%XMM13,%XMM13 |
(177) 0x422d0c VDIVPD %XMM13,%XMM11,%XMM11 |
(177) 0x422d11 VSHUFPD $0x1,%XMM11,%XMM11,%XMM13 |
(177) 0x422d17 VADDSD %XMM13,%XMM11,%XMM11 |
(177) 0x422d1c VMULSD %XMM4,%XMM12,%XMM12 |
(177) 0x422d20 VMULSD %XMM11,%XMM12,%XMM11 |
(177) 0x422d25 VSHUFPS $0x4e,%XMM10,%XMM10,%XMM12 |
(177) 0x422d2b VMINSD %XMM12,%XMM10,%XMM10 |
(177) 0x422d30 VMINSD %XMM10,%XMM11,%XMM11 |
(177) 0x422d35 VXORPD %XMM5,%XMM11,%XMM10 |
(177) 0x422d39 VCMPSD $0x1,%XMM9,%XMM0,%K1 |
(177) 0x422d40 VMOVSD %XMM11,%XMM10,%XMM10{%K1} |
(177) 0x422d46 JMP 422bb0 |
0x422d4b NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 127 |
nb uops | 129 |
loop length | 577 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 12 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 21.50 cycles |
front end | 21.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 15.33 | 15.33 | 14.00 | 8.00 | 5.40 | 14.00 | 14.00 | 14.00 | 5.40 | 15.33 |
cycles | 5.60 | 5.60 | 15.33 | 15.33 | 14.00 | 8.00 | 5.40 | 14.00 | 14.00 | 14.00 | 5.40 | 15.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 21.30-21.29 |
Stall cycles | 0.00 |
Front-end | 21.50 |
Dispatch | 15.33 |
Overall L1 | 21.50 |
all | 9% |
load | 15% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 10% |
load | 10% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
all | 14% |
load | 18% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 14% |
load | 16% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xc0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 422b35 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x5c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x80(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x1c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x58(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4808e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 422b55 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x5e5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RBX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R14D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RCX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 422b77 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x607> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD %R14D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDX,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x41f97(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x41e0f(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x413dc(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x41fa2(%RIP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x41f88(%RIP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 4228c8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x358> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %RBX,0x60(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 422b74 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x480900,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x40f5d(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x41b1d(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40f35(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x41b1d(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x41b0d(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 422bd5 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x665> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 127 |
nb uops | 129 |
loop length | 577 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 12 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 21.50 cycles |
front end | 21.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 15.33 | 15.33 | 14.00 | 8.00 | 5.40 | 14.00 | 14.00 | 14.00 | 5.40 | 15.33 |
cycles | 5.60 | 5.60 | 15.33 | 15.33 | 14.00 | 8.00 | 5.40 | 14.00 | 14.00 | 14.00 | 5.40 | 15.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 21.30-21.29 |
Stall cycles | 0.00 |
Front-end | 21.50 |
Dispatch | 15.33 |
Overall L1 | 21.50 |
all | 9% |
load | 15% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 10% |
load | 10% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
all | 14% |
load | 18% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 14% |
load | 16% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xc0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 422b35 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x5c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x80(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x1c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x58(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4808e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 422b55 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x5e5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RBX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R14D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RCX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 422b77 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x607> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD %R14D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDX,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x41f97(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x41e0f(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x413dc(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x41fa2(%RIP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x41f88(%RIP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 4228c8 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x358> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %RBX,0x60(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 422b74 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x480900,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x40f5d(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x41b1d(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40f35(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x41b1d(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x41b0d(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 422bd5 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27+0x665> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.27– | 2.76 | 3.89 |
○Loop 178 - advec_mom.cpp:108-139 - exec | 2.76 | 3.88 |
○Loop 177 - advec_mom.cpp:108-139 - exec | 0 | 0 |