Function: .omp_outlined..12#0x239e10 | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 4.31% |
---|
Function: .omp_outlined..12#0x239e10 | Module: exec | Source: advec_mom.cpp:108-139 [...] | Coverage: 4.31% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 108 - 139 |
-------------------------------------------------------------------------------- |
108: #pragma omp parallel for simd collapse(2) |
109: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
110: for (int i = (x_min - 1 + 1); i < (x_max + 1 + 2); i++) |
111: ({ |
112: int upwind, donor, downwind, dif; |
113: double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; |
114: if (node_flux(i, j) < 0.0) { |
115: upwind = i + 2; |
116: donor = i + 1; |
117: downwind = i; |
118: dif = donor; |
119: } else { |
120: upwind = i - 1; |
121: donor = i; |
122: downwind = i + 1; |
123: dif = upwind; |
124: } |
125: sigma = std::fabs(node_flux(i, j)) / (node_mass_pre(donor, j)); |
126: width = celldx[i]; |
127: vdiffuw = vel1(donor, j) - vel1(upwind, j); |
128: vdiffdw = vel1(downwind, j) - vel1(donor, j); |
129: limiter = 0.0; |
130: if (vdiffuw * vdiffdw > 0.0) { |
131: auw = std::fabs(vdiffuw); |
132: adw = std::fabs(vdiffdw); |
133: wind = 1.0; |
134: if (vdiffdw <= 0.0) wind = -1.0; |
135: limiter = |
136: wind * std::fmin(std::fmin(width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / celldx[dif]) / 6.0, auw), adw); |
137: } |
138: advec_vel_s = vel1(donor, j) + (1.0 - sigma) * limiter; |
139: mom_flux(i, j) = advec_vel_s * node_flux(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x239e10 PUSH %RBP |
0x239e11 MOV %RSP,%RBP |
0x239e14 PUSH %R15 |
0x239e16 PUSH %R14 |
0x239e18 PUSH %R13 |
0x239e1a PUSH %R12 |
0x239e1c PUSH %RBX |
0x239e1d SUB $0x78,%RSP |
0x239e21 MOV (%RCX),%R15D |
0x239e24 MOV (%RDX),%R12D |
0x239e27 INC %R12D |
0x239e2a ADD $0x3,%R15D |
0x239e2e SUB %R12D,%R15D |
0x239e31 JLE 23a5a5 |
0x239e37 MOV (%R9),%EBX |
0x239e3a MOV (%R8),%R13D |
0x239e3d ADD $0x3,%EBX |
0x239e40 CMP %R13D,%EBX |
0x239e43 JLE 23a5a5 |
0x239e49 SUB %R13D,%EBX |
0x239e4c MOV (%RDI),%ESI |
0x239e4e MOVQ $0,-0x68(%RBP) |
0x239e56 MOVQ $0x1,-0xa0(%RBP) |
0x239e61 MOVL $0,-0x3c(%RBP) |
0x239e68 IMUL %RBX,%R15 |
0x239e6c DEC %R15 |
0x239e6f MOV %R15,-0x30(%RBP) |
0x239e73 SUB $0x8,%RSP |
0x239e77 LEA -0x3c(%RBP),%RCX |
0x239e7b LEA -0xa0(%RBP),%RAX |
0x239e82 LEA 0x26f2f(%RIP),%RDI |
0x239e89 LEA -0x68(%RBP),%R8 |
0x239e8d LEA -0x30(%RBP),%R9 |
0x239e91 MOV %ESI,-0x38(%RBP) |
0x239e94 MOV $0x22,%EDX |
0x239e99 PUSH $0x1 |
0x239e9b PUSH $0x1 |
0x239e9d PUSH %RAX |
0x239e9e CALL 25f740 <@plt_start@+0x530> |
0x239ea3 ADD $0x20,%RSP |
0x239ea7 MOV -0x30(%RBP),%RAX |
0x239eab MOV -0x68(%RBP),%RCX |
0x239eaf CMP %R15,%RAX |
0x239eb2 CMOVL %RAX,%R15 |
0x239eb6 MOV %R15,-0x30(%RBP) |
0x239eba CMP %R15,%RCX |
0x239ebd JG 23a593 |
0x239ec3 MOV 0x18(%RBP),%R8 |
0x239ec7 MOV 0x28(%RBP),%RDX |
0x239ecb MOV 0x30(%RBP),%RAX |
0x239ecf MOV 0x20(%RBP),%RSI |
0x239ed3 MOV 0x10(%RBP),%RDI |
0x239ed7 MOV %RBX,-0x58(%RBP) |
0x239edb MOV (%R8),%R10 |
0x239ede MOV 0x10(%R8),%R9 |
0x239ee2 MOV (%RDX),%R8 |
0x239ee5 MOV 0x10(%RDX),%R11 |
0x239ee9 MOV (%RAX),%RDX |
0x239eec MOV 0x10(%RAX),%RAX |
0x239ef0 MOV (%RDI),%R14 |
0x239ef3 MOV 0x8(%RSI),%RSI |
0x239ef7 MOV 0x10(%RDI),%RDI |
0x239efb MOV %RAX,-0x60(%RBP) |
0x239eff MOV %R15,%RAX |
0x239f02 SUB %RCX,%RAX |
0x239f05 MOV %RSI,-0x48(%RBP) |
0x239f09 MOV %R14,-0x50(%RBP) |
0x239f0d MOV %R10,-0x98(%RBP) |
0x239f14 MOV %R8,-0x90(%RBP) |
0x239f1b MOV %RDX,-0x88(%RBP) |
0x239f22 INC %RAX |
0x239f25 CMP $0x8,%RAX |
0x239f29 JB 23a5b4 |
0x239f2f VBROADCASTSD -0x28741(%RIP),%ZMM13 |
0x239f39 VPBROADCASTQ %RCX,%ZMM0 |
0x239f3f VPADDQ -0x281c9(%RIP),%ZMM0,%ZMM0 |
0x239f49 VBROADCASTSD -0x2876b(%RIP),%ZMM12 |
0x239f53 VPBROADCASTQ -0x28755(%RIP),%ZMM14 |
0x239f5d VPBROADCASTQ %RBX,%ZMM1 |
0x239f63 MOV %RAX,-0x78(%RBP) |
0x239f67 AND $-0x8,%RAX |
0x239f6b VPBROADCASTQ %R10,%ZMM5 |
0x239f71 VPBROADCASTD %R12D,%YMM2 |
0x239f77 VPBROADCASTD %R13D,%YMM3 |
0x239f7d VPBROADCASTQ %R14,%ZMM4 |
0x239f83 VPBROADCASTQ %R8,%ZMM6 |
0x239f89 VPBROADCASTQ %RDX,%ZMM7 |
0x239f8f VXORPD %XMM10,%XMM10,%XMM10 |
0x239f94 VPCMPEQD %YMM11,%YMM11,%YMM11 |
0x239f99 MOV %R12D,-0x34(%RBP) |
0x239f9d MOV %R13,-0x80(%RBP) |
0x239fa1 ADD %RAX,%RCX |
0x239fa4 VEXTRACTI32X4 $0x3,%ZMM1,%XMM8 |
0x239fab VEXTRACTI32X4 $0x2,%ZMM1,%XMM9 |
0x239fb2 MOV %RAX,%R10 |
0x239fb5 MOV %RAX,-0x70(%RBP) |
0x239fb9 JMP 23a003 |
(291) 0x239fc0 VSUBPD %ZMM20,%ZMM13,%ZMM19 |
(291) 0x239fc6 VMOVAPD %ZMM24,%ZMM18{%K2}{z} |
(291) 0x239fcc MOV -0x60(%RBP),%RAX |
(291) 0x239fd0 VPMULLQ %ZMM16,%ZMM7,%ZMM16 |
(291) 0x239fd6 KXNORW %K0,%K0,%K1 |
(291) 0x239fda ADD $-0x8,%R10 |
(291) 0x239fde VPADDQ %ZMM14,%ZMM0,%ZMM0 |
(291) 0x239fe4 VFMADD213PD %ZMM21,%ZMM18,%ZMM19 |
(291) 0x239fea VPADDQ %ZMM15,%ZMM16,%ZMM15 |
(291) 0x239ff0 VMULPD %ZMM19,%ZMM17,%ZMM17 |
(291) 0x239ff6 VSCATTERQPD %ZMM17,(%RAX,%ZMM15,8){%K1} |
(291) 0x239ffd JE 23a579 |
(291) 0x23a003 VEXTRACTI32X4 $0x3,%ZMM0,%XMM15 |
(291) 0x23a00a VPEXTRQ $0x1,%XMM8,%R8 |
(291) 0x23a010 VPEXTRQ $0x1,%XMM15,%RAX |
(291) 0x23a016 CQTO |
(291) 0x23a018 IDIV %R8 |
(291) 0x23a01b VMOVQ %XMM8,%R8 |
(291) 0x23a020 VMOVQ %RAX,%XMM16 |
(291) 0x23a026 VMOVQ %XMM15,%RAX |
(291) 0x23a02b CQTO |
(291) 0x23a02d IDIV %R8 |
(291) 0x23a030 VPEXTRQ $0x1,%XMM9,%R8 |
(291) 0x23a036 VMOVQ %RAX,%XMM15 |
(291) 0x23a03b VPUNPCKLQDQ %XMM16,%XMM15,%XMM15 |
(291) 0x23a041 VEXTRACTI32X4 $0x2,%ZMM0,%XMM16 |
(291) 0x23a048 VPEXTRQ $0x1,%XMM16,%RAX |
(291) 0x23a04f CQTO |
(291) 0x23a051 IDIV %R8 |
(291) 0x23a054 VMOVQ %XMM9,%R8 |
(291) 0x23a059 VMOVQ %RAX,%XMM17 |
(291) 0x23a05f VMOVQ %XMM16,%RAX |
(291) 0x23a065 CQTO |
(291) 0x23a067 IDIV %R8 |
(291) 0x23a06a VMOVQ %RAX,%XMM16 |
(291) 0x23a070 VPUNPCKLQDQ %XMM17,%XMM16,%XMM16 |
(291) 0x23a076 VEXTRACTI32X4 $0x1,%YMM0,%XMM17 |
(291) 0x23a07d VPEXTRQ $0x1,%XMM17,%RAX |
(291) 0x23a084 VINSERTI32X4 $0x1,%XMM15,%YMM16,%YMM15 |
(291) 0x23a08b VEXTRACTI32X4 $0x1,%YMM1,%XMM16 |
(291) 0x23a092 VPEXTRQ $0x1,%XMM16,%R8 |
(291) 0x23a099 CQTO |
(291) 0x23a09b IDIV %R8 |
(291) 0x23a09e VMOVQ %XMM16,%R8 |
(291) 0x23a0a4 VMOVQ %RAX,%XMM18 |
(291) 0x23a0aa VMOVQ %XMM17,%RAX |
(291) 0x23a0b0 CQTO |
(291) 0x23a0b2 IDIV %R8 |
(291) 0x23a0b5 VPEXTRQ $0x1,%XMM1,%R8 |
(291) 0x23a0bb VMOVQ %RAX,%XMM16 |
(291) 0x23a0c1 VPEXTRQ $0x1,%XMM0,%RAX |
(291) 0x23a0c7 CQTO |
(291) 0x23a0c9 VPUNPCKLQDQ %XMM18,%XMM16,%XMM16 |
(291) 0x23a0cf IDIV %R8 |
(291) 0x23a0d2 VMOVQ %XMM1,%R8 |
(291) 0x23a0d7 VMOVQ %RAX,%XMM17 |
(291) 0x23a0dd VMOVQ %XMM0,%RAX |
(291) 0x23a0e2 CQTO |
(291) 0x23a0e4 IDIV %R8 |
(291) 0x23a0e7 VMOVQ %RAX,%XMM18 |
(291) 0x23a0ed VPUNPCKLQDQ %XMM17,%XMM18,%XMM17 |
(291) 0x23a0f3 VINSERTI32X4 $0x1,%XMM16,%YMM17,%YMM16 |
(291) 0x23a0fa VINSERTI64X4 $0x1,%YMM15,%ZMM16,%ZMM15 |
(291) 0x23a101 VPMOVQD %ZMM15,%YMM16 |
(291) 0x23a107 VPMULLQ %ZMM1,%ZMM15,%ZMM15 |
(291) 0x23a10d VPSUBQ %ZMM15,%ZMM0,%ZMM15 |
(291) 0x23a113 VPMOVQD %ZMM15,%YMM15 |
(291) 0x23a119 VPADDD %YMM16,%YMM2,%YMM16 |
(291) 0x23a11f VPMOVSXDQ %YMM16,%ZMM16 |
(291) 0x23a125 VPADDD %YMM15,%YMM3,%YMM20 |
(291) 0x23a12b VPMOVSXDQ %YMM20,%ZMM15 |
(291) 0x23a131 VPMULLQ %ZMM16,%ZMM4,%ZMM17 |
(291) 0x23a137 VPMULLQ %ZMM16,%ZMM5,%ZMM24 |
(291) 0x23a13d VPADDQ %ZMM15,%ZMM17,%ZMM17 |
(291) 0x23a143 VEXTRACTI32X4 $0x1,%YMM17,%XMM18 |
(291) 0x23a14a VMOVQ %XMM17,%RDX |
(291) 0x23a150 VPEXTRQ $0x1,%XMM17,%RAX |
(291) 0x23a157 VMOVQ %XMM18,%R8 |
(291) 0x23a15d VPEXTRQ $0x1,%XMM18,%R14 |
(291) 0x23a164 VEXTRACTI32X4 $0x2,%ZMM17,%XMM18 |
(291) 0x23a16b VEXTRACTI32X4 $0x3,%ZMM17,%XMM17 |
(291) 0x23a172 VMOVSD (%RDI,%RDX,8),%XMM19 |
(291) 0x23a179 VMOVQ %XMM18,%RSI |
(291) 0x23a17f VMOVQ %XMM17,%R13 |
(291) 0x23a185 VPEXTRQ $0x1,%XMM18,%RBX |
(291) 0x23a18c VPEXTRQ $0x1,%XMM17,%R12 |
(291) 0x23a193 VMOVHPD (%RDI,%RAX,8),%XMM19,%XMM19 |
(291) 0x23a19a VMOVSD (%RDI,%R13,8),%XMM17 |
(291) 0x23a1a1 VMOVSD (%RDI,%RSI,8),%XMM18 |
(291) 0x23a1a8 VMOVHPD (%RDI,%R12,8),%XMM17,%XMM17 |
(291) 0x23a1af VMOVHPD (%RDI,%RBX,8),%XMM18,%XMM18 |
(291) 0x23a1b6 VINSERTF32X4 $0x1,%XMM17,%YMM18,%YMM17 |
(291) 0x23a1bd VMOVSD (%RDI,%R8,8),%XMM18 |
(291) 0x23a1c4 VMOVHPD (%RDI,%R14,8),%XMM18,%XMM18 |
(291) 0x23a1cb VINSERTF32X4 $0x1,%XMM18,%YMM19,%YMM18 |
(291) 0x23a1d2 VPSUBD %YMM11,%YMM20,%YMM19 |
(291) 0x23a1d8 VPMOVSXDQ %YMM19,%ZMM22 |
(291) 0x23a1de VINSERTF64X4 $0x1,%YMM17,%ZMM18,%ZMM17 |
(291) 0x23a1e5 VPADDD %YMM11,%YMM20,%YMM18 |
(291) 0x23a1eb VMOVDQA64 %YMM18,%YMM23 |
(291) 0x23a1f1 VCMPPD $0x1,%ZMM10,%ZMM17,%K1 |
(291) 0x23a1f8 VPBLENDMQ %ZMM22,%ZMM15,%ZMM21{%K1} |
(291) 0x23a1fe VPADDD -0x375f0(%RIP){1to8},%YMM20,%YMM23{%K1} |
(291) 0x23a208 VANDPD %ZMM12,%ZMM17,%ZMM20 |
(291) 0x23a20e VMOVDQA64 %ZMM15,%ZMM22{%K1} |
(291) 0x23a214 VPADDQ %ZMM21,%ZMM24,%ZMM24 |
(291) 0x23a21a VEXTRACTI32X4 $0x1,%YMM24,%XMM25 |
(291) 0x23a221 VMOVQ %XMM24,%RAX |
(291) 0x23a227 VPEXTRQ $0x1,%XMM24,%RDX |
(291) 0x23a22e VMOVQ %XMM25,%RSI |
(291) 0x23a234 VPEXTRQ $0x1,%XMM25,%R8 |
(291) 0x23a23b VEXTRACTI32X4 $0x2,%ZMM24,%XMM25 |
(291) 0x23a242 VEXTRACTI32X4 $0x3,%ZMM24,%XMM24 |
(291) 0x23a249 VMOVSD (%R9,%RAX,8),%XMM26 |
(291) 0x23a250 VMOVQ %XMM25,%RBX |
(291) 0x23a256 VMOVQ %XMM24,%R12 |
(291) 0x23a25c VPEXTRQ $0x1,%XMM25,%R14 |
(291) 0x23a263 VPEXTRQ $0x1,%XMM24,%R13 |
(291) 0x23a26a VMOVHPD (%R9,%RDX,8),%XMM26,%XMM26 |
(291) 0x23a271 VMOVSD (%R9,%R12,8),%XMM24 |
(291) 0x23a278 VMOVSD (%R9,%RBX,8),%XMM25 |
(291) 0x23a27f VMOVHPD (%R9,%R13,8),%XMM24,%XMM24 |
(291) 0x23a286 VMOVHPD (%R9,%R14,8),%XMM25,%XMM25 |
(291) 0x23a28d VPMOVSXDQ %YMM23,%ZMM23 |
(291) 0x23a293 VINSERTF32X4 $0x1,%XMM24,%YMM25,%YMM24 |
(291) 0x23a29a VMOVSD (%R9,%RSI,8),%XMM25 |
(291) 0x23a2a1 VMOVHPD (%R9,%R8,8),%XMM25,%XMM25 |
(291) 0x23a2a8 VINSERTF32X4 $0x1,%XMM25,%YMM26,%YMM25 |
(291) 0x23a2af VINSERTF64X4 $0x1,%YMM24,%ZMM25,%ZMM24 |
(291) 0x23a2b6 VDIVPD %ZMM24,%ZMM20,%ZMM20 |
(291) 0x23a2bc VPMULLQ %ZMM16,%ZMM6,%ZMM24 |
(291) 0x23a2c2 VPADDQ %ZMM21,%ZMM24,%ZMM21 |
(291) 0x23a2c8 VPADDQ %ZMM23,%ZMM24,%ZMM23 |
(291) 0x23a2ce VPADDQ %ZMM22,%ZMM24,%ZMM22 |
(291) 0x23a2d4 VEXTRACTI32X4 $0x1,%YMM21,%XMM25 |
(291) 0x23a2db VMOVQ %XMM21,%RDX |
(291) 0x23a2e1 VPEXTRQ $0x1,%XMM21,%RAX |
(291) 0x23a2e8 VEXTRACTI32X4 $0x1,%YMM22,%XMM24 |
(291) 0x23a2ef VMOVQ %XMM25,%RSI |
(291) 0x23a2f5 VPEXTRQ $0x1,%XMM25,%R8 |
(291) 0x23a2fc VEXTRACTI32X4 $0x2,%ZMM21,%XMM25 |
(291) 0x23a303 VEXTRACTI32X4 $0x3,%ZMM21,%XMM21 |
(291) 0x23a30a VMOVSD (%R11,%RDX,8),%XMM26 |
(291) 0x23a311 VPEXTRQ $0x1,%XMM23,%RDX |
(291) 0x23a318 VMOVQ %XMM25,%RBX |
(291) 0x23a31e VMOVQ %XMM21,%R13 |
(291) 0x23a324 VPEXTRQ $0x1,%XMM25,%R14 |
(291) 0x23a32b VPEXTRQ $0x1,%XMM21,%R12 |
(291) 0x23a332 VMOVHPD (%R11,%RAX,8),%XMM26,%XMM26 |
(291) 0x23a339 VMOVQ %XMM23,%RAX |
(291) 0x23a33f VMOVSD (%R11,%R13,8),%XMM21 |
(291) 0x23a346 VMOVSD (%R11,%RBX,8),%XMM25 |
(291) 0x23a34d VMOVHPD (%R11,%R12,8),%XMM21,%XMM21 |
(291) 0x23a354 VMOVHPD (%R11,%R14,8),%XMM25,%XMM25 |
(291) 0x23a35b VINSERTF32X4 $0x1,%XMM21,%YMM25,%YMM21 |
(291) 0x23a362 VMOVSD (%R11,%RSI,8),%XMM25 |
(291) 0x23a369 VMOVHPD (%R11,%R8,8),%XMM25,%XMM25 |
(291) 0x23a370 VINSERTF32X4 $0x1,%XMM25,%YMM26,%YMM25 |
(291) 0x23a377 VMOVSD (%R11,%RAX,8),%XMM26 |
(291) 0x23a37e VMOVQ %XMM22,%RAX |
(291) 0x23a384 VMOVHPD (%R11,%RDX,8),%XMM26,%XMM26 |
(291) 0x23a38b VPEXTRQ $0x1,%XMM22,%RDX |
(291) 0x23a392 VINSERTF64X4 $0x1,%YMM21,%ZMM25,%ZMM21 |
(291) 0x23a399 VEXTRACTI32X4 $0x1,%YMM23,%XMM25 |
(291) 0x23a3a0 VMOVQ %XMM25,%RSI |
(291) 0x23a3a6 VPEXTRQ $0x1,%XMM25,%R8 |
(291) 0x23a3ad VEXTRACTI32X4 $0x2,%ZMM23,%XMM25 |
(291) 0x23a3b4 VEXTRACTI32X4 $0x3,%ZMM23,%XMM23 |
(291) 0x23a3bb VMOVQ %XMM25,%R14 |
(291) 0x23a3c1 VMOVQ %XMM23,%R12 |
(291) 0x23a3c7 VPEXTRQ $0x1,%XMM25,%RBX |
(291) 0x23a3ce VPEXTRQ $0x1,%XMM23,%R13 |
(291) 0x23a3d5 VMOVSD (%R11,%R12,8),%XMM23 |
(291) 0x23a3dc VMOVSD (%R11,%R14,8),%XMM25 |
(291) 0x23a3e3 VMOVHPD (%R11,%R13,8),%XMM23,%XMM23 |
(291) 0x23a3ea VMOVHPD (%R11,%RBX,8),%XMM25,%XMM25 |
(291) 0x23a3f1 VINSERTF32X4 $0x1,%XMM23,%YMM25,%YMM23 |
(291) 0x23a3f8 VMOVSD (%R11,%RSI,8),%XMM25 |
(291) 0x23a3ff VPEXTRQ $0x1,%XMM24,%RSI |
(291) 0x23a406 VMOVHPD (%R11,%R8,8),%XMM25,%XMM25 |
(291) 0x23a40d VMOVQ %XMM24,%R8 |
(291) 0x23a413 VEXTRACTI32X4 $0x2,%ZMM22,%XMM24 |
(291) 0x23a41a VEXTRACTI32X4 $0x3,%ZMM22,%XMM22 |
(291) 0x23a421 VMOVQ %XMM24,%RBX |
(291) 0x23a427 VMOVQ %XMM22,%R12 |
(291) 0x23a42d VPEXTRQ $0x1,%XMM24,%R14 |
(291) 0x23a434 VPEXTRQ $0x1,%XMM22,%R13 |
(291) 0x23a43b VMOVSD (%R11,%R12,8),%XMM22 |
(291) 0x23a442 VMOVSD (%R11,%RBX,8),%XMM24 |
(291) 0x23a449 VMOVHPD (%R11,%R13,8),%XMM22,%XMM22 |
(291) 0x23a450 VMOVHPD (%R11,%R14,8),%XMM24,%XMM24 |
(291) 0x23a457 VINSERTF32X4 $0x1,%XMM25,%YMM26,%YMM25 |
(291) 0x23a45e VINSERTF64X4 $0x1,%YMM23,%ZMM25,%ZMM23 |
(291) 0x23a465 VMOVSD (%R11,%RAX,8),%XMM25 |
(291) 0x23a46c VINSERTF32X4 $0x1,%XMM22,%YMM24,%YMM22 |
(291) 0x23a473 VMOVSD (%R11,%R8,8),%XMM24 |
(291) 0x23a47a VMOVHPD (%R11,%RDX,8),%XMM25,%XMM25 |
(291) 0x23a481 VMOVHPD (%R11,%RSI,8),%XMM24,%XMM24 |
(291) 0x23a488 VSUBPD %ZMM23,%ZMM21,%ZMM23 |
(291) 0x23a48e VINSERTF32X4 $0x1,%XMM24,%YMM25,%YMM24 |
(291) 0x23a495 VINSERTF64X4 $0x1,%YMM22,%ZMM24,%ZMM22 |
(291) 0x23a49c VSUBPD %ZMM21,%ZMM22,%ZMM22 |
(291) 0x23a4a2 VMULPD %ZMM22,%ZMM23,%ZMM24 |
(291) 0x23a4a8 VCMPPD $0x1,%ZMM24,%ZMM10,%K2 |
(291) 0x23a4af KORTESTB %K2,%K2 |
(291) 0x23a4b3 JE 239fc0 |
(291) 0x23a4b9 VBROADCASTSD -0x28ce3(%RIP),%ZMM25 |
(291) 0x23a4c3 MOV -0x48(%RBP),%RAX |
(291) 0x23a4c7 VMOVDQA32 %YMM19,%YMM18{%K1} |
(291) 0x23a4cd VPXORD %XMM19,%XMM19,%XMM19 |
(291) 0x23a4d3 KMOVQ %K2,%K1 |
(291) 0x23a4d8 VANDPD %ZMM12,%ZMM22,%ZMM24 |
(291) 0x23a4de VXORPD %XMM26,%XMM26,%XMM26 |
(291) 0x23a4e4 KMOVQ %K2,%K3 |
(291) 0x23a4e9 VANDPD %ZMM12,%ZMM23,%ZMM23 |
(291) 0x23a4ef VGATHERQPD (%RAX,%ZMM15,8),%ZMM19{%K1} |
(291) 0x23a4f6 VCMPPD $0x6,%ZMM10,%ZMM22,%K1 |
(291) 0x23a4fd VGATHERDPD (%RAX,%YMM18,8),%ZMM26{%K3} |
(291) 0x23a504 VSUBPD %ZMM20,%ZMM25,%ZMM22 |
(291) 0x23a50a VADDPD %ZMM13,%ZMM20,%ZMM25 |
(291) 0x23a510 VMULPD %ZMM24,%ZMM22,%ZMM22 |
(291) 0x23a516 VMULPD %ZMM23,%ZMM25,%ZMM25 |
(291) 0x23a51c VDIVPD %ZMM19,%ZMM22,%ZMM22 |
(291) 0x23a522 VDIVPD %ZMM26,%ZMM25,%ZMM18 |
(291) 0x23a528 VADDPD %ZMM18,%ZMM22,%ZMM18 |
(291) 0x23a52e VMULPD %ZMM18,%ZMM19,%ZMM18 |
(291) 0x23a534 VDIVPD -0x28cee(%RIP){1to8},%ZMM18,%ZMM18 |
(291) 0x23a53e VMINPD %ZMM18,%ZMM23,%ZMM19 |
(291) 0x23a544 VCMPPD $0x3,%ZMM18,%ZMM18,%K3 |
(291) 0x23a54b VMOVAPD %ZMM23,%ZMM19{%K3} |
(291) 0x23a551 VMINPD %ZMM19,%ZMM24,%ZMM18 |
(291) 0x23a557 VCMPPD $0x3,%ZMM19,%ZMM19,%K3 |
(291) 0x23a55e VMOVAPD %ZMM24,%ZMM18{%K3} |
(291) 0x23a564 VXORPD -0x28cd6(%RIP){1to8},%ZMM18,%ZMM24 |
(291) 0x23a56e VMOVAPD %ZMM18,%ZMM24{%K1} |
(291) 0x23a574 JMP 239fc0 |
0x23a579 MOV -0x70(%RBP),%RAX |
0x23a57d MOV -0x34(%RBP),%R12D |
0x23a581 MOV -0x80(%RBP),%R13 |
0x23a585 MOV -0x58(%RBP),%RBX |
0x23a589 MOV -0x50(%RBP),%R14 |
0x23a58d CMP %RAX,-0x78(%RBP) |
0x23a591 JNE 23a5b4 |
0x23a593 MOV -0x38(%RBP),%ESI |
0x23a596 LEA 0x26833(%RIP),%RDI |
0x23a59d VZEROUPPER |
0x23a5a0 CALL 25f750 <@plt_start@+0x540> |
0x23a5a5 ADD $0x78,%RSP |
0x23a5a9 POP %RBX |
0x23a5aa POP %R12 |
0x23a5ac POP %R13 |
0x23a5ae POP %R14 |
0x23a5b0 POP %R15 |
0x23a5b2 POP %RBP |
0x23a5b3 RET |
0x23a5b4 VMOVDDUP -0x28dd4(%RIP),%XMM1 |
0x23a5bc VMOVDDUP -0x28ddc(%RIP),%XMM2 |
0x23a5c4 VMOVSD -0x28dec(%RIP),%XMM3 |
0x23a5cc VMOVSD -0x28ddc(%RIP),%XMM4 |
0x23a5d4 VMOVSD -0x28d8c(%RIP),%XMM5 |
0x23a5dc VMOVDDUP -0x28d4c(%RIP),%XMM6 |
0x23a5e4 VPXOR %XMM0,%XMM0,%XMM0 |
0x23a5e8 JMP 23a62a |
(290) 0x23a5f0 VSUBSD %XMM8,%XMM4,%XMM8 |
(290) 0x23a5f5 IMUL -0x88(%RBP),%RAX |
(290) 0x23a5fd MOV -0x60(%RBP),%RDX |
(290) 0x23a601 MOV -0x58(%RBP),%RBX |
(290) 0x23a605 MOV -0x50(%RBP),%R14 |
(290) 0x23a609 VFMADD213SD %XMM9,%XMM11,%XMM8 |
(290) 0x23a60e VMULSD %XMM7,%XMM8,%XMM7 |
(290) 0x23a612 ADD %R8,%RAX |
(290) 0x23a615 CMP %R15,%RCX |
(290) 0x23a618 VMOVSD %XMM7,(%RDX,%RAX,8) |
(290) 0x23a61d LEA 0x1(%RCX),%RAX |
(290) 0x23a621 MOV %RAX,%RCX |
(290) 0x23a624 JGE 23a593 |
(290) 0x23a62a MOV %RCX,%RAX |
(290) 0x23a62d CQTO |
(290) 0x23a62f MOV %R14,%RSI |
(290) 0x23a632 IDIV %RBX |
(290) 0x23a635 LEA (%R13,%RDX,1),%R10D |
(290) 0x23a63a LEA 0x1(%R13,%RDX,1),%EDX |
(290) 0x23a63f MOVSXD %R10D,%R8 |
(290) 0x23a642 ADD %R12D,%EAX |
(290) 0x23a645 CLTQ |
(290) 0x23a647 IMUL %RAX,%RSI |
(290) 0x23a64b ADD %R8,%RSI |
(290) 0x23a64e VMOVSD (%RDI,%RSI,8),%XMM7 |
(290) 0x23a653 MOVSXD %EDX,%RSI |
(290) 0x23a656 VUCOMISD %XMM7,%XMM0 |
(290) 0x23a65a JBE 23a670 |
(290) 0x23a65c ADD $0x2,%R10D |
(290) 0x23a660 MOV %R8,%R14 |
(290) 0x23a663 JMP 23a67c |
(290) 0x23a670 DEC %R10D |
(290) 0x23a673 MOV %RSI,%R14 |
(290) 0x23a676 MOV %R8,%RSI |
(290) 0x23a679 MOV %R10D,%EDX |
(290) 0x23a67c MOV -0x98(%RBP),%RBX |
(290) 0x23a683 VANDPD %XMM1,%XMM7,%XMM8 |
(290) 0x23a687 VPXOR %XMM11,%XMM11,%XMM11 |
(290) 0x23a68c IMUL %RAX,%RBX |
(290) 0x23a690 ADD %RSI,%RBX |
(290) 0x23a693 VDIVSD (%R9,%RBX,8),%XMM8,%XMM8 |
(290) 0x23a699 MOV -0x90(%RBP),%RBX |
(290) 0x23a6a0 IMUL %RAX,%RBX |
(290) 0x23a6a4 ADD %RBX,%RSI |
(290) 0x23a6a7 VMOVSD (%R11,%RSI,8),%XMM9 |
(290) 0x23a6ad MOVSXD %R10D,%RSI |
(290) 0x23a6b0 ADD %RBX,%RSI |
(290) 0x23a6b3 ADD %R14,%RBX |
(290) 0x23a6b6 VMOVSD (%R11,%RBX,8),%XMM10 |
(290) 0x23a6bc VSUBSD (%R11,%RSI,8),%XMM9,%XMM12 |
(290) 0x23a6c2 VSUBSD %XMM9,%XMM10,%XMM10 |
(290) 0x23a6c7 VMULSD %XMM10,%XMM12,%XMM13 |
(290) 0x23a6cc VUCOMISD %XMM11,%XMM13 |
(290) 0x23a6d1 JBE 23a5f0 |
(290) 0x23a6d7 MOV -0x48(%RBP),%RSI |
(290) 0x23a6db VADDSD %XMM4,%XMM8,%XMM15 |
(290) 0x23a6df VANDPD %XMM2,%XMM12,%XMM12 |
(290) 0x23a6e3 MOVSXD %EDX,%RDX |
(290) 0x23a6e6 VSUBSD %XMM8,%XMM3,%XMM14 |
(290) 0x23a6eb VANDPD %XMM2,%XMM10,%XMM13 |
(290) 0x23a6ef VMULSD %XMM12,%XMM15,%XMM15 |
(290) 0x23a6f4 VMULSD %XMM13,%XMM14,%XMM14 |
(290) 0x23a6f9 VMOVSD (%RSI,%R8,8),%XMM11 |
(290) 0x23a6ff VDIVSD (%RSI,%RDX,8),%XMM15,%XMM15 |
(290) 0x23a704 VDIVSD %XMM11,%XMM14,%XMM14 |
(290) 0x23a709 VADDSD %XMM15,%XMM14,%XMM14 |
(290) 0x23a70e VMULSD %XMM14,%XMM11,%XMM11 |
(290) 0x23a713 VDIVSD %XMM5,%XMM11,%XMM11 |
(290) 0x23a717 VCMPSD $0x3,%XMM11,%XMM11,%K1 |
(290) 0x23a71e VMINSD %XMM11,%XMM12,%XMM11 |
(290) 0x23a723 VMOVSD %XMM12,%XMM11,%XMM11{%K1} |
(290) 0x23a729 VCMPSD $0x3,%XMM11,%XMM11,%K1 |
(290) 0x23a730 VMINSD %XMM11,%XMM13,%XMM12 |
(290) 0x23a735 VMOVSD %XMM13,%XMM12,%XMM12{%K1} |
(290) 0x23a73b VCMPSD $0x6,%XMM0,%XMM10,%K1 |
(290) 0x23a742 VXORPD %XMM6,%XMM12,%XMM11 |
(290) 0x23a746 VMOVSD %XMM12,%XMM11,%XMM11{%K1} |
(290) 0x23a74c JMP 23a5f0 |
Path / |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 124 |
nb uops | 134 |
loop length | 540 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 3 |
used zmm registers | 9 |
nb stack references | 21 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 5.00 | 19.00 | 19.00 | 19.00 | 1.00 | 9.00 | 9.00 | 1.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 5.00 | 19.33 | 19.33 | 19.33 | 1.00 | 9.00 | 9.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 22.33 |
Dispatch | 19.33 |
Overall L1 | 22.33 |
all | 10% |
load | 7% |
store | 0% |
mul | 0% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 23% |
all | 11% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 10% |
load | 4% |
store | 0% |
mul | 0% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 16% |
store | 10% |
mul | 12% |
add-sub | 20% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 13% |
load | 15% |
store | 10% |
mul | 12% |
add-sub | 20% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x3,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R12D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23a5a5 <.omp_outlined..12+0x795> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x3,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R13D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23a5a5 <.omp_outlined..12+0x795> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
SUB %R13D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %RBX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x3c(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0xa0(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x26f2f(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x68(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x30(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 23a593 <.omp_outlined..12+0x783> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x8(%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 23a5b4 <.omp_outlined..12+0x7a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VBROADCASTSD -0x28741(%RIP),%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 |
VPBROADCASTQ %RCX,%ZMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x281c9(%RIP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 |
VBROADCASTSD -0x2876b(%RIP),%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 |
VPBROADCASTQ -0x28755(%RIP),%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ %RBX,%ZMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R10,%ZMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R12D,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R13D,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%ZMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R8,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPCMPEQD %YMM11,%YMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %R12D,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 23a003 <.omp_outlined..12+0x1f3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x70(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x34(%RBP),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x50(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x78(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JNE 23a5b4 <.omp_outlined..12+0x7a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x38(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x26833(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x28dd4(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x28ddc(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x28dec(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x28ddc(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x28d8c(%RIP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x28d4c(%RIP),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 23a62a <.omp_outlined..12+0x81a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Source file and lines | advec_mom.cpp:108-139 |
Module | exec |
nb instructions | 124 |
nb uops | 134 |
loop length | 540 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 3 |
used zmm registers | 9 |
nb stack references | 21 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 5.00 | 19.00 | 19.00 | 19.00 | 1.00 | 9.00 | 9.00 | 1.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 5.00 | 19.33 | 19.33 | 19.33 | 1.00 | 9.00 | 9.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 22.33 |
Dispatch | 19.33 |
Overall L1 | 22.33 |
all | 10% |
load | 7% |
store | 0% |
mul | 0% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 23% |
all | 11% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 10% |
load | 4% |
store | 0% |
mul | 0% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 16% |
store | 10% |
mul | 12% |
add-sub | 20% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 13% |
load | 15% |
store | 10% |
mul | 12% |
add-sub | 20% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x3,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R12D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23a5a5 <.omp_outlined..12+0x795> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x3,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R13D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23a5a5 <.omp_outlined..12+0x795> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
SUB %R13D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %RBX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x3c(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0xa0(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x26f2f(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x68(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x30(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 23a593 <.omp_outlined..12+0x783> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x8(%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 23a5b4 <.omp_outlined..12+0x7a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VBROADCASTSD -0x28741(%RIP),%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 |
VPBROADCASTQ %RCX,%ZMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x281c9(%RIP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 |
VBROADCASTSD -0x2876b(%RIP),%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 |
VPBROADCASTQ -0x28755(%RIP),%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ %RBX,%ZMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R10,%ZMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R12D,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R13D,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%ZMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R8,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPCMPEQD %YMM11,%YMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %R12D,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 23a003 <.omp_outlined..12+0x1f3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x70(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x34(%RBP),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x50(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x78(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JNE 23a5b4 <.omp_outlined..12+0x7a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x38(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x26833(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x78,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x28dd4(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x28ddc(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x28dec(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x28ddc(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x28d8c(%RIP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDDUP -0x28d4c(%RIP),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 23a62a <.omp_outlined..12+0x81a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..12#0x239e10– | 4.31 | 2.2 |
○Loop 291 - advec_mom.cpp:109-139 - exec | 4.31 | 2.2 |
○Loop 290 - advec_mom.cpp:109-139 - exec | 0 | 0 |