Function: .omp_outlined..18 | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.04% |
---|
Function: .omp_outlined..18 | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.04% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 172 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/update_tile_halo_kernel.cpp: 154 - 156 |
-------------------------------------------------------------------------------- |
154: #pragma omp parallel for simd |
155: for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { |
156: for (int j = 0; j < depth; ++j) { |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x23b240 PUSH %RBP |
0x23b241 MOV %RSP,%RBP |
0x23b244 PUSH %R15 |
0x23b246 PUSH %R14 |
0x23b248 PUSH %R13 |
0x23b24a PUSH %R12 |
0x23b24c PUSH %RBX |
0x23b24d SUB $0x98,%RSP |
0x23b254 MOV (%RCX),%R15D |
0x23b257 MOV (%RDX),%R14D |
0x23b25a ADD $0x4,%R15D |
0x23b25e SUB %R14D,%R15D |
0x23b261 JLE 23b9e4 |
0x23b267 MOV (%R9),%R13D |
0x23b26a MOV (%R8),%EBX |
0x23b26d LEA 0x1(%RBX),%R12D |
0x23b271 ADD $0x3,%R13D |
0x23b275 CMP %R12D,%R13D |
0x23b278 JLE 23b9e4 |
0x23b27e SUB %R12D,%R13D |
0x23b281 MOV (%RDI),%ESI |
0x23b283 MOVQ $0,-0x60(%RBP) |
0x23b28b MOVQ $0x1,-0xb8(%RBP) |
0x23b296 MOVL $0,-0x38(%RBP) |
0x23b29d IMUL %R13,%R15 |
0x23b2a1 DEC %R15 |
0x23b2a4 MOV %R15,-0x30(%RBP) |
0x23b2a8 SUB $0x8,%RSP |
0x23b2ac LEA -0x38(%RBP),%RCX |
0x23b2b0 LEA -0xb8(%RBP),%RAX |
0x23b2b7 LEA 0x25bd2(%RIP),%RDI |
0x23b2be LEA -0x60(%RBP),%R8 |
0x23b2c2 LEA -0x30(%RBP),%R9 |
0x23b2c6 MOV %ESI,-0x34(%RBP) |
0x23b2c9 MOV $0x22,%EDX |
0x23b2ce PUSH $0x1 |
0x23b2d0 PUSH $0x1 |
0x23b2d2 PUSH %RAX |
0x23b2d3 CALL 25f740 <@plt_start@+0x530> |
0x23b2d8 ADD $0x20,%RSP |
0x23b2dc MOV -0x30(%RBP),%RAX |
0x23b2e0 MOV -0x60(%RBP),%RCX |
0x23b2e4 CMP %R15,%RAX |
0x23b2e7 CMOVL %RAX,%R15 |
0x23b2eb MOV %R15,-0x30(%RBP) |
0x23b2ef CMP %R15,%RCX |
0x23b2f2 JG 23b9d2 |
0x23b2f8 MOV 0x20(%RBP),%RSI |
0x23b2fc MOV 0x18(%RBP),%RDI |
0x23b300 MOV 0x10(%RBP),%R9 |
0x23b304 MOV 0x30(%RBP),%RAX |
0x23b308 MOV 0x28(%RBP),%RDX |
0x23b30c MOV %R12,-0x50(%RBP) |
0x23b310 MOV %RBX,-0x48(%RBP) |
0x23b314 MOV (%RDI),%R12 |
0x23b317 MOV 0x10(%RDI),%R8 |
0x23b31b MOV (%RSI),%RDI |
0x23b31e MOV 0x10(%RSI),%R10 |
0x23b322 MOV (%R9),%RSI |
0x23b325 MOV 0x10(%R9),%R9 |
0x23b329 MOV (%RAX),%R11 |
0x23b32c MOV 0x10(%RAX),%RBX |
0x23b330 MOV 0x10(%RDX),%RAX |
0x23b334 MOV %R9,-0x58(%RBP) |
0x23b338 MOV (%RDX),%R9 |
0x23b33b MOV %RAX,-0x40(%RBP) |
0x23b33f MOV %R15,%RAX |
0x23b342 SUB %RCX,%RAX |
0x23b345 MOV %R12,-0xb0(%RBP) |
0x23b34c MOV %RDI,-0xa8(%RBP) |
0x23b353 MOV %RSI,-0xa0(%RBP) |
0x23b35a MOV %R11,-0x98(%RBP) |
0x23b361 INC %RAX |
0x23b364 MOV %R9,-0x90(%RBP) |
0x23b36b CMP $0x4,%RAX |
0x23b36f JB 23b89b |
0x23b375 MOV %R9,%RDX |
0x23b378 MOV %RAX,%R9 |
0x23b37b MOV %RAX,-0x70(%RBP) |
0x23b37f MOV -0x50(%RBP),%RAX |
0x23b383 VBROADCASTSD -0x29b74(%RIP),%YMM12 |
0x23b38c VPBROADCASTQ %RCX,%YMM0 |
0x23b392 VPADDQ -0x2999a(%RIP),%YMM0,%YMM0 |
0x23b39a VPBROADCASTQ -0x29be3(%RIP),%YMM13 |
0x23b3a3 VPBROADCASTQ %R11,%YMM8 |
0x23b3a9 VPBROADCASTQ %RDI,%YMM5 |
0x23b3af MOV -0x40(%RBP),%R11 |
0x23b3b3 AND $-0x4,%R9 |
0x23b3b7 VPBROADCASTQ %R13,%YMM1 |
0x23b3bd VPBROADCASTD %R14D,%XMM2 |
0x23b3c3 VPBROADCASTQ %R12,%YMM4 |
0x23b3c9 VPBROADCASTQ %RSI,%YMM7 |
0x23b3cf VPBROADCASTQ %RDX,%YMM9 |
0x23b3d5 VPCMPEQD %XMM11,%XMM11,%XMM11 |
0x23b3da ADD %R9,%RCX |
0x23b3dd VEXTRACTI128 $0x1,%YMM1,%XMM10 |
0x23b3e3 MOV %R9,-0x68(%RBP) |
0x23b3e7 VPBROADCASTD %EAX,%XMM3 |
0x23b3ed MOV %RDI,%RAX |
0x23b3f0 MOV -0x48(%RBP),%RAX |
0x23b3f4 MOV -0x58(%RBP),%RDI |
0x23b3f8 VPBROADCASTD %EAX,%XMM6 |
0x23b3fe XCHG %AX,%AX |
(325) 0x23b400 VEXTRACTI128 $0x1,%YMM0,%XMM14 |
(325) 0x23b406 VPEXTRQ $0x1,%XMM10,%RSI |
(325) 0x23b40c VMOVQ %XMM1,%R12 |
(325) 0x23b411 VPEXTRQ $0x1,%XMM14,%RAX |
(325) 0x23b417 CQTO |
(325) 0x23b419 IDIV %RSI |
(325) 0x23b41c VMOVQ %XMM14,%RDX |
(325) 0x23b421 VMOVQ %XMM10,%RSI |
(325) 0x23b426 VMOVQ %RAX,%XMM14 |
(325) 0x23b42b MOV %RDX,%RAX |
(325) 0x23b42e CQTO |
(325) 0x23b430 IDIV %RSI |
(325) 0x23b433 VPEXTRQ $0x1,%XMM1,%RSI |
(325) 0x23b439 VMOVQ %RAX,%XMM15 |
(325) 0x23b43e VPEXTRQ $0x1,%XMM0,%RAX |
(325) 0x23b444 CQTO |
(325) 0x23b446 VPUNPCKLQDQ %XMM14,%XMM15,%XMM14 |
(325) 0x23b44b IDIV %RSI |
(325) 0x23b44e MOV %RAX,%RSI |
(325) 0x23b451 VMOVQ %XMM0,%RAX |
(325) 0x23b456 VMOVQ %RSI,%XMM15 |
(325) 0x23b45b CQTO |
(325) 0x23b45d IDIV %R12 |
(325) 0x23b460 ADD $-0x4,%R9 |
(325) 0x23b464 VMOVQ %RAX,%XMM16 |
(325) 0x23b46a VPUNPCKLQDQ %XMM15,%XMM16,%XMM15 |
(325) 0x23b470 VINSERTI128 $0x1,%XMM14,%YMM15,%YMM14 |
(325) 0x23b476 VPMOVQD %YMM14,%XMM15 |
(325) 0x23b47c VPMULLQ %YMM1,%YMM14,%YMM14 |
(325) 0x23b482 VPSUBQ %YMM14,%YMM0,%YMM14 |
(325) 0x23b487 VPADDQ %YMM0,%YMM13,%YMM0 |
(325) 0x23b48b VPMOVQD %YMM14,%XMM17 |
(325) 0x23b491 VPADDD %XMM2,%XMM15,%XMM15 |
(325) 0x23b495 VPADDD %XMM11,%XMM15,%XMM16 |
(325) 0x23b49b VPMOVSXDQ %XMM15,%YMM15 |
(325) 0x23b4a0 VPMOVSXDQ %XMM16,%YMM16 |
(325) 0x23b4a6 VPMULLQ %YMM15,%YMM4,%YMM24 |
(325) 0x23b4ac VPMULLQ %YMM15,%YMM5,%YMM30 |
(325) 0x23b4b2 VPADDD %XMM17,%XMM3,%XMM14 |
(325) 0x23b4b8 VPMULLQ %YMM16,%YMM4,%YMM18 |
(325) 0x23b4be VPMULLQ %YMM16,%YMM5,%YMM21 |
(325) 0x23b4c4 VPADDD %XMM17,%XMM6,%XMM17 |
(325) 0x23b4ca VPMULLQ %YMM16,%YMM8,%YMM16 |
(325) 0x23b4d0 VPMOVSXDQ %XMM14,%YMM14 |
(325) 0x23b4d5 VPMOVSXDQ %XMM17,%YMM17 |
(325) 0x23b4db VPADDQ %YMM14,%YMM18,%YMM19 |
(325) 0x23b4e1 VPADDQ %YMM14,%YMM21,%YMM22 |
(325) 0x23b4e7 VPADDQ %YMM14,%YMM24,%YMM25 |
(325) 0x23b4ed VPADDQ %YMM14,%YMM30,%YMM28 |
(325) 0x23b4f3 VPADDQ %YMM17,%YMM18,%YMM18 |
(325) 0x23b4f9 VPADDQ %YMM17,%YMM21,%YMM21 |
(325) 0x23b4ff VPADDQ %YMM17,%YMM24,%YMM24 |
(325) 0x23b505 VPADDQ %YMM17,%YMM30,%YMM17 |
(325) 0x23b50b VPADDQ %YMM14,%YMM16,%YMM16 |
(325) 0x23b511 VMOVQ %XMM19,%RAX |
(325) 0x23b517 VPEXTRQ $0x1,%XMM19,%RSI |
(325) 0x23b51e VEXTRACTI32X4 $0x1,%YMM19,%XMM19 |
(325) 0x23b525 VEXTRACTI32X4 $0x1,%YMM25,%XMM26 |
(325) 0x23b52c VMOVSD (%R8,%RAX,8),%XMM20 |
(325) 0x23b533 VMOVQ %XMM19,%R12 |
(325) 0x23b539 VPEXTRQ $0x1,%XMM19,%RDX |
(325) 0x23b540 VMOVSD (%R8,%R12,8),%XMM19 |
(325) 0x23b547 VMOVHPD (%R8,%RSI,8),%XMM20,%XMM20 |
(325) 0x23b54e VMOVQ %XMM22,%RSI |
(325) 0x23b554 VPEXTRQ $0x1,%XMM22,%R12 |
(325) 0x23b55b VEXTRACTI32X4 $0x1,%YMM22,%XMM22 |
(325) 0x23b562 VMOVQ %XMM22,%RAX |
(325) 0x23b568 VMOVHPD (%R8,%RDX,8),%XMM19,%XMM19 |
(325) 0x23b56f VMOVQ %XMM28,%RDX |
(325) 0x23b575 VMOVSD (%R10,%RAX,8),%XMM23 |
(325) 0x23b57c VPEXTRQ $0x1,%XMM22,%RAX |
(325) 0x23b583 VMOVSD (%R10,%RSI,8),%XMM22 |
(325) 0x23b58a VPEXTRQ $0x1,%XMM25,%RSI |
(325) 0x23b591 VMOVHPD (%R10,%R12,8),%XMM22,%XMM22 |
(325) 0x23b598 VMOVQ %XMM26,%R12 |
(325) 0x23b59e VMOVHPD (%R10,%RAX,8),%XMM23,%XMM23 |
(325) 0x23b5a5 VMOVQ %XMM18,%RAX |
(325) 0x23b5ab VMOVSD (%R8,%R12,8),%XMM27 |
(325) 0x23b5b2 VMOVQ %XMM25,%R12 |
(325) 0x23b5b8 VMOVSD (%R8,%R12,8),%XMM25 |
(325) 0x23b5bf VPEXTRQ $0x1,%XMM26,%R12 |
(325) 0x23b5c6 VMOVHPD (%R8,%RSI,8),%XMM25,%XMM25 |
(325) 0x23b5cd VPEXTRQ $0x1,%XMM28,%RSI |
(325) 0x23b5d4 VEXTRACTI32X4 $0x1,%YMM28,%XMM28 |
(325) 0x23b5db VMOVHPD (%R8,%R12,8),%XMM27,%XMM27 |
(325) 0x23b5e2 VMOVQ %XMM28,%R12 |
(325) 0x23b5e8 VMOVSD (%R10,%R12,8),%XMM29 |
(325) 0x23b5ef VPEXTRQ $0x1,%XMM28,%R12 |
(325) 0x23b5f6 VMOVHPD (%R10,%R12,8),%XMM29,%XMM28 |
(325) 0x23b5fd VMOVSD (%R10,%RDX,8),%XMM29 |
(325) 0x23b604 VPEXTRQ $0x1,%XMM18,%RDX |
(325) 0x23b60b VEXTRACTI32X4 $0x1,%YMM18,%XMM18 |
(325) 0x23b612 VINSERTF32X4 $0x1,%XMM19,%YMM20,%YMM19 |
(325) 0x23b619 VMOVSD (%R8,%RAX,8),%XMM20 |
(325) 0x23b620 VMOVQ %XMM21,%RAX |
(325) 0x23b626 VMOVHPD (%R10,%RSI,8),%XMM29,%XMM29 |
(325) 0x23b62d VMOVQ %XMM18,%RSI |
(325) 0x23b633 VPEXTRQ $0x1,%XMM18,%R12 |
(325) 0x23b63a VMOVHPD (%R8,%RDX,8),%XMM20,%XMM20 |
(325) 0x23b641 VPEXTRQ $0x1,%XMM21,%RDX |
(325) 0x23b648 VEXTRACTI32X4 $0x1,%YMM21,%XMM21 |
(325) 0x23b64f VMOVSD (%R8,%RSI,8),%XMM18 |
(325) 0x23b656 VMOVQ %XMM21,%RSI |
(325) 0x23b65c VINSERTF32X4 $0x1,%XMM23,%YMM22,%YMM22 |
(325) 0x23b663 VMOVHPD (%R8,%R12,8),%XMM18,%XMM18 |
(325) 0x23b66a VPEXTRQ $0x1,%XMM21,%R12 |
(325) 0x23b671 VMOVSD (%R10,%RSI,8),%XMM21 |
(325) 0x23b678 VMOVHPD (%R10,%R12,8),%XMM21,%XMM21 |
(325) 0x23b67f VINSERTF32X4 $0x1,%XMM27,%YMM25,%YMM23 |
(325) 0x23b686 VMOVSD (%R10,%RAX,8),%XMM27 |
(325) 0x23b68d VMOVQ %XMM24,%RAX |
(325) 0x23b693 VMOVHPD (%R10,%RDX,8),%XMM27,%XMM27 |
(325) 0x23b69a VPEXTRQ $0x1,%XMM24,%RDX |
(325) 0x23b6a1 VINSERTF32X4 $0x1,%XMM28,%YMM29,%YMM25 |
(325) 0x23b6a8 VMULPD %YMM25,%YMM23,%YMM23 |
(325) 0x23b6ae VINSERTF32X4 $0x1,%XMM18,%YMM20,%YMM18 |
(325) 0x23b6b5 VEXTRACTI32X4 $0x1,%YMM24,%XMM20 |
(325) 0x23b6bc VMOVSD (%R8,%RAX,8),%XMM24 |
(325) 0x23b6c3 VMOVQ %XMM17,%RAX |
(325) 0x23b6c9 VMOVQ %XMM20,%R12 |
(325) 0x23b6cf VPEXTRQ $0x1,%XMM20,%RSI |
(325) 0x23b6d6 VMOVHPD (%R8,%RDX,8),%XMM24,%XMM24 |
(325) 0x23b6dd VPEXTRQ $0x1,%XMM17,%RDX |
(325) 0x23b6e4 VEXTRACTI32X4 $0x1,%YMM17,%XMM17 |
(325) 0x23b6eb VMOVSD (%R8,%R12,8),%XMM20 |
(325) 0x23b6f2 VPEXTRQ $0x1,%XMM17,%R12 |
(325) 0x23b6f9 VFMADD231PD %YMM22,%YMM19,%YMM23 |
(325) 0x23b6ff VINSERTF32X4 $0x1,%XMM21,%YMM27,%YMM21 |
(325) 0x23b706 VMOVHPD (%R8,%RSI,8),%XMM20,%XMM20 |
(325) 0x23b70d VMOVQ %XMM17,%RSI |
(325) 0x23b713 VPMULLQ %YMM15,%YMM8,%YMM19 |
(325) 0x23b719 VMOVSD (%R10,%RSI,8),%XMM17 |
(325) 0x23b720 VMOVHPD (%R10,%R12,8),%XMM17,%XMM17 |
(325) 0x23b727 VPADDQ %YMM14,%YMM19,%YMM19 |
(325) 0x23b72d VFMADD213PD %YMM23,%YMM18,%YMM21 |
(325) 0x23b733 VMOVSD (%R10,%RAX,8),%XMM18 |
(325) 0x23b73a VMOVHPD (%R10,%RDX,8),%XMM18,%XMM18 |
(325) 0x23b741 VINSERTF32X4 $0x1,%XMM20,%YMM24,%YMM20 |
(325) 0x23b748 VINSERTF32X4 $0x1,%XMM17,%YMM18,%YMM17 |
(325) 0x23b74f VPMULLQ %YMM15,%YMM7,%YMM18 |
(325) 0x23b755 VPMULLQ %YMM15,%YMM9,%YMM15 |
(325) 0x23b75b VFMADD213PD %YMM21,%YMM20,%YMM17 |
(325) 0x23b761 VPADDQ %YMM14,%YMM18,%YMM18 |
(325) 0x23b767 VPADDQ %YMM14,%YMM15,%YMM14 |
(325) 0x23b76c VMOVQ %XMM18,%RAX |
(325) 0x23b772 VPEXTRQ $0x1,%XMM18,%RDX |
(325) 0x23b779 VEXTRACTI32X4 $0x1,%YMM18,%XMM18 |
(325) 0x23b780 VMOVQ %XMM18,%RSI |
(325) 0x23b786 VPEXTRQ $0x1,%XMM18,%R12 |
(325) 0x23b78d VMULPD %YMM12,%YMM17,%YMM17 |
(325) 0x23b793 VEXTRACTF32X4 $0x1,%YMM17,%XMM18 |
(325) 0x23b79a VMOVLPD %XMM17,(%RDI,%RAX,8) |
(325) 0x23b7a1 VMOVHPD %XMM17,(%RDI,%RDX,8) |
(325) 0x23b7a8 VPEXTRQ $0x1,%XMM16,%RAX |
(325) 0x23b7af VMOVQ %XMM16,%RDX |
(325) 0x23b7b5 VEXTRACTI32X4 $0x1,%YMM16,%XMM16 |
(325) 0x23b7bc VMOVLPD %XMM18,(%RDI,%RSI,8) |
(325) 0x23b7c3 VMOVHPD %XMM18,(%RDI,%R12,8) |
(325) 0x23b7ca VMOVQ %XMM16,%RSI |
(325) 0x23b7d0 VPEXTRQ $0x1,%XMM16,%R12 |
(325) 0x23b7d7 VMOVSD (%RBX,%RDX,8),%XMM18 |
(325) 0x23b7de VMOVSD (%RBX,%RSI,8),%XMM16 |
(325) 0x23b7e5 VMOVQ %XMM19,%RDX |
(325) 0x23b7eb VMOVHPD (%RBX,%RAX,8),%XMM18,%XMM18 |
(325) 0x23b7f2 VMOVHPD (%RBX,%R12,8),%XMM16,%XMM16 |
(325) 0x23b7f9 VPEXTRQ $0x1,%XMM19,%RAX |
(325) 0x23b800 VEXTRACTI32X4 $0x1,%YMM19,%XMM19 |
(325) 0x23b807 VMOVQ %XMM19,%R12 |
(325) 0x23b80d VPEXTRQ $0x1,%XMM19,%RSI |
(325) 0x23b814 VMOVSD (%RBX,%RDX,8),%XMM19 |
(325) 0x23b81b VMOVQ %XMM14,%RDX |
(325) 0x23b820 VMOVHPD (%RBX,%RAX,8),%XMM19,%XMM19 |
(325) 0x23b827 VPEXTRQ $0x1,%XMM14,%RAX |
(325) 0x23b82d VEXTRACTI128 $0x1,%YMM14,%XMM14 |
(325) 0x23b833 VINSERTF32X4 $0x1,%XMM16,%YMM18,%YMM16 |
(325) 0x23b83a VMOVSD (%RBX,%R12,8),%XMM18 |
(325) 0x23b841 VPEXTRQ $0x1,%XMM14,%R12 |
(325) 0x23b847 VMOVHPD (%RBX,%RSI,8),%XMM18,%XMM18 |
(325) 0x23b84e VMOVQ %XMM14,%RSI |
(325) 0x23b853 VSUBPD %YMM16,%YMM17,%YMM16 |
(325) 0x23b859 VINSERTF32X4 $0x1,%XMM18,%YMM19,%YMM17 |
(325) 0x23b860 VADDPD %YMM17,%YMM16,%YMM16 |
(325) 0x23b866 VEXTRACTF32X4 $0x1,%YMM16,%XMM14 |
(325) 0x23b86d VMOVLPD %XMM16,(%R11,%RDX,8) |
(325) 0x23b874 VMOVHPD %XMM16,(%R11,%RAX,8) |
(325) 0x23b87b VMOVLPD %XMM14,(%R11,%RSI,8) |
(325) 0x23b881 VMOVHPD %XMM14,(%R11,%R12,8) |
(325) 0x23b887 JNE 23b400 |
0x23b88d MOV -0x68(%RBP),%RAX |
0x23b891 CMP %RAX,-0x70(%RBP) |
0x23b895 JE 23b9d2 |
0x23b89b VMOVSD -0x2a08b(%RIP),%XMM0 |
0x23b8a3 MOV %R14,-0x88(%RBP) |
0x23b8aa MOV %R13,-0x80(%RBP) |
0x23b8ae MOV %R15,-0x78(%RBP) |
0x23b8b2 NOPW %CS:(%RAX,%RAX,1) |
(324) 0x23b8c0 MOV %RCX,%RAX |
(324) 0x23b8c3 CQTO |
(324) 0x23b8c5 IDIV %R13 |
(324) 0x23b8c8 MOV -0xb0(%RBP),%RDI |
(324) 0x23b8cf MOV -0x50(%RBP),%RSI |
(324) 0x23b8d3 MOV -0xa8(%RBP),%R15 |
(324) 0x23b8da ADD %EDX,%ESI |
(324) 0x23b8dc MOV %R15,%R13 |
(324) 0x23b8df ADD -0x48(%RBP),%EDX |
(324) 0x23b8e2 MOVSXD %ESI,%RSI |
(324) 0x23b8e5 MOVSXD %EDX,%RDX |
(324) 0x23b8e8 LEA (%R14,%RAX,1),%R12D |
(324) 0x23b8ec LEA -0x1(%R14,%RAX,1),%EAX |
(324) 0x23b8f1 MOV %RDI,%R14 |
(324) 0x23b8f4 MOVSXD %EAX,%R9 |
(324) 0x23b8f7 IMUL %R9,%R14 |
(324) 0x23b8fb IMUL %R9,%R13 |
(324) 0x23b8ff LEA (%R14,%RSI,1),%RAX |
(324) 0x23b903 ADD %RDX,%R14 |
(324) 0x23b906 VMOVSD (%R8,%RAX,8),%XMM1 |
(324) 0x23b90c MOVSXD %R12D,%RAX |
(324) 0x23b90f MOV %RDI,%R12 |
(324) 0x23b912 IMUL %RAX,%R12 |
(324) 0x23b916 LEA (%R12,%RSI,1),%R11 |
(324) 0x23b91a ADD %RDX,%R12 |
(324) 0x23b91d VMOVSD (%R8,%R11,8),%XMM2 |
(324) 0x23b923 MOV %R15,%R11 |
(324) 0x23b926 IMUL %RAX,%R11 |
(324) 0x23b92a MOV -0x78(%RBP),%R15 |
(324) 0x23b92e LEA (%R11,%RSI,1),%RDI |
(324) 0x23b932 ADD %RDX,%R11 |
(324) 0x23b935 VMULSD (%R10,%RDI,8),%XMM2,%XMM2 |
(324) 0x23b93b LEA (%R13,%RSI,1),%RDI |
(324) 0x23b940 ADD %RDX,%R13 |
(324) 0x23b943 VFMADD231SD (%R10,%RDI,8),%XMM1,%XMM2 |
(324) 0x23b949 VMOVSD (%R8,%R14,8),%XMM1 |
(324) 0x23b94f MOV -0x58(%RBP),%RDI |
(324) 0x23b953 MOV -0x88(%RBP),%R14 |
(324) 0x23b95a VFMADD132SD (%R10,%R13,8),%XMM2,%XMM1 |
(324) 0x23b960 VMOVSD (%R8,%R12,8),%XMM2 |
(324) 0x23b966 MOV -0xa0(%RBP),%R12 |
(324) 0x23b96d MOV -0x80(%RBP),%R13 |
(324) 0x23b971 MOV %R12,%RDX |
(324) 0x23b974 IMUL %RAX,%RDX |
(324) 0x23b978 ADD %RSI,%RDX |
(324) 0x23b97b VFMADD132SD (%R10,%R11,8),%XMM1,%XMM2 |
(324) 0x23b981 MOV -0x40(%RBP),%R11 |
(324) 0x23b985 VMULSD %XMM0,%XMM2,%XMM1 |
(324) 0x23b989 VMOVSD %XMM1,(%RDI,%RDX,8) |
(324) 0x23b98e MOV -0x98(%RBP),%RDX |
(324) 0x23b995 IMUL %RDX,%R9 |
(324) 0x23b999 MOV %RDX,%RDX |
(324) 0x23b99c IMUL %RAX,%RDX |
(324) 0x23b9a0 ADD %RSI,%R9 |
(324) 0x23b9a3 ADD %RSI,%RDX |
(324) 0x23b9a6 VSUBSD (%RBX,%R9,8),%XMM1,%XMM1 |
(324) 0x23b9ac MOV -0x90(%RBP),%R9 |
(324) 0x23b9b3 VADDSD (%RBX,%RDX,8),%XMM1,%XMM1 |
(324) 0x23b9b8 IMUL %R9,%RAX |
(324) 0x23b9bc ADD %RSI,%RAX |
(324) 0x23b9bf VMOVSD %XMM1,(%R11,%RAX,8) |
(324) 0x23b9c5 CMP %R15,%RCX |
(324) 0x23b9c8 LEA 0x1(%RCX),%RCX |
(324) 0x23b9cc JL 23b8c0 |
0x23b9d2 MOV -0x34(%RBP),%ESI |
0x23b9d5 LEA 0x254cc(%RIP),%RDI |
0x23b9dc VZEROUPPER |
0x23b9df CALL 25f750 <@plt_start@+0x540> |
0x23b9e4 ADD $0x98,%RSP |
0x23b9eb POP %RBX |
0x23b9ec POP %R12 |
0x23b9ee POP %R13 |
0x23b9f0 POP %R14 |
0x23b9f2 POP %R15 |
0x23b9f4 POP %RBP |
0x23b9f5 RET |
0x23b9f6 NOPW %CS:(%RAX,%RAX,1) |
0x24d175 NOPW %CS:(%RAX,%RAX,1) |
0x25371f NOP |
0x2537ab NOPL (%RAX,%RAX,1) |
0x2537c1 NOPW %CS:(%RAX,%RAX,1) |
0x253936 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 129 |
nb uops | 133 |
loop length | 587 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 22.17 cycles |
front end | 22.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 8.00 | 8.00 | 4.00 | 18.33 | 18.33 | 18.33 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
cycles | 8.00 | 8.00 | 8.00 | 8.00 | 4.00 | 18.33 | 18.33 | 18.33 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 22.17 |
Dispatch | 18.33 |
Overall L1 | 22.17 |
all | 7% |
load | 10% |
store | 0% |
mul | 0% |
add-sub | 16% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 7% |
load | 8% |
store | 0% |
mul | 0% |
add-sub | 16% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 12% |
load | 14% |
store | 11% |
mul | 12% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 14% |
store | 11% |
mul | 12% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x98,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23b9e4 <.omp_outlined..18+0x7a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x1(%RBX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x3,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23b9e4 <.omp_outlined..18+0x7a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
SUB %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R13,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x38(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0xb8(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x25bd2(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x60(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x30(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 23b9d2 <.omp_outlined..18+0x792> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R12,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RBX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RSI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R11,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x4,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 23b89b <.omp_outlined..18+0x65b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R9,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VBROADCASTSD -0x29b74(%RIP),%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTQ %RCX,%YMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x2999a(%RIP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x29be3(%RIP),%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%YMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDI,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x40(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
AND $-0x4,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R13,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R14D,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R12,%YMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%YMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%YMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
ADD %R9,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI128 $0x1,%YMM1,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTD %EAX,%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTD %EAX,%XMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x70(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 23b9d2 <.omp_outlined..18+0x792> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD -0x2a08b(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x254cc(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x98,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 129 |
nb uops | 133 |
loop length | 587 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 22.17 cycles |
front end | 22.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 8.00 | 8.00 | 4.00 | 18.33 | 18.33 | 18.33 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
cycles | 8.00 | 8.00 | 8.00 | 8.00 | 4.00 | 18.33 | 18.33 | 18.33 | 1.00 | 10.00 | 1.00 | 1.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 22.17 |
Dispatch | 18.33 |
Overall L1 | 22.17 |
all | 7% |
load | 10% |
store | 0% |
mul | 0% |
add-sub | 16% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 7% |
load | 8% |
store | 0% |
mul | 0% |
add-sub | 16% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 12% |
load | 14% |
store | 11% |
mul | 12% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 14% |
store | 11% |
mul | 12% |
add-sub | 14% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x98,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23b9e4 <.omp_outlined..18+0x7a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x1(%RBX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x3,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23b9e4 <.omp_outlined..18+0x7a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
SUB %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R13,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x38(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0xb8(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x25bd2(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x60(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x30(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 23b9d2 <.omp_outlined..18+0x792> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R12,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RBX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RSI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R9),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R9),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R11,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x4,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 23b89b <.omp_outlined..18+0x65b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R9,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VBROADCASTSD -0x29b74(%RIP),%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTQ %RCX,%YMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x2999a(%RIP),%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x29be3(%RIP),%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%YMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDI,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x40(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
AND $-0x4,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R13,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R14D,%XMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R12,%YMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%YMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%YMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
ADD %R9,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI128 $0x1,%YMM1,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTD %EAX,%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTD %EAX,%XMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x70(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 23b9d2 <.omp_outlined..18+0x792> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD -0x2a08b(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x254cc(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x98,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..18– | 3.04 | 1.55 |
○Loop 325 - advec_mom.cpp:168-172 - exec | 3.04 | 1.55 |
○Loop 324 - advec_mom.cpp:168-172 - exec | 0 | 0 |