Function: .omp_outlined.#0x237520 | Module: exec | Source: advec_mom.cpp:44-48 [...] | Coverage: 2.97% |
---|
Function: .omp_outlined.#0x237520 | Module: exec | Source: advec_mom.cpp:44-48 [...] | Coverage: 2.97% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 44 - 48 |
-------------------------------------------------------------------------------- |
44: #pragma omp parallel for simd collapse(2) |
45: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
46: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
47: post_vol(i, j) = volume(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j); |
48: pre_vol(i, j) = post_vol(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x237520 PUSH %RBP |
0x237521 MOV %RSP,%RBP |
0x237524 PUSH %R15 |
0x237526 PUSH %R14 |
0x237528 PUSH %R13 |
0x23752a PUSH %R12 |
0x23752c PUSH %RBX |
0x23752d SUB $0x88,%RSP |
0x237534 MOV (%RCX),%R15D |
0x237537 MOV (%RDX),%EAX |
0x237539 ADD $0x4,%R15D |
0x23753d CMP %EAX,%R15D |
0x237540 JL 237cce |
0x237546 MOV (%R9),%EDX |
0x237549 MOV (%R8),%R12D |
0x23754c ADD $0x4,%EDX |
0x23754f CMP %R12D,%EDX |
0x237552 JL 237cce |
0x237558 LEA -0x1(%RAX),%EBX |
0x23755b LEA -0x1(%R12),%R14D |
0x237560 MOV %RDX,%RCX |
0x237563 MOV (%RDI),%ESI |
0x237565 MOVQ $0,-0x68(%RBP) |
0x23756d MOVQ $0x1,-0xa8(%RBP) |
0x237578 MOVL $0,-0x48(%RBP) |
0x23757f SUB %R14D,%ECX |
0x237582 SUB %EBX,%R15D |
0x237585 IMUL %RCX,%R15 |
0x237589 MOV %RCX,-0x58(%RBP) |
0x23758d DEC %R15 |
0x237590 MOV %R15,-0x40(%RBP) |
0x237594 SUB $0x8,%RSP |
0x237598 LEA -0x48(%RBP),%RCX |
0x23759c MOV %RAX,%R13 |
0x23759f LEA -0xa8(%RBP),%RAX |
0x2375a6 LEA 0x2965b(%RIP),%RDI |
0x2375ad LEA -0x68(%RBP),%R8 |
0x2375b1 LEA -0x40(%RBP),%R9 |
0x2375b5 MOV %ESI,-0x44(%RBP) |
0x2375b8 MOV $0x22,%EDX |
0x2375bd PUSH $0x1 |
0x2375bf PUSH $0x1 |
0x2375c1 PUSH %RAX |
0x2375c2 CALL 25f740 <@plt_start@+0x530> |
0x2375c7 ADD $0x20,%RSP |
0x2375cb MOV -0x40(%RBP),%RAX |
0x2375cf MOV -0x68(%RBP),%RCX |
0x2375d3 CMP %R15,%RAX |
0x2375d6 CMOVL %RAX,%R15 |
0x2375da MOV %R15,-0x40(%RBP) |
0x2375de CMP %R15,%RCX |
0x2375e1 JG 237cbc |
0x2375e7 MOV 0x18(%RBP),%RDI |
0x2375eb MOV %R14,-0x30(%RBP) |
0x2375ef MOV 0x30(%RBP),%RAX |
0x2375f3 MOV 0x20(%RBP),%RSI |
0x2375f7 MOV %R13,%R14 |
0x2375fa MOV %R13,-0x38(%RBP) |
0x2375fe MOV 0x10(%RBP),%R8 |
0x237602 MOV 0x28(%RBP),%R13 |
0x237606 MOV %RBX,-0x50(%RBP) |
0x23760a MOV (%RDI),%R10 |
0x23760d MOV 0x10(%RDI),%R9 |
0x237611 MOV (%RSI),%RDI |
0x237614 MOV 0x10(%RSI),%R11 |
0x237618 MOV (%R8),%RSI |
0x23761b MOV 0x10(%R8),%R14 |
0x23761f MOV (%RAX),%RDX |
0x237622 MOV %R13,%R8 |
0x237625 MOV 0x10(%RAX),%RBX |
0x237629 MOV 0x10(%R8),%RAX |
0x23762d MOV (%R13),%R13 |
0x237631 MOV %R15,%R8 |
0x237634 SUB %RCX,%R8 |
0x237637 INC %R8 |
0x23763a MOV %RAX,-0x60(%RBP) |
0x23763e MOV %R10,-0xa0(%RBP) |
0x237645 MOV %RSI,-0x98(%RBP) |
0x23764c MOV %RDX,-0x90(%RBP) |
0x237653 MOV %R13,-0x88(%RBP) |
0x23765a MOV %RDI,-0x80(%RBP) |
0x23765e CMP $0x8,%R8 |
0x237662 JAE 237674 |
0x237664 MOV %R14,%R8 |
0x237667 MOV -0x38(%RBP),%R10 |
0x23766b MOV -0x30(%RBP),%R14 |
0x23766f JMP 237bf0 |
0x237674 MOV -0x58(%RBP),%RAX |
0x237678 VPBROADCASTQ %RCX,%ZMM0 |
0x23767e VPADDQ -0x25a48(%RIP),%ZMM0,%ZMM0 |
0x237688 VPBROADCASTQ -0x25e8a(%RIP),%ZMM13 |
0x237692 MOV %R8,-0x70(%RBP) |
0x237696 AND $-0x8,%R8 |
0x23769a VPBROADCASTQ %R10,%ZMM4 |
0x2376a0 VPBROADCASTQ %RDI,%ZMM6 |
0x2376a6 VPBROADCASTQ %RSI,%ZMM7 |
0x2376ac VPBROADCASTD %R12D,%YMM8 |
0x2376b2 VPBROADCASTQ %RDX,%ZMM9 |
0x2376b8 VPBROADCASTQ %R13,%ZMM10 |
0x2376be MOV %R12,-0x78(%RBP) |
0x2376c2 ADD %R8,%RCX |
0x2376c5 MOV %R8,%R10 |
0x2376c8 VPBROADCASTQ %RAX,%ZMM1 |
0x2376ce MOV -0x50(%RBP),%RAX |
0x2376d2 VEXTRACTI32X4 $0x3,%ZMM1,%XMM11 |
0x2376d9 VEXTRACTI32X4 $0x2,%ZMM1,%XMM12 |
0x2376e0 VPBROADCASTD %EAX,%YMM2 |
0x2376e6 MOV -0x30(%RBP),%RAX |
0x2376ea VPBROADCASTD %EAX,%YMM3 |
0x2376f0 MOV -0x38(%RBP),%RAX |
0x2376f4 VPBROADCASTD %EAX,%YMM5 |
0x2376fa NOPW (%RAX,%RAX,1) |
(176) 0x237700 VEXTRACTI32X4 $0x3,%ZMM0,%XMM14 |
(176) 0x237707 VPEXTRQ $0x1,%XMM11,%RSI |
(176) 0x23770d VMOVQ %XMM11,%RDI |
(176) 0x237712 VPEXTRQ $0x1,%XMM12,%R12 |
(176) 0x237718 VEXTRACTI32X4 $0x1,%YMM1,%XMM18 |
(176) 0x23771f KXNORW %K0,%K0,%K1 |
(176) 0x237723 VPEXTRQ $0x1,%XMM14,%RAX |
(176) 0x237729 CQTO |
(176) 0x23772b IDIV %RSI |
(176) 0x23772e MOV %RAX,%RSI |
(176) 0x237731 VMOVQ %XMM14,%RAX |
(176) 0x237736 VEXTRACTI32X4 $0x2,%ZMM0,%XMM14 |
(176) 0x23773d VMOVQ %RSI,%XMM15 |
(176) 0x237742 VMOVQ %XMM12,%RSI |
(176) 0x237747 CQTO |
(176) 0x237749 IDIV %RDI |
(176) 0x23774c MOV %RAX,%RDI |
(176) 0x23774f VPEXTRQ $0x1,%XMM14,%RAX |
(176) 0x237755 VMOVQ %RDI,%XMM16 |
(176) 0x23775b VMOVQ %XMM18,%RDI |
(176) 0x237761 CQTO |
(176) 0x237763 VPUNPCKLQDQ %XMM15,%XMM16,%XMM15 |
(176) 0x237769 IDIV %R12 |
(176) 0x23776c VMOVQ %RAX,%XMM16 |
(176) 0x237772 VMOVQ %XMM14,%RAX |
(176) 0x237777 VEXTRACTI128 $0x1,%YMM0,%XMM14 |
(176) 0x23777d CQTO |
(176) 0x23777f IDIV %RSI |
(176) 0x237782 VPEXTRQ $0x1,%XMM18,%RSI |
(176) 0x237789 VMOVQ %RAX,%XMM17 |
(176) 0x23778f VPEXTRQ $0x1,%XMM14,%RAX |
(176) 0x237795 CQTO |
(176) 0x237797 IDIV %RSI |
(176) 0x23779a MOV %RAX,%RSI |
(176) 0x23779d VMOVQ %XMM14,%RAX |
(176) 0x2377a2 VPUNPCKLQDQ %XMM16,%XMM17,%XMM14 |
(176) 0x2377a8 CQTO |
(176) 0x2377aa VINSERTI128 $0x1,%XMM15,%YMM14,%YMM14 |
(176) 0x2377b0 VMOVQ %RSI,%XMM15 |
(176) 0x2377b5 VPEXTRQ $0x1,%XMM1,%RSI |
(176) 0x2377bb IDIV %RDI |
(176) 0x2377be VPEXTRQ $0x1,%XMM0,%RDX |
(176) 0x2377c4 VMOVQ %RAX,%XMM16 |
(176) 0x2377ca MOV %RDX,%RAX |
(176) 0x2377cd CQTO |
(176) 0x2377cf IDIV %RSI |
(176) 0x2377d2 VMOVQ %XMM1,%RSI |
(176) 0x2377d7 VPUNPCKLQDQ %XMM15,%XMM16,%XMM15 |
(176) 0x2377dd VMOVQ %RAX,%XMM17 |
(176) 0x2377e3 VMOVQ %XMM0,%RAX |
(176) 0x2377e8 CQTO |
(176) 0x2377ea IDIV %RSI |
(176) 0x2377ed ADD $-0x8,%R8 |
(176) 0x2377f1 VMOVQ %RAX,%XMM16 |
(176) 0x2377f7 VPUNPCKLQDQ %XMM17,%XMM16,%XMM16 |
(176) 0x2377fd VINSERTI32X4 $0x1,%XMM15,%YMM16,%YMM15 |
(176) 0x237804 VINSERTI64X4 $0x1,%YMM14,%ZMM15,%ZMM14 |
(176) 0x23780b VPMOVQD %ZMM14,%YMM17 |
(176) 0x237811 VPMULLQ %ZMM1,%ZMM14,%ZMM14 |
(176) 0x237817 VPSUBQ %ZMM14,%ZMM0,%ZMM14 |
(176) 0x23781d VPADDQ %ZMM13,%ZMM0,%ZMM0 |
(176) 0x237823 VPMOVQD %ZMM14,%YMM16 |
(176) 0x237829 VPADDD %YMM17,%YMM2,%YMM15 |
(176) 0x23782f VPADDD %YMM17,%YMM5,%YMM17 |
(176) 0x237835 VPMOVSXDQ %YMM15,%ZMM15 |
(176) 0x23783b VPMOVSXDQ %YMM17,%ZMM17 |
(176) 0x237841 VPADDD %YMM16,%YMM3,%YMM14 |
(176) 0x237847 VPADDD %YMM16,%YMM8,%YMM16 |
(176) 0x23784d VPMOVSXDQ %YMM14,%ZMM14 |
(176) 0x237853 VPMULLQ %ZMM15,%ZMM4,%ZMM18 |
(176) 0x237859 VPMULLQ %ZMM17,%ZMM6,%ZMM17 |
(176) 0x23785f VPMOVSXDQ %YMM16,%ZMM16 |
(176) 0x237865 VPADDQ %ZMM14,%ZMM18,%ZMM18 |
(176) 0x23786b VPADDQ %ZMM14,%ZMM17,%ZMM17 |
(176) 0x237871 VEXTRACTI32X4 $0x3,%ZMM18,%XMM21 |
(176) 0x237878 VEXTRACTI32X4 $0x2,%ZMM18,%XMM20 |
(176) 0x23787f VEXTRACTI32X4 $0x1,%YMM18,%XMM19 |
(176) 0x237886 VMOVQ %XMM21,%RAX |
(176) 0x23788c VMOVSD (%R9,%RAX,8),%XMM22 |
(176) 0x237893 VPEXTRQ $0x1,%XMM21,%RAX |
(176) 0x23789a VMOVHPD (%R9,%RAX,8),%XMM22,%XMM21 |
(176) 0x2378a1 VMOVQ %XMM20,%RAX |
(176) 0x2378a7 VMOVSD (%R9,%RAX,8),%XMM22 |
(176) 0x2378ae VMOVQ %XMM19,%RAX |
(176) 0x2378b4 VMOVSD (%R9,%RAX,8),%XMM23 |
(176) 0x2378bb VMOVQ %XMM18,%RAX |
(176) 0x2378c1 VMOVSD (%R9,%RAX,8),%XMM24 |
(176) 0x2378c8 VPEXTRQ $0x1,%XMM20,%RAX |
(176) 0x2378cf VMOVHPD (%R9,%RAX,8),%XMM22,%XMM20 |
(176) 0x2378d6 VPEXTRQ $0x1,%XMM18,%RAX |
(176) 0x2378dd VEXTRACTI32X4 $0x2,%ZMM17,%XMM22 |
(176) 0x2378e4 VMOVHPD (%R9,%RAX,8),%XMM24,%XMM18 |
(176) 0x2378eb VPEXTRQ $0x1,%XMM19,%RAX |
(176) 0x2378f2 VEXTRACTI32X4 $0x3,%ZMM17,%XMM24 |
(176) 0x2378f9 VEXTRACTI32X4 $0x1,%YMM17,%XMM19 |
(176) 0x237900 VMOVHPD (%R9,%RAX,8),%XMM23,%XMM23 |
(176) 0x237907 VMOVQ %XMM24,%RAX |
(176) 0x23790d VMOVSD (%R11,%RAX,8),%XMM25 |
(176) 0x237914 VPEXTRQ $0x1,%XMM24,%RAX |
(176) 0x23791b VMOVHPD (%R11,%RAX,8),%XMM25,%XMM24 |
(176) 0x237922 VMOVQ %XMM22,%RAX |
(176) 0x237928 VMOVSD (%R11,%RAX,8),%XMM25 |
(176) 0x23792f VPEXTRQ $0x1,%XMM22,%RAX |
(176) 0x237936 VMOVHPD (%R11,%RAX,8),%XMM25,%XMM22 |
(176) 0x23793d VMOVQ %XMM19,%RAX |
(176) 0x237943 VMOVSD (%R11,%RAX,8),%XMM25 |
(176) 0x23794a VPEXTRQ $0x1,%XMM19,%RAX |
(176) 0x237951 VMOVHPD (%R11,%RAX,8),%XMM25,%XMM19 |
(176) 0x237958 VMOVQ %XMM17,%RAX |
(176) 0x23795e VINSERTF32X4 $0x1,%XMM21,%YMM20,%YMM20 |
(176) 0x237965 VMOVSD (%R11,%RAX,8),%XMM21 |
(176) 0x23796c VPEXTRQ $0x1,%XMM17,%RAX |
(176) 0x237973 VMOVHPD (%R11,%RAX,8),%XMM21,%XMM17 |
(176) 0x23797a VPMULLQ %ZMM15,%ZMM6,%ZMM21 |
(176) 0x237980 VINSERTF32X4 $0x1,%XMM23,%YMM18,%YMM18 |
(176) 0x237987 VINSERTF64X4 $0x1,%YMM20,%ZMM18,%ZMM18 |
(176) 0x23798e VPADDQ %ZMM14,%ZMM21,%ZMM21 |
(176) 0x237994 VEXTRACTI32X4 $0x3,%ZMM21,%XMM25 |
(176) 0x23799b VINSERTF32X4 $0x1,%XMM24,%YMM22,%YMM22 |
(176) 0x2379a2 VEXTRACTI32X4 $0x2,%ZMM21,%XMM24 |
(176) 0x2379a9 VEXTRACTI32X4 $0x1,%YMM21,%XMM23 |
(176) 0x2379b0 VMOVQ %XMM25,%RAX |
(176) 0x2379b6 VMOVSD (%R11,%RAX,8),%XMM26 |
(176) 0x2379bd VPEXTRQ $0x1,%XMM25,%RAX |
(176) 0x2379c4 VMOVHPD (%R11,%RAX,8),%XMM26,%XMM25 |
(176) 0x2379cb VMOVQ %XMM24,%RAX |
(176) 0x2379d1 VINSERTF32X4 $0x1,%XMM19,%YMM17,%YMM17 |
(176) 0x2379d8 VMOVSD (%R11,%RAX,8),%XMM19 |
(176) 0x2379df VPEXTRQ $0x1,%XMM24,%RAX |
(176) 0x2379e6 VMOVHPD (%R11,%RAX,8),%XMM19,%XMM19 |
(176) 0x2379ed VMOVQ %XMM23,%RAX |
(176) 0x2379f3 VMOVSD (%R11,%RAX,8),%XMM20 |
(176) 0x2379fa VPEXTRQ $0x1,%XMM23,%RAX |
(176) 0x237a01 VINSERTF64X4 $0x1,%YMM22,%ZMM17,%ZMM17 |
(176) 0x237a08 VMOVHPD (%R11,%RAX,8),%XMM20,%XMM20 |
(176) 0x237a0f VMOVQ %XMM21,%RAX |
(176) 0x237a15 VMOVSD (%R11,%RAX,8),%XMM22 |
(176) 0x237a1c VPEXTRQ $0x1,%XMM21,%RAX |
(176) 0x237a23 VADDPD %ZMM17,%ZMM18,%ZMM17 |
(176) 0x237a29 VMOVHPD (%R11,%RAX,8),%XMM22,%XMM21 |
(176) 0x237a30 VINSERTF32X4 $0x1,%XMM25,%YMM19,%YMM19 |
(176) 0x237a37 VINSERTF32X4 $0x1,%XMM20,%YMM21,%YMM18 |
(176) 0x237a3e VINSERTF64X4 $0x1,%YMM19,%ZMM18,%ZMM18 |
(176) 0x237a45 VPMULLQ %ZMM15,%ZMM7,%ZMM19 |
(176) 0x237a4b VSUBPD %ZMM18,%ZMM17,%ZMM17 |
(176) 0x237a51 VPMULLQ %ZMM15,%ZMM9,%ZMM18 |
(176) 0x237a57 VPMULLQ %ZMM15,%ZMM10,%ZMM15 |
(176) 0x237a5d VPADDQ %ZMM14,%ZMM19,%ZMM19 |
(176) 0x237a63 VPADDQ %ZMM16,%ZMM18,%ZMM16 |
(176) 0x237a69 VSCATTERQPD %ZMM17,(%R14,%ZMM19,8){%K1} |
(176) 0x237a70 KXNORW %K0,%K0,%K1 |
(176) 0x237a74 VEXTRACTI32X4 $0x3,%ZMM16,%XMM21 |
(176) 0x237a7b VEXTRACTI32X4 $0x2,%ZMM16,%XMM19 |
(176) 0x237a82 VEXTRACTI32X4 $0x1,%YMM16,%XMM20 |
(176) 0x237a89 VMOVQ %XMM21,%RAX |
(176) 0x237a8f VMOVSD (%RBX,%RAX,8),%XMM22 |
(176) 0x237a96 VPEXTRQ $0x1,%XMM21,%RAX |
(176) 0x237a9d VMOVHPD (%RBX,%RAX,8),%XMM22,%XMM21 |
(176) 0x237aa4 VMOVQ %XMM19,%RAX |
(176) 0x237aaa VMOVSD (%RBX,%RAX,8),%XMM22 |
(176) 0x237ab1 VPEXTRQ $0x1,%XMM19,%RAX |
(176) 0x237ab8 VMOVHPD (%RBX,%RAX,8),%XMM22,%XMM19 |
(176) 0x237abf VMOVQ %XMM20,%RAX |
(176) 0x237ac5 VMOVSD (%RBX,%RAX,8),%XMM22 |
(176) 0x237acc VPEXTRQ $0x1,%XMM20,%RAX |
(176) 0x237ad3 VMOVHPD (%RBX,%RAX,8),%XMM22,%XMM20 |
(176) 0x237ada VMOVQ %XMM16,%RAX |
(176) 0x237ae0 VMOVSD (%RBX,%RAX,8),%XMM22 |
(176) 0x237ae7 VPEXTRQ $0x1,%XMM16,%RAX |
(176) 0x237aee VPADDQ %ZMM14,%ZMM18,%ZMM16 |
(176) 0x237af4 VPADDQ %ZMM14,%ZMM15,%ZMM14 |
(176) 0x237afa VEXTRACTI32X4 $0x3,%ZMM16,%XMM24 |
(176) 0x237b01 VMOVHPD (%RBX,%RAX,8),%XMM22,%XMM22 |
(176) 0x237b08 VEXTRACTI32X4 $0x2,%ZMM16,%XMM23 |
(176) 0x237b0f VEXTRACTI32X4 $0x1,%YMM16,%XMM18 |
(176) 0x237b16 VMOVQ %XMM24,%RAX |
(176) 0x237b1c VMOVSD (%RBX,%RAX,8),%XMM25 |
(176) 0x237b23 VPEXTRQ $0x1,%XMM24,%RAX |
(176) 0x237b2a VMOVHPD (%RBX,%RAX,8),%XMM25,%XMM24 |
(176) 0x237b31 VMOVQ %XMM23,%RAX |
(176) 0x237b37 VMOVSD (%RBX,%RAX,8),%XMM25 |
(176) 0x237b3e VPEXTRQ $0x1,%XMM23,%RAX |
(176) 0x237b45 VINSERTF32X4 $0x1,%XMM21,%YMM19,%YMM19 |
(176) 0x237b4c VMOVHPD (%RBX,%RAX,8),%XMM25,%XMM23 |
(176) 0x237b53 VMOVQ %XMM18,%RAX |
(176) 0x237b59 VMOVSD (%RBX,%RAX,8),%XMM25 |
(176) 0x237b60 VPEXTRQ $0x1,%XMM18,%RAX |
(176) 0x237b67 VMOVHPD (%RBX,%RAX,8),%XMM25,%XMM18 |
(176) 0x237b6e VMOVQ %XMM16,%RAX |
(176) 0x237b74 VMOVSD (%RBX,%RAX,8),%XMM25 |
(176) 0x237b7b VPEXTRQ $0x1,%XMM16,%RAX |
(176) 0x237b82 VINSERTF32X4 $0x1,%XMM20,%YMM22,%YMM20 |
(176) 0x237b89 VMOVHPD (%RBX,%RAX,8),%XMM25,%XMM16 |
(176) 0x237b90 MOV -0x60(%RBP),%RAX |
(176) 0x237b94 VINSERTF64X4 $0x1,%YMM19,%ZMM20,%ZMM19 |
(176) 0x237b9b VADDPD %ZMM19,%ZMM17,%ZMM17 |
(176) 0x237ba1 VINSERTF32X4 $0x1,%XMM24,%YMM23,%YMM19 |
(176) 0x237ba8 VINSERTF32X4 $0x1,%XMM18,%YMM16,%YMM16 |
(176) 0x237baf VINSERTF64X4 $0x1,%YMM19,%ZMM16,%ZMM16 |
(176) 0x237bb6 VSUBPD %ZMM16,%ZMM17,%ZMM16 |
(176) 0x237bbc VSCATTERQPD %ZMM16,(%RAX,%ZMM14,8){%K1} |
(176) 0x237bc3 JNE 237700 |
0x237bc9 MOV %R14,%R8 |
0x237bcc MOV -0x78(%RBP),%R12 |
0x237bd0 MOV -0x30(%RBP),%R14 |
0x237bd4 CMP %R10,-0x70(%RBP) |
0x237bd8 MOV -0x38(%RBP),%R10 |
0x237bdc JE 237cbc |
0x237be2 NOPW %CS:(%RAX,%RAX,1) |
(177) 0x237bf0 MOV -0x58(%RBP),%RSI |
(177) 0x237bf4 MOV %RCX,%RAX |
(177) 0x237bf7 CQTO |
(177) 0x237bf9 MOV %R8,%R13 |
(177) 0x237bfc IDIV %RSI |
(177) 0x237bff MOV -0x50(%RBP),%RSI |
(177) 0x237c03 MOV -0xa0(%RBP),%R8 |
(177) 0x237c0a LEA (%RSI,%RAX,1),%EDI |
(177) 0x237c0d LEA (%R14,%RDX,1),%ESI |
(177) 0x237c11 MOV %R15,%R14 |
(177) 0x237c14 MOV %R10,%R15 |
(177) 0x237c17 MOV %R12,%R10 |
(177) 0x237c1a MOV %R15,%R10 |
(177) 0x237c1d ADD %R10D,%EAX |
(177) 0x237c20 ADD %R12D,%EDX |
(177) 0x237c23 MOV %R14,%R15 |
(177) 0x237c26 MOV -0x30(%RBP),%R14 |
(177) 0x237c2a MOVSXD %EDI,%RDI |
(177) 0x237c2d MOVSXD %ESI,%RSI |
(177) 0x237c30 CLTQ |
(177) 0x237c32 IMUL %RDI,%R8 |
(177) 0x237c36 ADD %RSI,%R8 |
(177) 0x237c39 VMOVSD (%R9,%R8,8),%XMM0 |
(177) 0x237c3f MOV %R13,%R8 |
(177) 0x237c42 MOV -0x80(%RBP),%R13 |
(177) 0x237c46 IMUL %R13,%RAX |
(177) 0x237c4a ADD %RSI,%RAX |
(177) 0x237c4d VADDSD (%R11,%RAX,8),%XMM0,%XMM0 |
(177) 0x237c53 MOV %R13,%RAX |
(177) 0x237c56 IMUL %RDI,%RAX |
(177) 0x237c5a MOV -0x98(%RBP),%R13 |
(177) 0x237c61 ADD %RSI,%RAX |
(177) 0x237c64 VSUBSD (%R11,%RAX,8),%XMM0,%XMM0 |
(177) 0x237c6a MOV %R13,%RAX |
(177) 0x237c6d IMUL %RDI,%RAX |
(177) 0x237c71 ADD %RSI,%RAX |
(177) 0x237c74 VMOVSD %XMM0,(%R8,%RAX,8) |
(177) 0x237c7a MOVSXD %EDX,%RAX |
(177) 0x237c7d MOV -0x90(%RBP),%RDX |
(177) 0x237c84 IMUL %RDI,%RDX |
(177) 0x237c88 ADD %RDX,%RAX |
(177) 0x237c8b ADD %RSI,%RDX |
(177) 0x237c8e VADDSD (%RBX,%RAX,8),%XMM0,%XMM0 |
(177) 0x237c93 MOV -0x60(%RBP),%RAX |
(177) 0x237c97 VSUBSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(177) 0x237c9c MOV -0x88(%RBP),%RDX |
(177) 0x237ca3 IMUL %RDX,%RDI |
(177) 0x237ca7 ADD %RSI,%RDI |
(177) 0x237caa VMOVSD %XMM0,(%RAX,%RDI,8) |
(177) 0x237caf CMP %R15,%RCX |
(177) 0x237cb2 LEA 0x1(%RCX),%RCX |
(177) 0x237cb6 JL 237bf0 |
0x237cbc MOV -0x44(%RBP),%ESI |
0x237cbf LEA 0x28f5a(%RIP),%RDI |
0x237cc6 VZEROUPPER |
0x237cc9 CALL 25f750 <@plt_start@+0x540> |
0x237cce ADD $0x88,%RSP |
0x237cd5 POP %RBX |
0x237cd6 POP %R12 |
0x237cd8 POP %R13 |
0x237cda POP %R14 |
0x237cdc POP %R15 |
0x237cde POP %RBP |
0x237cdf RET |
Path / |
Source file and lines | advec_mom.cpp:44-48 |
Module | exec |
nb instructions | 131 |
nb uops | 142 |
loop length | 555 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 4 |
used zmm registers | 8 |
nb stack references | 22 |
micro-operation queue | 23.67 cycles |
front end | 23.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.75 | 8.75 | 8.50 | 8.50 | 4.50 | 18.33 | 18.33 | 18.33 | 0.50 | 10.00 | 10.00 | 0.50 | 0.00 | 0.00 |
cycles | 8.75 | 8.75 | 8.50 | 8.50 | 4.50 | 18.67 | 18.67 | 18.67 | 0.50 | 10.00 | 10.00 | 0.50 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 23.67 |
Dispatch | 18.67 |
Overall L1 | 23.67 |
all | 7% |
load | 8% |
store | 0% |
mul | 0% |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 19% |
store | 11% |
mul | 12% |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 237cce <.omp_outlined.+0x7ae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 237cce <.omp_outlined.+0x7ae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RAX),%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%R12),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %R14D,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EBX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
IMUL %RCX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0xa8(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x2965b(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x68(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 237cbc <.omp_outlined.+0x79c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R14,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R13,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RSI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R13,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R13),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R15,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 237674 <.omp_outlined.+0x154> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JMP 237bf0 <.omp_outlined.+0x6d0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %RCX,%ZMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x25a48(%RIP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 |
VPBROADCASTQ -0x25e8a(%RIP),%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %R8,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R10,%ZMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDI,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R12D,%YMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%ZMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R13,%ZMM10 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV %R12,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VPBROADCASTD %EAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTD %EAX,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTD %EAX,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV %R14,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x78(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R10,-0x70(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JE 237cbc <.omp_outlined.+0x79c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x44(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x28f5a(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | advec_mom.cpp:44-48 |
Module | exec |
nb instructions | 131 |
nb uops | 142 |
loop length | 555 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 4 |
used zmm registers | 8 |
nb stack references | 22 |
micro-operation queue | 23.67 cycles |
front end | 23.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.75 | 8.75 | 8.50 | 8.50 | 4.50 | 18.33 | 18.33 | 18.33 | 0.50 | 10.00 | 10.00 | 0.50 | 0.00 | 0.00 |
cycles | 8.75 | 8.75 | 8.50 | 8.50 | 4.50 | 18.67 | 18.67 | 18.67 | 0.50 | 10.00 | 10.00 | 0.50 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 23.67 |
Dispatch | 18.67 |
Overall L1 | 23.67 |
all | 7% |
load | 8% |
store | 0% |
mul | 0% |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 19% |
store | 11% |
mul | 12% |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 237cce <.omp_outlined.+0x7ae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 237cce <.omp_outlined.+0x7ae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RAX),%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%R12),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %R14D,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EBX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
IMUL %RCX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0xa8(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x2965b(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x68(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 237cbc <.omp_outlined.+0x79c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R14,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R13,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RSI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R13,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R13),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R15,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 237674 <.omp_outlined.+0x154> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JMP 237bf0 <.omp_outlined.+0x6d0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %RCX,%ZMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x25a48(%RIP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 |
VPBROADCASTQ -0x25e8a(%RIP),%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %R8,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R10,%ZMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDI,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R12D,%YMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%ZMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R13,%ZMM10 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV %R12,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VPBROADCASTD %EAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTD %EAX,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTD %EAX,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV %R14,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x78(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R10,-0x70(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JE 237cbc <.omp_outlined.+0x79c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x44(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x28f5a(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined.#0x237520– | 2.97 | 1.51 |
○Loop 176 - advec_mom.cpp:45-48 - exec | 2.97 | 1.51 |
○Loop 177 - advec_mom.cpp:45-48 - exec | 0 | 0 |