Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:62-66 [...] | Coverage: 2.53% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:62-66 [...] | Coverage: 2.53% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 62 - 66 |
-------------------------------------------------------------------------------- |
62: #pragma omp parallel for simd collapse(2) |
63: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
64: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
65: post_vol(i, j) = volume(i, j); |
66: pre_vol(i, j) = post_vol(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42d510 PUSH %RBP |
0x42d511 MOV %RSP,%RBP |
0x42d514 PUSH %R15 |
0x42d516 PUSH %R14 |
0x42d518 PUSH %R13 |
0x42d51a PUSH %R12 |
0x42d51c PUSH %RBX |
0x42d51d MOV %RDI,%R15 |
0x42d520 AND $-0x40,%RSP |
0x42d524 SUB $0x40,%RSP |
0x42d528 MOV 0x28(%RDI),%EAX |
0x42d52b MOV 0x2c(%RDI),%EDX |
0x42d52e MOV 0x20(%RDI),%EDI |
0x42d531 MOV 0x24(%R15),%ECX |
0x42d535 ADD $0x4,%EDX |
0x42d538 DEC %EDI |
0x42d53a LEA -0x1(%RAX),%R13D |
0x42d53e MOV %EDX,0x10(%RSP) |
0x42d542 MOV %EDI,0xc(%RSP) |
0x42d546 CMP %EDX,%R13D |
0x42d549 JGE 42db23 |
0x42d54f MOV %EDX,%EBX |
0x42d551 LEA 0x4(%RCX),%R14D |
0x42d555 SUB %R13D,%EBX |
0x42d558 CMP %R14D,%EDI |
0x42d55b JGE 42db23 |
0x42d561 MOV %R14D,%ESI |
0x42d564 SUB %EDI,%ESI |
0x42d566 MOV %ESI,0x14(%RSP) |
0x42d56a CALL 404650 <omp_get_num_threads@plt> |
0x42d56f MOV %EAX,%R12D |
0x42d572 CALL 404540 <omp_get_thread_num@plt> |
0x42d577 XOR %EDX,%EDX |
0x42d579 MOV %EAX,%R8D |
0x42d57c MOV 0x14(%RSP),%EAX |
0x42d580 IMUL %EBX,%EAX |
0x42d583 DIV %R12D |
0x42d586 MOV %EAX,%ECX |
0x42d588 CMP %EDX,%R8D |
0x42d58b JB 42db5c |
0x42d591 IMUL %ECX,%R8D |
0x42d595 LEA (%R8,%RDX,1),%R8D |
0x42d599 LEA (%RCX,%R8,1),%R9D |
0x42d59d MOV %R9D,0x8(%RSP) |
0x42d5a2 CMP %R9D,%R8D |
0x42d5a5 JAE 42db23 |
0x42d5ab MOV %R8D,%EAX |
0x42d5ae XOR %EDX,%EDX |
0x42d5b0 MOV 0xc(%RSP),%R10D |
0x42d5b5 VMOVQ 0x8(%R15),%XMM13 |
0x42d5bb DIVL 0x14(%RSP) |
0x42d5bf VMOVQ 0x18(%R15),%XMM12 |
0x42d5c5 VMOVQ (%R15),%XMM3 |
0x42d5ca VMOVQ 0x10(%R15),%XMM1 |
0x42d5d0 LEA (%RDX,%R10,1),%R11D |
0x42d5d4 LEA (%RAX,%R13,1),%R13D |
0x42d5d8 SUB %R11D,%R14D |
0x42d5db MOVSXD %R13D,%RDI |
0x42d5de MOV %R11D,0x3c(%RSP) |
0x42d5e3 NOPW %CS:(%RAX,%RAX,1) |
0x42d5ee XCHG %AX,%AX |
(181) 0x42d5f0 CMP %R14D,%ECX |
(181) 0x42d5f3 CMOVA %R14D,%ECX |
(181) 0x42d5f7 LEA (%R8,%RCX,1),%R14D |
(181) 0x42d5fb MOV %R14D,0x38(%RSP) |
(181) 0x42d600 CMP %R14D,%R8D |
(181) 0x42d603 JAE 42db40 |
(181) 0x42d609 VMOVQ %XMM3,%R9 |
(181) 0x42d60e VMOVQ %XMM13,%RBX |
(181) 0x42d613 LEA -0x1(%RCX),%R14D |
(181) 0x42d617 MOV (%R9),%RAX |
(181) 0x42d61a VMOVQ %XMM12,%R12 |
(181) 0x42d61f MOV (%RBX),%RSI |
(181) 0x42d622 MOV 0x10(%R9),%R13 |
(181) 0x42d626 MOV (%R12),%RDX |
(181) 0x42d62a LEA 0x1(%RDI),%R9 |
(181) 0x42d62e VMOVQ %XMM1,%R11 |
(181) 0x42d633 VMOVQ 0x10(%RBX),%XMM5 |
(181) 0x42d638 MOV %R9,0x18(%RSP) |
(181) 0x42d63d VMOVQ 0x10(%R12),%XMM4 |
(181) 0x42d644 MOV 0x10(%R11),%R15 |
(181) 0x42d648 IMUL %RAX,%R9 |
(181) 0x42d64c IMUL %RDI,%RSI |
(181) 0x42d650 IMUL %RDI,%RDX |
(181) 0x42d654 IMUL (%R11),%RDI |
(181) 0x42d658 MOV %R9,%R10 |
(181) 0x42d65b SUB %RAX,%R10 |
(181) 0x42d65e MOV %RSI,0x20(%RSP) |
(181) 0x42d663 MOV %RDX,0x28(%RSP) |
(181) 0x42d668 MOV %R10,0x30(%RSP) |
(181) 0x42d66d CMP $0x6,%R14D |
(181) 0x42d671 JBE 42db50 |
(181) 0x42d677 MOVSXD 0x3c(%RSP),%RAX |
(181) 0x42d67c VMOVQ %XMM5,%RBX |
(181) 0x42d681 VMOVQ %XMM4,%R14 |
(181) 0x42d686 LEA (%RSI,%RAX,1),%R11 |
(181) 0x42d68a LEA (%RDX,%RAX,1),%RDX |
(181) 0x42d68e LEA (%R9,%RAX,1),%RSI |
(181) 0x42d692 LEA (%R10,%RAX,1),%R10 |
(181) 0x42d696 LEA (%RBX,%R11,8),%R12 |
(181) 0x42d69a LEA (%R14,%RDX,8),%RBX |
(181) 0x42d69e MOV %ECX,%R14D |
(181) 0x42d6a1 LEA (%R13,%RSI,8),%R11 |
(181) 0x42d6a6 SHR $0x3,%R14D |
(181) 0x42d6aa SAL $0x6,%R14 |
(181) 0x42d6ae LEA -0x40(%R14),%RSI |
(181) 0x42d6b2 ADD %RDI,%RAX |
(181) 0x42d6b5 LEA (%R13,%R10,8),%R10 |
(181) 0x42d6ba SHR $0x6,%RSI |
(181) 0x42d6be INC %RSI |
(181) 0x42d6c1 LEA (%R15,%RAX,8),%RDX |
(181) 0x42d6c5 XOR %EAX,%EAX |
(181) 0x42d6c7 AND $0x7,%ESI |
(181) 0x42d6ca JE 42d817 |
(181) 0x42d6d0 CMP $0x1,%RSI |
(181) 0x42d6d4 JE 42d7e7 |
(181) 0x42d6da CMP $0x2,%RSI |
(181) 0x42d6de JE 42d7c0 |
(181) 0x42d6e4 CMP $0x3,%RSI |
(181) 0x42d6e8 JE 42d799 |
(181) 0x42d6ee CMP $0x4,%RSI |
(181) 0x42d6f2 JE 42d772 |
(181) 0x42d6f4 CMP $0x5,%RSI |
(181) 0x42d6f8 JE 42d74b |
(181) 0x42d6fa CMP $0x6,%RSI |
(181) 0x42d6fe JE 42d724 |
(181) 0x42d700 VMOVUPD (%R12),%ZMM6 |
(181) 0x42d707 MOV $0x40,%EAX |
(181) 0x42d70c VMOVUPD %ZMM6,(%RBX) |
(181) 0x42d712 VADDPD (%R11),%ZMM6,%ZMM0 |
(181) 0x42d718 VSUBPD (%R10),%ZMM0,%ZMM2 |
(181) 0x42d71e VMOVUPD %ZMM2,(%RDX) |
(181) 0x42d724 VMOVUPD (%R12,%RAX,1),%ZMM7 |
(181) 0x42d72b VMOVUPD %ZMM7,(%RBX,%RAX,1) |
(181) 0x42d732 VADDPD (%R11,%RAX,1),%ZMM7,%ZMM8 |
(181) 0x42d739 VSUBPD (%R10,%RAX,1),%ZMM8,%ZMM9 |
(181) 0x42d740 VMOVUPD %ZMM9,(%RDX,%RAX,1) |
(181) 0x42d747 ADD $0x40,%RAX |
(181) 0x42d74b VMOVUPD (%R12,%RAX,1),%ZMM10 |
(181) 0x42d752 VMOVUPD %ZMM10,(%RBX,%RAX,1) |
(181) 0x42d759 VADDPD (%R11,%RAX,1),%ZMM10,%ZMM11 |
(181) 0x42d760 VSUBPD (%R10,%RAX,1),%ZMM11,%ZMM14 |
(181) 0x42d767 VMOVUPD %ZMM14,(%RDX,%RAX,1) |
(181) 0x42d76e ADD $0x40,%RAX |
(181) 0x42d772 VMOVUPD (%R12,%RAX,1),%ZMM15 |
(181) 0x42d779 VMOVUPD %ZMM15,(%RBX,%RAX,1) |
(181) 0x42d780 VADDPD (%R11,%RAX,1),%ZMM15,%ZMM6 |
(181) 0x42d787 VSUBPD (%R10,%RAX,1),%ZMM6,%ZMM0 |
(181) 0x42d78e VMOVUPD %ZMM0,(%RDX,%RAX,1) |
(181) 0x42d795 ADD $0x40,%RAX |
(181) 0x42d799 VMOVUPD (%R12,%RAX,1),%ZMM2 |
(181) 0x42d7a0 VMOVUPD %ZMM2,(%RBX,%RAX,1) |
(181) 0x42d7a7 VADDPD (%R11,%RAX,1),%ZMM2,%ZMM7 |
(181) 0x42d7ae VSUBPD (%R10,%RAX,1),%ZMM7,%ZMM8 |
(181) 0x42d7b5 VMOVUPD %ZMM8,(%RDX,%RAX,1) |
(181) 0x42d7bc ADD $0x40,%RAX |
(181) 0x42d7c0 VMOVUPD (%R12,%RAX,1),%ZMM9 |
(181) 0x42d7c7 VMOVUPD %ZMM9,(%RBX,%RAX,1) |
(181) 0x42d7ce VADDPD (%R11,%RAX,1),%ZMM9,%ZMM10 |
(181) 0x42d7d5 VSUBPD (%R10,%RAX,1),%ZMM10,%ZMM11 |
(181) 0x42d7dc VMOVUPD %ZMM11,(%RDX,%RAX,1) |
(181) 0x42d7e3 ADD $0x40,%RAX |
(181) 0x42d7e7 VMOVUPD (%R12,%RAX,1),%ZMM14 |
(181) 0x42d7ee VMOVUPD %ZMM14,(%RBX,%RAX,1) |
(181) 0x42d7f5 VADDPD (%R11,%RAX,1),%ZMM14,%ZMM15 |
(181) 0x42d7fc VSUBPD (%R10,%RAX,1),%ZMM15,%ZMM6 |
(181) 0x42d803 VMOVUPD %ZMM6,(%RDX,%RAX,1) |
(181) 0x42d80a ADD $0x40,%RAX |
(181) 0x42d80e CMP %R14,%RAX |
(181) 0x42d811 JE 42d961 |
(182) 0x42d817 VMOVUPD (%R12,%RAX,1),%ZMM0 |
(182) 0x42d81e VMOVUPD %ZMM0,(%RBX,%RAX,1) |
(182) 0x42d825 VADDPD (%R11,%RAX,1),%ZMM0,%ZMM2 |
(182) 0x42d82c VSUBPD (%R10,%RAX,1),%ZMM2,%ZMM7 |
(182) 0x42d833 VMOVUPD %ZMM7,(%RDX,%RAX,1) |
(182) 0x42d83a VMOVUPD 0x40(%R12,%RAX,1),%ZMM8 |
(182) 0x42d842 VMOVUPD %ZMM8,0x40(%RBX,%RAX,1) |
(182) 0x42d84a VADDPD 0x40(%R11,%RAX,1),%ZMM8,%ZMM9 |
(182) 0x42d852 VSUBPD 0x40(%R10,%RAX,1),%ZMM9,%ZMM10 |
(182) 0x42d85a VMOVUPD %ZMM10,0x40(%RDX,%RAX,1) |
(182) 0x42d862 VMOVUPD 0x80(%R12,%RAX,1),%ZMM11 |
(182) 0x42d86a VMOVUPD %ZMM11,0x80(%RBX,%RAX,1) |
(182) 0x42d872 VADDPD 0x80(%R11,%RAX,1),%ZMM11,%ZMM14 |
(182) 0x42d87a VSUBPD 0x80(%R10,%RAX,1),%ZMM14,%ZMM15 |
(182) 0x42d882 VMOVUPD %ZMM15,0x80(%RDX,%RAX,1) |
(182) 0x42d88a VMOVUPD 0xc0(%R12,%RAX,1),%ZMM6 |
(182) 0x42d892 VMOVUPD %ZMM6,0xc0(%RBX,%RAX,1) |
(182) 0x42d89a VADDPD 0xc0(%R11,%RAX,1),%ZMM6,%ZMM0 |
(182) 0x42d8a2 VSUBPD 0xc0(%R10,%RAX,1),%ZMM0,%ZMM2 |
(182) 0x42d8aa VMOVUPD %ZMM2,0xc0(%RDX,%RAX,1) |
(182) 0x42d8b2 VMOVUPD 0x100(%R12,%RAX,1),%ZMM7 |
(182) 0x42d8ba VMOVUPD %ZMM7,0x100(%RBX,%RAX,1) |
(182) 0x42d8c2 VADDPD 0x100(%R11,%RAX,1),%ZMM7,%ZMM8 |
(182) 0x42d8ca VSUBPD 0x100(%R10,%RAX,1),%ZMM8,%ZMM9 |
(182) 0x42d8d2 VMOVUPD %ZMM9,0x100(%RDX,%RAX,1) |
(182) 0x42d8da VMOVUPD 0x140(%R12,%RAX,1),%ZMM10 |
(182) 0x42d8e2 VMOVUPD %ZMM10,0x140(%RBX,%RAX,1) |
(182) 0x42d8ea VADDPD 0x140(%R11,%RAX,1),%ZMM10,%ZMM11 |
(182) 0x42d8f2 VSUBPD 0x140(%R10,%RAX,1),%ZMM11,%ZMM14 |
(182) 0x42d8fa VMOVUPD %ZMM14,0x140(%RDX,%RAX,1) |
(182) 0x42d902 VMOVUPD 0x180(%R12,%RAX,1),%ZMM15 |
(182) 0x42d90a VMOVUPD %ZMM15,0x180(%RBX,%RAX,1) |
(182) 0x42d912 VADDPD 0x180(%R11,%RAX,1),%ZMM15,%ZMM6 |
(182) 0x42d91a VSUBPD 0x180(%R10,%RAX,1),%ZMM6,%ZMM0 |
(182) 0x42d922 VMOVUPD %ZMM0,0x180(%RDX,%RAX,1) |
(182) 0x42d92a VMOVUPD 0x1c0(%R12,%RAX,1),%ZMM2 |
(182) 0x42d932 VMOVUPD %ZMM2,0x1c0(%RBX,%RAX,1) |
(182) 0x42d93a VADDPD 0x1c0(%R11,%RAX,1),%ZMM2,%ZMM7 |
(182) 0x42d942 VSUBPD 0x1c0(%R10,%RAX,1),%ZMM7,%ZMM8 |
(182) 0x42d94a VMOVUPD %ZMM8,0x1c0(%RDX,%RAX,1) |
(182) 0x42d952 ADD $0x200,%RAX |
(182) 0x42d958 CMP %R14,%RAX |
(182) 0x42d95b JNE 42d817 |
(181) 0x42d961 MOV 0x3c(%RSP),%R12D |
(181) 0x42d966 MOV %ECX,%ESI |
(181) 0x42d968 AND $-0x8,%ESI |
(181) 0x42d96b ADD %ESI,%R8D |
(181) 0x42d96e LEA (%RSI,%R12,1),%R12D |
(181) 0x42d972 TEST $0x7,%CL |
(181) 0x42d975 JE 42dae6 |
(181) 0x42d97b SUB %ESI,%ECX |
(181) 0x42d97d LEA -0x1(%RCX),%EBX |
(181) 0x42d980 CMP $0x2,%EBX |
(181) 0x42d983 JBE 42d9f7 |
(181) 0x42d985 MOVSXD 0x3c(%RSP),%R11 |
(181) 0x42d98a MOV 0x20(%RSP),%R10 |
(181) 0x42d98f MOV 0x28(%RSP),%RAX |
(181) 0x42d994 VMOVQ %XMM5,%R14 |
(181) 0x42d999 VMOVQ %XMM4,%RBX |
(181) 0x42d99e LEA (%R10,%R11,1),%RDX |
(181) 0x42d9a2 LEA (%RAX,%R11,1),%R10 |
(181) 0x42d9a6 ADD %RSI,%RDX |
(181) 0x42d9a9 ADD %RSI,%R10 |
(181) 0x42d9ac VMOVUPD (%R14,%RDX,8),%YMM9 |
(181) 0x42d9b2 MOV 0x30(%RSP),%R14 |
(181) 0x42d9b7 LEA (%R9,%R11,1),%RDX |
(181) 0x42d9bb ADD %RSI,%RDX |
(181) 0x42d9be LEA (%R14,%R11,1),%RAX |
(181) 0x42d9c2 ADD %RDI,%R11 |
(181) 0x42d9c5 VMOVUPD %YMM9,(%RBX,%R10,8) |
(181) 0x42d9cb ADD %RSI,%RAX |
(181) 0x42d9ce VADDPD (%R13,%RDX,8),%YMM9,%YMM10 |
(181) 0x42d9d5 ADD %RSI,%R11 |
(181) 0x42d9d8 VSUBPD (%R13,%RAX,8),%YMM10,%YMM11 |
(181) 0x42d9df VMOVUPD %YMM11,(%R15,%R11,8) |
(181) 0x42d9e5 TEST $0x3,%CL |
(181) 0x42d9e8 JE 42dae6 |
(181) 0x42d9ee AND $-0x4,%ECX |
(181) 0x42d9f1 ADD %ECX,%R8D |
(181) 0x42d9f4 ADD %ECX,%R12D |
(181) 0x42d9f7 MOV 0x20(%RSP),%RBX |
(181) 0x42d9fc MOVSXD %R12D,%R10 |
(181) 0x42d9ff VMOVQ %XMM5,%RCX |
(181) 0x42da04 VMOVQ %XMM4,%RDX |
(181) 0x42da09 LEA (%R9,%R10,1),%RAX |
(181) 0x42da0d LEA (%RDI,%R10,1),%R14 |
(181) 0x42da11 LEA (%RBX,%R10,1),%RSI |
(181) 0x42da15 VMOVSD (%RCX,%RSI,8),%XMM14 |
(181) 0x42da1a MOV 0x28(%RSP),%RSI |
(181) 0x42da1f LEA 0x1(%R8),%ECX |
(181) 0x42da23 LEA (%RSI,%R10,1),%R11 |
(181) 0x42da27 VMOVSD %XMM14,(%RDX,%R11,8) |
(181) 0x42da2d MOV 0x30(%RSP),%R11 |
(181) 0x42da32 VADDSD (%R13,%RAX,8),%XMM14,%XMM15 |
(181) 0x42da39 LEA 0x1(%R12),%EAX |
(181) 0x42da3e ADD %R11,%R10 |
(181) 0x42da41 VSUBSD (%R13,%R10,8),%XMM15,%XMM6 |
(181) 0x42da48 MOV 0x38(%RSP),%R10D |
(181) 0x42da4d VMOVSD %XMM6,(%R15,%R14,8) |
(181) 0x42da53 CMP %R10D,%ECX |
(181) 0x42da56 JAE 42dae6 |
(181) 0x42da5c CLTQ |
(181) 0x42da5e VMOVQ %XMM5,%R14 |
(181) 0x42da63 VMOVQ %XMM4,%RCX |
(181) 0x42da68 ADD $0x2,%R8D |
(181) 0x42da6c LEA (%RBX,%RAX,1),%RDX |
(181) 0x42da70 ADD $0x2,%R12D |
(181) 0x42da74 VMOVSD (%R14,%RDX,8),%XMM0 |
(181) 0x42da7a LEA (%RSI,%RAX,1),%RDX |
(181) 0x42da7e LEA (%RDI,%RAX,1),%R14 |
(181) 0x42da82 VMOVSD %XMM0,(%RCX,%RDX,8) |
(181) 0x42da87 LEA (%R9,%RAX,1),%RDX |
(181) 0x42da8b ADD %R11,%RAX |
(181) 0x42da8e MOV %R11,%RCX |
(181) 0x42da91 VADDSD (%R13,%RDX,8),%XMM0,%XMM2 |
(181) 0x42da98 VSUBSD (%R13,%RAX,8),%XMM2,%XMM7 |
(181) 0x42da9f VMOVSD %XMM7,(%R15,%R14,8) |
(181) 0x42daa5 CMP %R10D,%R8D |
(181) 0x42daa8 JAE 42dae6 |
(181) 0x42daaa MOVSXD %R12D,%R8 |
(181) 0x42daad VMOVQ %XMM4,%R11 |
(181) 0x42dab2 VMOVQ %XMM5,%R12 |
(181) 0x42dab7 ADD %R8,%RBX |
(181) 0x42daba ADD %R8,%RSI |
(181) 0x42dabd ADD %R8,%RDI |
(181) 0x42dac0 ADD %R8,%R9 |
(181) 0x42dac3 ADD %R8,%RCX |
(181) 0x42dac6 VMOVSD (%R12,%RBX,8),%XMM5 |
(181) 0x42dacc VMOVSD %XMM5,(%R11,%RSI,8) |
(181) 0x42dad2 VADDSD (%R13,%R9,8),%XMM5,%XMM4 |
(181) 0x42dad9 VSUBSD (%R13,%RCX,8),%XMM4,%XMM8 |
(181) 0x42dae0 VMOVSD %XMM8,(%R15,%RDI,8) |
(181) 0x42dae6 MOV 0x38(%RSP),%R8D |
(181) 0x42daeb MOV 0x18(%RSP),%RDI |
(181) 0x42daf0 LEA (%RDI),%R13D |
(181) 0x42daf3 CMP %R13D,0x10(%RSP) |
(181) 0x42daf8 JLE 42db20 |
(181) 0x42dafa MOV 0x8(%RSP),%ECX |
(181) 0x42dafe MOV 0xc(%RSP),%R9D |
(181) 0x42db03 MOV 0x14(%RSP),%R14D |
(181) 0x42db08 SUB %R8D,%ECX |
(181) 0x42db0b MOV %R9D,0x3c(%RSP) |
(181) 0x42db10 JMP 42d5f0 |
0x42db15 NOPW %CS:(%RAX,%RAX,1) |
0x42db20 VZEROUPPER |
0x42db23 LEA -0x28(%RBP),%RSP |
0x42db27 POP %RBX |
0x42db28 POP %R12 |
0x42db2a POP %R13 |
0x42db2c POP %R14 |
0x42db2e POP %R15 |
0x42db30 POP %RBP |
0x42db31 RET |
0x42db32 NOPW %CS:(%RAX,%RAX,1) |
0x42db3d NOPL (%RAX) |
(181) 0x42db40 LEA 0x1(%RDI),%R15 |
(181) 0x42db44 MOV %R15,0x18(%RSP) |
(181) 0x42db49 JMP 42daeb |
0x42db4b NOPL (%RAX,%RAX,1) |
(181) 0x42db50 MOV 0x3c(%RSP),%R12D |
(181) 0x42db55 XOR %ESI,%ESI |
(181) 0x42db57 JMP 42d97b |
0x42db5c INC %ECX |
0x42db5e XOR %EDX,%EDX |
0x42db60 JMP 42d591 |
0x42db65 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:62-66 |
Module | exec |
nb instructions | 78 |
nb uops | 76 |
loop length | 292 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.67 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 12.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 8% |
load | 11% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x24(%R15),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0xc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42db23 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x613> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R13D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42db23 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x613> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42db5c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x64c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%R8,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42db23 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x613> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R8D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xc(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ 0x8(%R15),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x14(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x18(%R15),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ (%R15),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R15),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%RDX,%R10,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R13,1),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R11D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R13D,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42d591 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x81> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_mom.cpp:62-66 |
Module | exec |
nb instructions | 78 |
nb uops | 76 |
loop length | 292 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.67 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 12.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 12% |
all | 8% |
load | 11% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x24(%R15),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0xc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42db23 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x613> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R13D,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 42db23 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x613> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42db5c <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x64c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%R8,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 42db23 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x613> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R8D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xc(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ 0x8(%R15),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x14(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x18(%R15),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ (%R15),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R15),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%RDX,%R10,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R13,1),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R11D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R13D,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42d591 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2+0x81> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.2– | 2.53 | 1.27 |
▼Loop 181 - advec_mom.cpp:62-66 - exec– | 0 | 0.01 |
○Loop 182 - advec_mom.cpp:65-66 - exec | 2.52 | 1.27 |