Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage: 3.38% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage: 3.38% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 218 - 221 |
-------------------------------------------------------------------------------- |
218: #pragma omp parallel for simd collapse(2) |
219: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
220: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
221: vel1(i, j) = (vel1(i, j) * node_mass_pre(i, j) + mom_flux(i + 0, j - 1) - mom_flux(i, j)) / node_mass_post(i, j); |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42dce0 PUSH %RBP |
0x42dce1 MOV %RSP,%RBP |
0x42dce4 PUSH %R15 |
0x42dce6 PUSH %R14 |
0x42dce8 PUSH %R13 |
0x42dcea PUSH %R12 |
0x42dcec PUSH %RBX |
0x42dced AND $-0x40,%RSP |
0x42dcf1 ADD $-0x80,%RSP |
0x42dcf5 MOV 0x28(%RDI),%EAX |
0x42dcf8 MOV 0x2c(%RDI),%EDX |
0x42dcfb MOV 0x20(%RDI),%ESI |
0x42dcfe MOV 0x24(%RDI),%EBX |
0x42dd01 ADD $0x3,%EDX |
0x42dd04 LEA 0x1(%RAX),%R15D |
0x42dd08 INC %ESI |
0x42dd0a MOV %EDX,0x3c(%RSP) |
0x42dd0e MOV %ESI,0x38(%RSP) |
0x42dd12 CMP %EDX,%R15D |
0x42dd15 JGE 42e1e3 |
0x42dd1b LEA 0x3(%RBX),%R14D |
0x42dd1f MOV %EDX,%EBX |
0x42dd21 SUB %R15D,%EBX |
0x42dd24 CMP %R14D,%ESI |
0x42dd27 JGE 42e1e3 |
0x42dd2d MOV %R14D,%ECX |
0x42dd30 MOV %RDI,%R12 |
0x42dd33 SUB %ESI,%ECX |
0x42dd35 MOV %ECX,0x58(%RSP) |
0x42dd39 CALL 4046c0 <omp_get_num_threads@plt> |
0x42dd3e MOV %EAX,%R13D |
0x42dd41 CALL 4045b0 <omp_get_thread_num@plt> |
0x42dd46 XOR %EDX,%EDX |
0x42dd48 MOV %EAX,%EDI |
0x42dd4a MOV 0x58(%RSP),%EAX |
0x42dd4e IMUL %EBX,%EAX |
0x42dd51 DIV %R13D |
0x42dd54 MOV %EAX,%ECX |
0x42dd56 CMP %EDX,%EDI |
0x42dd58 JB 42e203 |
0x42dd5e IMUL %ECX,%EDI |
0x42dd61 LEA (%RDI,%RDX,1),%R11D |
0x42dd65 LEA (%RCX,%R11,1),%R8D |
0x42dd69 MOV %R8D,0x34(%RSP) |
0x42dd6e CMP %R8D,%R11D |
0x42dd71 JAE 42e1e3 |
0x42dd77 MOV %R11D,%EAX |
0x42dd7a XOR %EDX,%EDX |
0x42dd7c MOV 0x38(%RSP),%R9D |
0x42dd81 MOV (%R12),%R10 |
0x42dd85 DIVL 0x58(%RSP) |
0x42dd89 MOV 0x18(%R12),%RSI |
0x42dd8e MOV %R10,0x28(%RSP) |
0x42dd93 MOV %RSI,0x18(%RSP) |
0x42dd98 ADD %EDX,%R9D |
0x42dd9b ADD %R15D,%EAX |
0x42dd9e MOV %R14D,%EDX |
0x42dda1 MOV 0x10(%R12),%R15 |
0x42dda6 MOV 0x8(%R12),%R14 |
0x42ddab MOV %R9D,0x74(%RSP) |
0x42ddb0 SUB %R9D,%EDX |
0x42ddb3 MOVSXD %EAX,%R12 |
0x42ddb6 MOV %EAX,0x70(%RSP) |
0x42ddba MOV %R15,0x20(%RSP) |
0x42ddbf MOV %R14,0x10(%RSP) |
0x42ddc4 NOPL (%RAX) |
(148) 0x42ddc8 CMP %EDX,%ECX |
(148) 0x42ddca CMOVBE %ECX,%EDX |
(148) 0x42ddcd LEA (%R11,%RDX,1),%EBX |
(148) 0x42ddd1 MOV %EBX,0x5c(%RSP) |
(148) 0x42ddd5 CMP %EBX,%R11D |
(148) 0x42ddd8 JAE 42e1b5 |
(148) 0x42ddde MOV 0x20(%RSP),%RDI |
(148) 0x42dde3 MOV 0x28(%RSP),%RCX |
(148) 0x42dde8 MOV 0x18(%RSP),%R8 |
(148) 0x42dded MOV 0x70(%RSP),%EAX |
(148) 0x42ddf1 MOV (%RDI),%RBX |
(148) 0x42ddf4 MOV 0x10(%RDI),%R14 |
(148) 0x42ddf8 MOV 0x10(%RSP),%RDI |
(148) 0x42ddfd MOV (%R8),%R9 |
(148) 0x42de00 DEC %EAX |
(148) 0x42de02 MOV (%RCX),%R13 |
(148) 0x42de05 MOV 0x10(%R8),%RSI |
(148) 0x42de09 MOVSXD %EAX,%R10 |
(148) 0x42de0c IMUL %R12,%RBX |
(148) 0x42de10 MOV (%RDI),%R8 |
(148) 0x42de13 IMUL %R9,%R10 |
(148) 0x42de17 MOV 0x10(%RCX),%R15 |
(148) 0x42de1b LEA -0x1(%RDX),%EAX |
(148) 0x42de1e IMUL %R12,%R13 |
(148) 0x42de22 MOV 0x10(%RDI),%RCX |
(148) 0x42de26 IMUL %R12,%R9 |
(148) 0x42de2a MOV %RBX,0x48(%RSP) |
(148) 0x42de2f IMUL %R12,%R8 |
(148) 0x42de33 MOV %R10,0x50(%RSP) |
(148) 0x42de38 MOV %R13,0x40(%RSP) |
(148) 0x42de3d MOV %R9,0x68(%RSP) |
(148) 0x42de42 MOV %RCX,0x78(%RSP) |
(148) 0x42de47 MOV %R8,0x60(%RSP) |
(148) 0x42de4c CMP $0x6,%EAX |
(148) 0x42de4f JBE 42e1f8 |
(148) 0x42de55 MOVSXD 0x74(%RSP),%RAX |
(148) 0x42de5a LEA (%R13,%RAX,1),%R13 |
(148) 0x42de5f ADD %RAX,%RBX |
(148) 0x42de62 LEA (%R9,%RAX,1),%R9 |
(148) 0x42de66 LEA (%R15,%R13,8),%RCX |
(148) 0x42de6a LEA (%R14,%RBX,8),%R13 |
(148) 0x42de6e LEA (%R10,%RAX,1),%RBX |
(148) 0x42de72 ADD %R8,%RAX |
(148) 0x42de75 MOV 0x78(%RSP),%R8 |
(148) 0x42de7a LEA (%RSI,%RBX,8),%R10 |
(148) 0x42de7e LEA (%RSI,%R9,8),%RBX |
(148) 0x42de82 LEA (%R8,%RAX,8),%R9 |
(148) 0x42de86 MOV %EDX,%R8D |
(148) 0x42de89 XOR %EAX,%EAX |
(148) 0x42de8b SHR $0x3,%R8D |
(148) 0x42de8f SAL $0x6,%R8 |
(148) 0x42de93 LEA -0x40(%R8),%RDI |
(148) 0x42de97 SHR $0x6,%RDI |
(148) 0x42de9b INC %RDI |
(148) 0x42de9e AND $0x3,%EDI |
(148) 0x42dea1 JE 42df44 |
(148) 0x42dea7 CMP $0x1,%RDI |
(148) 0x42deab JE 42df0c |
(148) 0x42dead CMP $0x2,%RDI |
(148) 0x42deb1 JE 42dedd |
(148) 0x42deb3 VMOVUPD (%R13),%ZMM0 |
(148) 0x42deba VMOVUPD (%RBX),%ZMM3 |
(148) 0x42dec0 MOV $0x40,%EAX |
(148) 0x42dec5 VFMSUB132PD (%RCX),%ZMM3,%ZMM0 |
(148) 0x42decb VADDPD (%R10),%ZMM0,%ZMM1 |
(148) 0x42ded1 VDIVPD (%R9),%ZMM1,%ZMM2 |
(148) 0x42ded7 VMOVUPD %ZMM2,(%RCX) |
(148) 0x42dedd VMOVUPD (%R13,%RAX,1),%ZMM4 |
(148) 0x42dee5 VMOVUPD (%RBX,%RAX,1),%ZMM5 |
(148) 0x42deec VFMSUB132PD (%RCX,%RAX,1),%ZMM5,%ZMM4 |
(148) 0x42def3 VADDPD (%R10,%RAX,1),%ZMM4,%ZMM6 |
(148) 0x42defa VDIVPD (%R9,%RAX,1),%ZMM6,%ZMM7 |
(148) 0x42df01 VMOVUPD %ZMM7,(%RCX,%RAX,1) |
(148) 0x42df08 ADD $0x40,%RAX |
(148) 0x42df0c VMOVUPD (%R13,%RAX,1),%ZMM8 |
(148) 0x42df14 VMOVUPD (%RBX,%RAX,1),%ZMM9 |
(148) 0x42df1b VFMSUB132PD (%RCX,%RAX,1),%ZMM9,%ZMM8 |
(148) 0x42df22 VADDPD (%R10,%RAX,1),%ZMM8,%ZMM10 |
(148) 0x42df29 VDIVPD (%R9,%RAX,1),%ZMM10,%ZMM11 |
(148) 0x42df30 VMOVUPD %ZMM11,(%RCX,%RAX,1) |
(148) 0x42df37 ADD $0x40,%RAX |
(148) 0x42df3b CMP %RAX,%R8 |
(148) 0x42df3e JE 42e00e |
(149) 0x42df44 VMOVUPD (%R13,%RAX,1),%ZMM12 |
(149) 0x42df4c VMOVUPD (%RBX,%RAX,1),%ZMM13 |
(149) 0x42df53 VFMSUB132PD (%RCX,%RAX,1),%ZMM13,%ZMM12 |
(149) 0x42df5a VADDPD (%R10,%RAX,1),%ZMM12,%ZMM14 |
(149) 0x42df61 VDIVPD (%R9,%RAX,1),%ZMM14,%ZMM15 |
(149) 0x42df68 VMOVUPD %ZMM15,(%RCX,%RAX,1) |
(149) 0x42df6f VMOVUPD 0x40(%R13,%RAX,1),%ZMM0 |
(149) 0x42df77 VMOVUPD 0x40(%RBX,%RAX,1),%ZMM3 |
(149) 0x42df7f VFMSUB132PD 0x40(%RCX,%RAX,1),%ZMM3,%ZMM0 |
(149) 0x42df87 VADDPD 0x40(%R10,%RAX,1),%ZMM0,%ZMM1 |
(149) 0x42df8f VDIVPD 0x40(%R9,%RAX,1),%ZMM1,%ZMM2 |
(149) 0x42df97 VMOVUPD %ZMM2,0x40(%RCX,%RAX,1) |
(149) 0x42df9f VMOVUPD 0x80(%R13,%RAX,1),%ZMM5 |
(149) 0x42dfa7 VMOVUPD 0x80(%RBX,%RAX,1),%ZMM4 |
(149) 0x42dfaf VFMSUB132PD 0x80(%RCX,%RAX,1),%ZMM4,%ZMM5 |
(149) 0x42dfb7 VADDPD 0x80(%R10,%RAX,1),%ZMM5,%ZMM6 |
(149) 0x42dfbf VDIVPD 0x80(%R9,%RAX,1),%ZMM6,%ZMM7 |
(149) 0x42dfc7 VMOVUPD %ZMM7,0x80(%RCX,%RAX,1) |
(149) 0x42dfcf VMOVUPD 0xc0(%R13,%RAX,1),%ZMM8 |
(149) 0x42dfd7 VMOVUPD 0xc0(%RBX,%RAX,1),%ZMM9 |
(149) 0x42dfdf VFMSUB132PD 0xc0(%RCX,%RAX,1),%ZMM9,%ZMM8 |
(149) 0x42dfe7 VADDPD 0xc0(%R10,%RAX,1),%ZMM8,%ZMM10 |
(149) 0x42dfef VDIVPD 0xc0(%R9,%RAX,1),%ZMM10,%ZMM11 |
(149) 0x42dff7 VMOVUPD %ZMM11,0xc0(%RCX,%RAX,1) |
(149) 0x42dfff ADD $0x100,%RAX |
(149) 0x42e005 CMP %RAX,%R8 |
(149) 0x42e008 JNE 42df44 |
(148) 0x42e00e MOV 0x74(%RSP),%R13D |
(148) 0x42e013 MOV %EDX,%ECX |
(148) 0x42e015 AND $-0x8,%ECX |
(148) 0x42e018 ADD %ECX,%R11D |
(148) 0x42e01b LEA (%RCX,%R13,1),%EDI |
(148) 0x42e01f TEST $0x7,%DL |
(148) 0x42e022 JE 42e1b0 |
(148) 0x42e028 SUB %ECX,%EDX |
(148) 0x42e02a LEA -0x1(%RDX),%R10D |
(148) 0x42e02e CMP $0x2,%R10D |
(148) 0x42e032 JBE 42e0ab |
(148) 0x42e034 MOVSXD 0x74(%RSP),%RAX |
(148) 0x42e039 MOV 0x40(%RSP),%RBX |
(148) 0x42e03e MOV 0x60(%RSP),%R8 |
(148) 0x42e043 MOV 0x48(%RSP),%R10 |
(148) 0x42e048 LEA (%RBX,%RAX,1),%R9 |
(148) 0x42e04c MOV 0x68(%RSP),%R13 |
(148) 0x42e051 ADD %RCX,%R9 |
(148) 0x42e054 LEA (%R8,%RAX,1),%R8 |
(148) 0x42e058 ADD %RAX,%R10 |
(148) 0x42e05b LEA (%R15,%R9,8),%RBX |
(148) 0x42e05f MOV 0x50(%RSP),%R9 |
(148) 0x42e064 ADD %RCX,%R10 |
(148) 0x42e067 ADD %RCX,%R8 |
(148) 0x42e06a VMOVUPD (%RBX),%YMM12 |
(148) 0x42e06e ADD %RAX,%R9 |
(148) 0x42e071 ADD %R13,%RAX |
(148) 0x42e074 ADD %RCX,%RAX |
(148) 0x42e077 ADD %RCX,%R9 |
(148) 0x42e07a MOV 0x78(%RSP),%RCX |
(148) 0x42e07f VMOVUPD (%RSI,%RAX,8),%YMM13 |
(148) 0x42e084 VFMSUB132PD (%R14,%R10,8),%YMM13,%YMM12 |
(148) 0x42e08a VADDPD (%RSI,%R9,8),%YMM12,%YMM14 |
(148) 0x42e090 VDIVPD (%RCX,%R8,8),%YMM14,%YMM15 |
(148) 0x42e096 VMOVUPD %YMM15,(%RBX) |
(148) 0x42e09a TEST $0x3,%DL |
(148) 0x42e09d JE 42e1b0 |
(148) 0x42e0a3 AND $-0x4,%EDX |
(148) 0x42e0a6 ADD %EDX,%R11D |
(148) 0x42e0a9 ADD %EDX,%EDI |
(148) 0x42e0ab MOV 0x48(%RSP),%R13 |
(148) 0x42e0b0 MOV 0x68(%RSP),%R8 |
(148) 0x42e0b5 MOVSXD %EDI,%RAX |
(148) 0x42e0b8 MOV 0x40(%RSP),%RBX |
(148) 0x42e0bd MOV 0x50(%RSP),%R10 |
(148) 0x42e0c2 LEA (%R13,%RAX,1),%R9 |
(148) 0x42e0c7 ADD %RAX,%R8 |
(148) 0x42e0ca VMOVSD (%R14,%R9,8),%XMM0 |
(148) 0x42e0d0 VMOVSD (%RSI,%R8,8),%XMM3 |
(148) 0x42e0d6 LEA (%RBX,%RAX,1),%RDX |
(148) 0x42e0da LEA (%R10,%RAX,1),%RCX |
(148) 0x42e0de LEA (%R15,%RDX,8),%RDX |
(148) 0x42e0e2 MOV 0x78(%RSP),%R9 |
(148) 0x42e0e7 MOV 0x5c(%RSP),%R8D |
(148) 0x42e0ec VFMSUB132SD (%RDX),%XMM3,%XMM0 |
(148) 0x42e0f1 VADDSD (%RSI,%RCX,8),%XMM0,%XMM1 |
(148) 0x42e0f6 MOV 0x60(%RSP),%RCX |
(148) 0x42e0fb ADD %RCX,%RAX |
(148) 0x42e0fe VDIVSD (%R9,%RAX,8),%XMM1,%XMM2 |
(148) 0x42e104 VMOVSD %XMM2,(%RDX) |
(148) 0x42e108 LEA 0x1(%R11),%EDX |
(148) 0x42e10c LEA 0x1(%RDI),%EAX |
(148) 0x42e10f CMP %R8D,%EDX |
(148) 0x42e112 JAE 42e1b0 |
(148) 0x42e118 MOV 0x68(%RSP),%R8 |
(148) 0x42e11d CLTQ |
(148) 0x42e11f ADD $0x2,%R11D |
(148) 0x42e123 ADD $0x2,%EDI |
(148) 0x42e126 LEA (%R13,%RAX,1),%R9 |
(148) 0x42e12b LEA (%RBX,%RAX,1),%RCX |
(148) 0x42e12f ADD %RAX,%R8 |
(148) 0x42e132 VMOVSD (%R14,%R9,8),%XMM5 |
(148) 0x42e138 LEA (%R15,%RCX,8),%RDX |
(148) 0x42e13c LEA (%R10,%RAX,1),%RCX |
(148) 0x42e140 VMOVSD (%RSI,%R8,8),%XMM4 |
(148) 0x42e146 MOV 0x60(%RSP),%R9 |
(148) 0x42e14b VFMSUB132SD (%RDX),%XMM4,%XMM5 |
(148) 0x42e150 ADD %R9,%RAX |
(148) 0x42e153 VADDSD (%RSI,%RCX,8),%XMM5,%XMM6 |
(148) 0x42e158 MOV 0x78(%RSP),%RCX |
(148) 0x42e15d VDIVSD (%RCX,%RAX,8),%XMM6,%XMM7 |
(148) 0x42e162 MOV 0x5c(%RSP),%EAX |
(148) 0x42e166 VMOVSD %XMM7,(%RDX) |
(148) 0x42e16a CMP %EAX,%R11D |
(148) 0x42e16d JAE 42e1b0 |
(148) 0x42e16f MOVSXD %EDI,%R11 |
(148) 0x42e172 MOV 0x68(%RSP),%RDI |
(148) 0x42e177 ADD %R11,%R13 |
(148) 0x42e17a ADD %R11,%RBX |
(148) 0x42e17d ADD %R11,%R10 |
(148) 0x42e180 ADD %R11,%R9 |
(148) 0x42e183 ADD %R11,%RDI |
(148) 0x42e186 VMOVSD (%R14,%R13,8),%XMM8 |
(148) 0x42e18c LEA (%R15,%RBX,8),%R15 |
(148) 0x42e190 MOV 0x78(%RSP),%R14 |
(148) 0x42e195 VMOVSD (%RSI,%RDI,8),%XMM9 |
(148) 0x42e19a VFMSUB132SD (%R15),%XMM9,%XMM8 |
(148) 0x42e19f VADDSD (%RSI,%R10,8),%XMM8,%XMM10 |
(148) 0x42e1a5 VDIVSD (%R14,%R9,8),%XMM10,%XMM11 |
(148) 0x42e1ab VMOVSD %XMM11,(%R15) |
(148) 0x42e1b0 MOV 0x5c(%RSP),%R11D |
(148) 0x42e1b5 INCL 0x70(%RSP) |
(148) 0x42e1b9 INC %R12 |
(148) 0x42e1bc MOV 0x70(%RSP),%ESI |
(148) 0x42e1c0 CMP %ESI,0x3c(%RSP) |
(148) 0x42e1c4 JLE 42e1e0 |
(148) 0x42e1c6 MOV 0x34(%RSP),%ECX |
(148) 0x42e1ca MOV 0x38(%RSP),%R8D |
(148) 0x42e1cf MOV 0x58(%RSP),%EDX |
(148) 0x42e1d3 MOV %R8D,0x74(%RSP) |
(148) 0x42e1d8 SUB %R11D,%ECX |
(148) 0x42e1db JMP 42ddc8 |
0x42e1e0 VZEROUPPER |
0x42e1e3 LEA -0x28(%RBP),%RSP |
0x42e1e7 POP %RBX |
0x42e1e8 POP %R12 |
0x42e1ea POP %R13 |
0x42e1ec POP %R14 |
0x42e1ee POP %R15 |
0x42e1f0 POP %RBP |
0x42e1f1 RET |
0x42e1f2 NOPW (%RAX,%RAX,1) |
(148) 0x42e1f8 MOV 0x74(%RSP),%EDI |
(148) 0x42e1fc XOR %ECX,%ECX |
(148) 0x42e1fe JMP 42e028 |
0x42e203 INC %ECX |
0x42e205 XOR %EDX,%EDX |
0x42e207 JMP 42dd5e |
0x42e20c NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.19 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.81 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | advec_mom.cpp:218-221 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 269 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.26-15.25 |
Stall cycles | 0.00-0.65 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 9% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e1e3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e1e3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42e203 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x523> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42e1e3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42dd5e <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:218-221 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 269 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.26-15.25 |
Stall cycles | 0.00-0.65 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 9% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e1e3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e1e3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42e203 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x523> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42e1e3 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42dd5e <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | 3.38 | 1.12 |
▼Loop 148 - advec_mom.cpp:220-221 - exec | 0.01 | 0 |
○Loop 149 - advec_mom.cpp:221-221 - exec | 3.37 | 1.11 |