Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:146-149 [...] | Coverage: 3.39% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:146-149 [...] | Coverage: 3.39% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 146 - 149 |
-------------------------------------------------------------------------------- |
146: #pragma omp parallel for simd collapse(2) |
147: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
148: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
149: vel1(i, j) = (vel1(i, j) * node_mass_pre(i, j) + mom_flux(i - 1, j + 0) - mom_flux(i, j)) / node_mass_post(i, j); |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42bfb0 PUSH %RBP |
0x42bfb1 MOV %RSP,%RBP |
0x42bfb4 PUSH %R15 |
0x42bfb6 PUSH %R14 |
0x42bfb8 PUSH %R13 |
0x42bfba PUSH %R12 |
0x42bfbc PUSH %RBX |
0x42bfbd AND $-0x40,%RSP |
0x42bfc1 ADD $-0x80,%RSP |
0x42bfc5 MOV 0x28(%RDI),%EAX |
0x42bfc8 MOV 0x2c(%RDI),%EDX |
0x42bfcb MOV 0x20(%RDI),%EBX |
0x42bfce MOV 0x24(%RDI),%ECX |
0x42bfd1 ADD $0x3,%EDX |
0x42bfd4 LEA 0x1(%RAX),%R15D |
0x42bfd8 LEA 0x1(%RBX),%ESI |
0x42bfdb MOV %EDX,0x44(%RSP) |
0x42bfdf MOV %ESI,0x40(%RSP) |
0x42bfe3 CMP %EDX,%R15D |
0x42bfe6 JGE 42c493 |
0x42bfec MOV %EDX,%EBX |
0x42bfee LEA 0x3(%RCX),%R14D |
0x42bff2 SUB %R15D,%EBX |
0x42bff5 CMP %R14D,%ESI |
0x42bff8 JGE 42c493 |
0x42bffe MOV %RDI,%R13 |
0x42c001 MOV %R14D,%EDI |
0x42c004 SUB %ESI,%EDI |
0x42c006 MOV %EDI,0x68(%RSP) |
0x42c00a CALL 4046c0 <omp_get_num_threads@plt> |
0x42c00f MOV %EAX,%R12D |
0x42c012 CALL 4045b0 <omp_get_thread_num@plt> |
0x42c017 XOR %EDX,%EDX |
0x42c019 MOV %EAX,%R8D |
0x42c01c MOV 0x68(%RSP),%EAX |
0x42c020 IMUL %EBX,%EAX |
0x42c023 DIV %R12D |
0x42c026 MOV %EAX,%EDI |
0x42c028 CMP %EDX,%R8D |
0x42c02b JB 42c4b2 |
0x42c031 IMUL %EDI,%R8D |
0x42c035 LEA (%R8,%RDX,1),%EBX |
0x42c039 LEA (%RDI,%RBX,1),%R9D |
0x42c03d MOV %R9D,0x3c(%RSP) |
0x42c042 CMP %R9D,%EBX |
0x42c045 JAE 42c493 |
0x42c04b MOV %EBX,%EAX |
0x42c04d XOR %EDX,%EDX |
0x42c04f MOV 0x40(%RSP),%ESI |
0x42c053 MOV (%R13),%R10 |
0x42c057 DIVL 0x68(%RSP) |
0x42c05b MOV 0x10(%R13),%R11 |
0x42c05f MOV %R10,0x30(%RSP) |
0x42c064 MOV %R11,0x28(%RSP) |
0x42c069 ADD %EDX,%ESI |
0x42c06b ADD %R15D,%EAX |
0x42c06e MOV %R14D,%EDX |
0x42c071 MOV 0x18(%R13),%R15 |
0x42c075 MOV 0x8(%R13),%R14 |
0x42c079 SUB %ESI,%EDX |
0x42c07b MOVSXD %EAX,%R12 |
0x42c07e MOV %R15,0x20(%RSP) |
0x42c083 MOV %R14,0x18(%RSP) |
0x42c088 NOPL (%RAX,%RAX,1) |
(139) 0x42c090 CMP %EDX,%EDI |
(139) 0x42c092 CMOVBE %EDI,%EDX |
(139) 0x42c095 LEA (%RBX,%RDX,1),%ECX |
(139) 0x42c098 MOV %ECX,0x6c(%RSP) |
(139) 0x42c09c CMP %ECX,%EBX |
(139) 0x42c09e JAE 42c469 |
(139) 0x42c0a4 MOV 0x30(%RSP),%R13 |
(139) 0x42c0a9 MOV 0x28(%RSP),%RDI |
(139) 0x42c0ae MOV 0x18(%RSP),%RAX |
(139) 0x42c0b3 MOV 0x20(%RSP),%R9 |
(139) 0x42c0b8 MOV 0x10(%R13),%R14 |
(139) 0x42c0bc MOV (%R13),%RCX |
(139) 0x42c0c0 MOV (%RAX),%R10 |
(139) 0x42c0c3 MOV (%RDI),%R13 |
(139) 0x42c0c6 IMUL %R12,%RCX |
(139) 0x42c0ca MOV 0x10(%RDI),%R15 |
(139) 0x42c0ce MOV (%R9),%R11 |
(139) 0x42c0d1 LEA -0x1(%RDX),%EDI |
(139) 0x42c0d4 IMUL %R12,%R13 |
(139) 0x42c0d8 MOV 0x10(%R9),%R8 |
(139) 0x42c0dc MOV 0x10(%RAX),%R9 |
(139) 0x42c0e0 MOV %R14,0x50(%RSP) |
(139) 0x42c0e5 IMUL %R12,%R10 |
(139) 0x42c0e9 MOV %R15,0x70(%RSP) |
(139) 0x42c0ee MOV %RCX,0x48(%RSP) |
(139) 0x42c0f3 IMUL %R12,%R11 |
(139) 0x42c0f7 MOV %R13,0x58(%RSP) |
(139) 0x42c0fc MOV %R9,0x78(%RSP) |
(139) 0x42c101 MOV %R10,0x60(%RSP) |
(139) 0x42c106 CMP $0x6,%EDI |
(139) 0x42c109 JBE 42c4a8 |
(139) 0x42c10f MOVSXD %ESI,%RAX |
(139) 0x42c112 ADD %RAX,%R13 |
(139) 0x42c115 ADD %RAX,%RCX |
(139) 0x42c118 LEA (%R11,%RAX,1),%RDI |
(139) 0x42c11c ADD %R10,%RAX |
(139) 0x42c11f MOV %EDX,%R10D |
(139) 0x42c122 LEA (%R15,%R13,8),%R15 |
(139) 0x42c126 LEA (%R9,%RAX,8),%R13 |
(139) 0x42c12a SAL $0x3,%RDI |
(139) 0x42c12e SHR $0x3,%R10D |
(139) 0x42c132 LEA (%R14,%RCX,8),%RCX |
(139) 0x42c136 LEA -0x8(%R8,%RDI,1),%R14 |
(139) 0x42c13b XOR %EAX,%EAX |
(139) 0x42c13d SAL $0x6,%R10 |
(139) 0x42c141 ADD %R8,%RDI |
(139) 0x42c144 LEA -0x40(%R10),%R9 |
(139) 0x42c148 SHR $0x6,%R9 |
(139) 0x42c14c INC %R9 |
(139) 0x42c14f AND $0x3,%R9D |
(139) 0x42c153 JE 42c1f6 |
(139) 0x42c159 CMP $0x1,%R9 |
(139) 0x42c15d JE 42c1be |
(139) 0x42c15f CMP $0x2,%R9 |
(139) 0x42c163 JE 42c18f |
(139) 0x42c165 VMOVUPD (%R15),%ZMM0 |
(139) 0x42c16b VMOVUPD (%RDI),%ZMM3 |
(139) 0x42c171 MOV $0x40,%EAX |
(139) 0x42c176 VFMSUB132PD (%RCX),%ZMM3,%ZMM0 |
(139) 0x42c17c VADDPD (%R14),%ZMM0,%ZMM1 |
(139) 0x42c182 VDIVPD (%R13),%ZMM1,%ZMM2 |
(139) 0x42c189 VMOVUPD %ZMM2,(%RCX) |
(139) 0x42c18f VMOVUPD (%R15,%RAX,1),%ZMM4 |
(139) 0x42c196 VMOVUPD (%RDI,%RAX,1),%ZMM5 |
(139) 0x42c19d VFMSUB132PD (%RCX,%RAX,1),%ZMM5,%ZMM4 |
(139) 0x42c1a4 VADDPD (%R14,%RAX,1),%ZMM4,%ZMM6 |
(139) 0x42c1ab VDIVPD (%R13,%RAX,1),%ZMM6,%ZMM7 |
(139) 0x42c1b3 VMOVUPD %ZMM7,(%RCX,%RAX,1) |
(139) 0x42c1ba ADD $0x40,%RAX |
(139) 0x42c1be VMOVUPD (%R15,%RAX,1),%ZMM8 |
(139) 0x42c1c5 VMOVUPD (%RDI,%RAX,1),%ZMM9 |
(139) 0x42c1cc VFMSUB132PD (%RCX,%RAX,1),%ZMM9,%ZMM8 |
(139) 0x42c1d3 VADDPD (%R14,%RAX,1),%ZMM8,%ZMM10 |
(139) 0x42c1da VDIVPD (%R13,%RAX,1),%ZMM10,%ZMM11 |
(139) 0x42c1e2 VMOVUPD %ZMM11,(%RCX,%RAX,1) |
(139) 0x42c1e9 ADD $0x40,%RAX |
(139) 0x42c1ed CMP %R10,%RAX |
(139) 0x42c1f0 JE 42c2c0 |
(140) 0x42c1f6 VMOVUPD (%R15,%RAX,1),%ZMM12 |
(140) 0x42c1fd VMOVUPD (%RDI,%RAX,1),%ZMM13 |
(140) 0x42c204 VFMSUB132PD (%RCX,%RAX,1),%ZMM13,%ZMM12 |
(140) 0x42c20b VADDPD (%R14,%RAX,1),%ZMM12,%ZMM14 |
(140) 0x42c212 VDIVPD (%R13,%RAX,1),%ZMM14,%ZMM15 |
(140) 0x42c21a VMOVUPD %ZMM15,(%RCX,%RAX,1) |
(140) 0x42c221 VMOVUPD 0x40(%R15,%RAX,1),%ZMM0 |
(140) 0x42c229 VMOVUPD 0x40(%RDI,%RAX,1),%ZMM3 |
(140) 0x42c231 VFMSUB132PD 0x40(%RCX,%RAX,1),%ZMM3,%ZMM0 |
(140) 0x42c239 VADDPD 0x40(%R14,%RAX,1),%ZMM0,%ZMM1 |
(140) 0x42c241 VDIVPD 0x40(%R13,%RAX,1),%ZMM1,%ZMM2 |
(140) 0x42c249 VMOVUPD %ZMM2,0x40(%RCX,%RAX,1) |
(140) 0x42c251 VMOVUPD 0x80(%R15,%RAX,1),%ZMM5 |
(140) 0x42c259 VMOVUPD 0x80(%RDI,%RAX,1),%ZMM4 |
(140) 0x42c261 VFMSUB132PD 0x80(%RCX,%RAX,1),%ZMM4,%ZMM5 |
(140) 0x42c269 VADDPD 0x80(%R14,%RAX,1),%ZMM5,%ZMM6 |
(140) 0x42c271 VDIVPD 0x80(%R13,%RAX,1),%ZMM6,%ZMM7 |
(140) 0x42c279 VMOVUPD %ZMM7,0x80(%RCX,%RAX,1) |
(140) 0x42c281 VMOVUPD 0xc0(%R15,%RAX,1),%ZMM8 |
(140) 0x42c289 VMOVUPD 0xc0(%RDI,%RAX,1),%ZMM9 |
(140) 0x42c291 VFMSUB132PD 0xc0(%RCX,%RAX,1),%ZMM9,%ZMM8 |
(140) 0x42c299 VADDPD 0xc0(%R14,%RAX,1),%ZMM8,%ZMM10 |
(140) 0x42c2a1 VDIVPD 0xc0(%R13,%RAX,1),%ZMM10,%ZMM11 |
(140) 0x42c2a9 VMOVUPD %ZMM11,0xc0(%RCX,%RAX,1) |
(140) 0x42c2b1 ADD $0x100,%RAX |
(140) 0x42c2b7 CMP %R10,%RAX |
(140) 0x42c2ba JNE 42c1f6 |
(139) 0x42c2c0 MOV %EDX,%R13D |
(139) 0x42c2c3 AND $-0x8,%R13D |
(139) 0x42c2c7 ADD %R13D,%EBX |
(139) 0x42c2ca LEA (%R13,%RSI,1),%ECX |
(139) 0x42c2cf TEST $0x7,%DL |
(139) 0x42c2d2 JE 42c465 |
(139) 0x42c2d8 SUB %R13D,%EDX |
(139) 0x42c2db LEA -0x1(%RDX),%R15D |
(139) 0x42c2df CMP $0x2,%R15D |
(139) 0x42c2e3 JBE 42c358 |
(139) 0x42c2e5 MOV 0x48(%RSP),%RDI |
(139) 0x42c2ea MOVSXD %ESI,%RSI |
(139) 0x42c2ed MOV 0x50(%RSP),%R14 |
(139) 0x42c2f2 MOV 0x60(%RSP),%R9 |
(139) 0x42c2f7 MOV 0x58(%RSP),%RAX |
(139) 0x42c2fc ADD %RSI,%RDI |
(139) 0x42c2ff ADD %R13,%RDI |
(139) 0x42c302 LEA (%R9,%RSI,1),%R15 |
(139) 0x42c306 LEA (%R14,%RDI,8),%R10 |
(139) 0x42c30a LEA (%R11,%RSI,1),%RDI |
(139) 0x42c30e ADD %RAX,%RSI |
(139) 0x42c311 ADD %R13,%R15 |
(139) 0x42c314 ADD %R13,%RDI |
(139) 0x42c317 ADD %R13,%RSI |
(139) 0x42c31a VMOVUPD (%R10),%YMM12 |
(139) 0x42c31f MOV 0x70(%RSP),%R13 |
(139) 0x42c324 VMOVUPD -0x8(%R8,%RDI,8),%YMM13 |
(139) 0x42c32b VFMADD132PD (%R13,%RSI,8),%YMM13,%YMM12 |
(139) 0x42c332 MOV 0x78(%RSP),%RSI |
(139) 0x42c337 VSUBPD (%R8,%RDI,8),%YMM12,%YMM14 |
(139) 0x42c33d VDIVPD (%RSI,%R15,8),%YMM14,%YMM15 |
(139) 0x42c343 VMOVUPD %YMM15,(%R10) |
(139) 0x42c348 TEST $0x3,%DL |
(139) 0x42c34b JE 42c465 |
(139) 0x42c351 AND $-0x4,%EDX |
(139) 0x42c354 ADD %EDX,%EBX |
(139) 0x42c356 ADD %EDX,%ECX |
(139) 0x42c358 MOV 0x48(%RSP),%R15 |
(139) 0x42c35d MOVSXD %ECX,%RAX |
(139) 0x42c360 MOV 0x50(%RSP),%R14 |
(139) 0x42c365 LEA -0x1(%RCX),%R10D |
(139) 0x42c369 MOV 0x58(%RSP),%R9 |
(139) 0x42c36e MOVSXD %R10D,%R13 |
(139) 0x42c371 MOV 0x60(%RSP),%R10 |
(139) 0x42c376 LEA (%R15,%RAX,1),%RDX |
(139) 0x42c37a ADD %R11,%R13 |
(139) 0x42c37d LEA (%R14,%RDX,8),%RSI |
(139) 0x42c381 MOV 0x70(%RSP),%RDX |
(139) 0x42c386 LEA (%R9,%RAX,1),%RDI |
(139) 0x42c38a VMOVSD (%R8,%R13,8),%XMM3 |
(139) 0x42c390 MOV 0x78(%RSP),%R13 |
(139) 0x42c395 VMOVSD (%RDX,%RDI,8),%XMM0 |
(139) 0x42c39a LEA (%R11,%RAX,1),%RDI |
(139) 0x42c39e ADD %R10,%RAX |
(139) 0x42c3a1 LEA (%R8,%RDI,8),%RDX |
(139) 0x42c3a5 MOV 0x6c(%RSP),%EDI |
(139) 0x42c3a9 VFMADD132SD (%RSI),%XMM3,%XMM0 |
(139) 0x42c3ae VSUBSD (%RDX),%XMM0,%XMM1 |
(139) 0x42c3b2 VDIVSD (%R13,%RAX,8),%XMM1,%XMM2 |
(139) 0x42c3b9 VMOVSD %XMM2,(%RSI) |
(139) 0x42c3bd LEA 0x1(%RBX),%ESI |
(139) 0x42c3c0 LEA 0x1(%RCX),%EAX |
(139) 0x42c3c3 CMP %EDI,%ESI |
(139) 0x42c3c5 JAE 42c465 |
(139) 0x42c3cb CLTQ |
(139) 0x42c3cd MOV %R14,%R13 |
(139) 0x42c3d0 ADD $0x2,%EBX |
(139) 0x42c3d3 ADD $0x2,%ECX |
(139) 0x42c3d6 LEA (%R15,%RAX,1),%RSI |
(139) 0x42c3da LEA (%R14,%RSI,8),%RSI |
(139) 0x42c3de LEA (%R11,%RAX,1),%R14 |
(139) 0x42c3e2 LEA (%R8,%R14,8),%RDI |
(139) 0x42c3e6 MOV %R9,%R14 |
(139) 0x42c3e9 LEA (%R9,%RAX,1),%R9 |
(139) 0x42c3ed ADD %R10,%RAX |
(139) 0x42c3f0 MOV %RDI,0x60(%RSP) |
(139) 0x42c3f5 MOV 0x70(%RSP),%RDI |
(139) 0x42c3fa VMOVSD (%RDI,%R9,8),%XMM5 |
(139) 0x42c400 MOV 0x60(%RSP),%R9 |
(139) 0x42c405 VMOVSD (%R9),%XMM4 |
(139) 0x42c40a MOV %R10,%R9 |
(139) 0x42c40d MOV 0x78(%RSP),%R10 |
(139) 0x42c412 VFMSUB132SD (%RSI),%XMM4,%XMM5 |
(139) 0x42c417 VADDSD (%RDX),%XMM5,%XMM6 |
(139) 0x42c41b MOV 0x6c(%RSP),%EDX |
(139) 0x42c41f VDIVSD (%R10,%RAX,8),%XMM6,%XMM7 |
(139) 0x42c425 VMOVSD %XMM7,(%RSI) |
(139) 0x42c429 CMP %EDX,%EBX |
(139) 0x42c42b JAE 42c465 |
(139) 0x42c42d MOVSXD %ECX,%RCX |
(139) 0x42c430 MOV 0x60(%RSP),%RBX |
(139) 0x42c435 ADD %RCX,%R14 |
(139) 0x42c438 ADD %RCX,%R11 |
(139) 0x42c43b ADD %RCX,%R15 |
(139) 0x42c43e ADD %RCX,%R9 |
(139) 0x42c441 VMOVSD (%RDI,%R14,8),%XMM8 |
(139) 0x42c447 VMOVSD (%R8,%R11,8),%XMM9 |
(139) 0x42c44d LEA (%R13,%R15,8),%RAX |
(139) 0x42c452 VFMSUB132SD (%RAX),%XMM9,%XMM8 |
(139) 0x42c457 VADDSD (%RBX),%XMM8,%XMM10 |
(139) 0x42c45b VDIVSD (%R10,%R9,8),%XMM10,%XMM11 |
(139) 0x42c461 VMOVSD %XMM11,(%RAX) |
(139) 0x42c465 MOV 0x6c(%RSP),%EBX |
(139) 0x42c469 INC %R12 |
(139) 0x42c46c LEA (%R12),%R8D |
(139) 0x42c470 CMP %R8D,0x44(%RSP) |
(139) 0x42c475 JLE 42c490 |
(139) 0x42c477 MOV 0x3c(%RSP),%EDI |
(139) 0x42c47b MOV 0x68(%RSP),%EDX |
(139) 0x42c47f MOV 0x40(%RSP),%ESI |
(139) 0x42c483 SUB %EBX,%EDI |
(139) 0x42c485 JMP 42c090 |
0x42c48a NOPW (%RAX,%RAX,1) |
0x42c490 VZEROUPPER |
0x42c493 LEA -0x28(%RBP),%RSP |
0x42c497 POP %RBX |
0x42c498 POP %R12 |
0x42c49a POP %R13 |
0x42c49c POP %R14 |
0x42c49e POP %R15 |
0x42c4a0 POP %RBP |
0x42c4a1 RET |
0x42c4a2 NOPW (%RAX,%RAX,1) |
(139) 0x42c4a8 MOV %ESI,%ECX |
(139) 0x42c4aa XOR %R13D,%R13D |
(139) 0x42c4ad JMP 42c2d8 |
0x42c4b2 INC %EDI |
0x42c4b4 XOR %EDX,%EDX |
0x42c4b6 JMP 42c031 |
0x42c4bb NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.19 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.81 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | advec_mom.cpp:146-149 |
Module | exec |
nb instructions | 79 |
nb uops | 89 |
loop length | 268 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 14.83 cycles |
front end | 14.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.00 | 5.87 | 5.70 | 8.00 | 8.00 | 8.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.00 | 5.87 | 5.70 | 8.00 | 8.00 | 8.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.07-14.18 |
Stall cycles | 0.00 |
Front-end | 14.83 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 14.83 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42c493 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42c493 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42c4b2 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x502> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%RBX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42c493 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x68(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ESI,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42c031 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:146-149 |
Module | exec |
nb instructions | 79 |
nb uops | 89 |
loop length | 268 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 14.83 cycles |
front end | 14.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.00 | 5.87 | 5.70 | 8.00 | 8.00 | 8.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.00 | 5.87 | 5.70 | 8.00 | 8.00 | 8.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.07-14.18 |
Stall cycles | 0.00 |
Front-end | 14.83 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 14.83 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42c493 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42c493 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42c4b2 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x502> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%RBX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42c493 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x40(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x68(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ESI,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42c031 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.7+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 3.39 | 1.12 |
▼Loop 139 - advec_mom.cpp:149-149 - exec– | 0.01 | 0.01 |
○Loop 140 - advec_mom.cpp:149-149 - exec | 3.37 | 1.11 |