Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.05% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.05% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 172 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
0x42c990 PUSH %RBP |
0x42c991 MOV %RSP,%RBP |
0x42c994 PUSH %R15 |
0x42c996 PUSH %R14 |
0x42c998 PUSH %R13 |
0x42c99a PUSH %R12 |
0x42c99c PUSH %RBX |
0x42c99d AND $-0x40,%RSP |
0x42c9a1 SUB $0x100,%RSP |
0x42c9a8 MOV 0x34(%RDI),%EAX |
0x42c9ab MOV 0x28(%RDI),%ESI |
0x42c9ae MOV 0x30(%RDI),%R12D |
0x42c9b2 MOV 0x2c(%RDI),%EDX |
0x42c9b5 ADD $0x4,%EAX |
0x42c9b8 LEA 0x1(%RSI),%ECX |
0x42c9bb MOV %EAX,0x68(%RSP) |
0x42c9bf MOV %ECX,0x64(%RSP) |
0x42c9c3 CMP %EAX,%R12D |
0x42c9c6 JGE 42d253 |
0x42c9cc MOV %EAX,%R14D |
0x42c9cf LEA 0x3(%RDX),%R15D |
0x42c9d3 SUB %R12D,%R14D |
0x42c9d6 CMP %R15D,%ECX |
0x42c9d9 JGE 42d253 |
0x42c9df MOV %RDI,%RBX |
0x42c9e2 MOV %R15D,%EDI |
0x42c9e5 SUB %ECX,%EDI |
0x42c9e7 MOV %EDI,0x6c(%RSP) |
0x42c9eb CALL 4046c0 <omp_get_num_threads@plt> |
0x42c9f0 MOV %EAX,%R13D |
0x42c9f3 CALL 4045b0 <omp_get_thread_num@plt> |
0x42c9f8 XOR %EDX,%EDX |
0x42c9fa MOV %EAX,%R8D |
0x42c9fd MOV 0x6c(%RSP),%EAX |
0x42ca01 IMUL %R14D,%EAX |
0x42ca05 DIV %R13D |
0x42ca08 MOV %EAX,%ECX |
0x42ca0a CMP %EDX,%R8D |
0x42ca0d JB 42d276 |
0x42ca13 IMUL %ECX,%R8D |
0x42ca17 LEA (%R8,%RDX,1),%R10D |
0x42ca1b LEA (%RCX,%R10,1),%R9D |
0x42ca1f MOV %R9D,0x60(%RSP) |
0x42ca24 CMP %R9D,%R10D |
0x42ca27 JAE 42d253 |
0x42ca2d MOV %R10D,%EAX |
0x42ca30 XOR %EDX,%EDX |
0x42ca32 MOV 0x64(%RSP),%R11D |
0x42ca37 MOV (%RBX),%RSI |
0x42ca3a DIVL 0x6c(%RSP) |
0x42ca3e MOV 0x10(%RBX),%R14 |
0x42ca42 MOV 0x8(%RBX),%RDI |
0x42ca46 MOV %R10D,0xfc(%RSP) |
0x42ca4e VMOVSD 0x35e72(%RIP),%XMM3 |
0x42ca56 MOV %RSI,0x40(%RSP) |
0x42ca5b MOV %R14,0x30(%RSP) |
0x42ca60 MOV %RDI,0x28(%RSP) |
0x42ca65 MOV %R15D,%R8D |
0x42ca68 MOV 0x20(%RBX),%R15 |
0x42ca6c MOV 0x18(%RBX),%RBX |
0x42ca70 VBROADCASTSD %XMM3,%YMM4 |
0x42ca75 VBROADCASTSD %XMM3,%ZMM2 |
0x42ca7b MOV %R15,0x38(%RSP) |
0x42ca80 MOV %RBX,0x20(%RSP) |
0x42ca85 ADD %R12D,%EAX |
0x42ca88 ADD %EDX,%R11D |
0x42ca8b MOV %EAX,0xac(%RSP) |
0x42ca92 CLTQ |
0x42ca94 SUB %R11D,%R8D |
0x42ca97 MOV %R11D,0xf8(%RSP) |
0x42ca9f MOV %RAX,0x88(%RSP) |
0x42caa7 NOPW (%RAX,%RAX,1) |
(143) 0x42cab0 CMP %R8D,%ECX |
(143) 0x42cab3 MOV 0xfc(%RSP),%R12D |
(143) 0x42cabb CMOVBE %ECX,%R8D |
(143) 0x42cabf LEA (%R12,%R8,1),%ECX |
(143) 0x42cac3 MOV %ECX,0xa8(%RSP) |
(143) 0x42caca CMP %ECX,%R12D |
(143) 0x42cacd JAE 42d20d |
(143) 0x42cad3 MOV 0x40(%RSP),%R10 |
(143) 0x42cad8 MOV 0x30(%RSP),%R14 |
(143) 0x42cadd MOV 0xac(%RSP),%EAX |
(143) 0x42cae4 MOV 0x38(%RSP),%R9 |
(143) 0x42cae9 MOV (%R10),%RCX |
(143) 0x42caec MOV 0x10(%R14),%RDX |
(143) 0x42caf0 MOV 0x10(%R10),%R13 |
(143) 0x42caf4 MOV 0x28(%RSP),%R10 |
(143) 0x42caf9 DEC %EAX |
(143) 0x42cafb CLTQ |
(143) 0x42cafd MOV (%R9),%R11 |
(143) 0x42cb00 MOV 0x10(%R9),%R15 |
(143) 0x42cb04 MOV %RDX,0xd8(%RSP) |
(143) 0x42cb0c MOV (%R10),%RDX |
(143) 0x42cb0f MOV %RAX,%RSI |
(143) 0x42cb12 MOV %RAX,%R9 |
(143) 0x42cb15 MOV 0x88(%RSP),%R12 |
(143) 0x42cb1d MOV (%R14),%RBX |
(143) 0x42cb20 IMUL %RCX,%RSI |
(143) 0x42cb24 MOV 0x10(%R10),%R14 |
(143) 0x42cb28 IMUL %RDX,%RAX |
(143) 0x42cb2c IMUL %R12,%RBX |
(143) 0x42cb30 MOV %R14,0xb8(%RSP) |
(143) 0x42cb38 IMUL %R11,%R9 |
(143) 0x42cb3c MOV %RSI,0x90(%RSP) |
(143) 0x42cb44 MOV %RAX,0xe0(%RSP) |
(143) 0x42cb4c MOV 0x20(%RSP),%RAX |
(143) 0x42cb51 IMUL %R12,%RCX |
(143) 0x42cb55 IMUL %R12,%R11 |
(143) 0x42cb59 MOV %RBX,0xb0(%RSP) |
(143) 0x42cb61 LEA -0x1(%R8),%EBX |
(143) 0x42cb65 IMUL %R12,%RDX |
(143) 0x42cb69 MOV 0x10(%RAX),%RDI |
(143) 0x42cb6d MOV %R9,0xc0(%RSP) |
(143) 0x42cb75 IMUL (%RAX),%R12 |
(143) 0x42cb79 MOV %RCX,0xc8(%RSP) |
(143) 0x42cb81 MOV %R11,0xd0(%RSP) |
(143) 0x42cb89 MOV %RDX,0xf0(%RSP) |
(143) 0x42cb91 MOV %RDI,0xe8(%RSP) |
(143) 0x42cb99 MOV %R12,0xa0(%RSP) |
(143) 0x42cba1 CMP $0x6,%EBX |
(143) 0x42cba4 JBE 42d268 |
(143) 0x42cbaa MOVSXD 0xf8(%RSP),%RAX |
(143) 0x42cbb2 LEA (%R11,%RAX,1),%RDI |
(143) 0x42cbb6 LEA (%RSI,%RAX,1),%R10 |
(143) 0x42cbba ADD %RAX,%RCX |
(143) 0x42cbbd SAL $0x3,%RDI |
(143) 0x42cbc1 SAL $0x3,%R10 |
(143) 0x42cbc5 LEA (%R9,%RAX,1),%RSI |
(143) 0x42cbc9 SAL $0x3,%RCX |
(143) 0x42cbcd LEA (%R15,%RDI,1),%R11 |
(143) 0x42cbd1 SAL $0x3,%RSI |
(143) 0x42cbd5 LEA -0x8(%R13,%R10,1),%RDX |
(143) 0x42cbda MOV %R11,0x78(%RSP) |
(143) 0x42cbdf LEA (%R13,%RCX,1),%R14 |
(143) 0x42cbe4 LEA -0x8(%R13,%RCX,1),%R11 |
(143) 0x42cbe9 MOV 0xb0(%RSP),%RCX |
(143) 0x42cbf1 LEA (%R15,%RSI,1),%R9 |
(143) 0x42cbf5 MOV %RDX,0x80(%RSP) |
(143) 0x42cbfd LEA (%R13,%R10,1),%RBX |
(143) 0x42cc02 MOV 0xe0(%RSP),%RDX |
(143) 0x42cc0a LEA -0x8(%R15,%RDI,1),%R10 |
(143) 0x42cc0f MOV 0xd8(%RSP),%RDI |
(143) 0x42cc17 MOV %R9,0x70(%RSP) |
(143) 0x42cc1c LEA -0x8(%R15,%RSI,1),%R9 |
(143) 0x42cc21 LEA (%RCX,%RAX,1),%RSI |
(143) 0x42cc25 MOV 0xb8(%RSP),%RCX |
(143) 0x42cc2d LEA (%RDI,%RSI,8),%RDI |
(143) 0x42cc31 LEA (%RDX,%RAX,1),%RSI |
(143) 0x42cc35 MOV 0xf0(%RSP),%RDX |
(143) 0x42cc3d LEA (%RCX,%RSI,8),%RCX |
(143) 0x42cc41 MOV 0xb8(%RSP),%RSI |
(143) 0x42cc49 LEA (%RDX,%RAX,1),%RDX |
(143) 0x42cc4d ADD %R12,%RAX |
(143) 0x42cc50 LEA (%RSI,%RDX,8),%RSI |
(143) 0x42cc54 MOV 0xe8(%RSP),%RDX |
(143) 0x42cc5c LEA (%RDX,%RAX,8),%RDX |
(143) 0x42cc60 MOV %R8D,%EAX |
(143) 0x42cc63 SHR $0x3,%EAX |
(143) 0x42cc66 MOV %RAX,%R12 |
(143) 0x42cc69 SAL $0x6,%RAX |
(143) 0x42cc6d MOV %RAX,0x98(%RSP) |
(143) 0x42cc75 XOR %EAX,%EAX |
(143) 0x42cc77 AND $0x1,%R12D |
(143) 0x42cc7b JE 42cd04 |
(143) 0x42cc81 MOV 0x78(%RSP),%RAX |
(143) 0x42cc86 VMOVUPD (%R10),%ZMM5 |
(143) 0x42cc8c MOV 0x70(%RSP),%R12 |
(143) 0x42cc91 VMOVUPD (%R9),%ZMM8 |
(143) 0x42cc97 VMOVUPD (%RAX),%ZMM6 |
(143) 0x42cc9d VMULPD (%R11),%ZMM5,%ZMM1 |
(143) 0x42cca3 VMOVUPD (%R12),%ZMM7 |
(143) 0x42ccaa MOV 0x80(%RSP),%RAX |
(143) 0x42ccb2 VMULPD (%R14),%ZMM6,%ZMM0 |
(143) 0x42ccb8 MOV 0x98(%RSP),%R12 |
(143) 0x42ccc0 VFMADD231PD (%RAX),%ZMM8,%ZMM1 |
(143) 0x42ccc6 MOV $0x40,%EAX |
(143) 0x42cccb VFMADD231PD (%RBX),%ZMM7,%ZMM0 |
(143) 0x42ccd1 VADDPD %ZMM1,%ZMM0,%ZMM9 |
(143) 0x42ccd7 VMULPD %ZMM2,%ZMM9,%ZMM10 |
(143) 0x42ccdd VMOVUPD %ZMM10,(%RDI) |
(143) 0x42cce3 VMOVUPD (%RSI),%ZMM11 |
(143) 0x42cce9 VSUBPD (%RCX),%ZMM11,%ZMM12 |
(143) 0x42ccef VADDPD %ZMM10,%ZMM12,%ZMM13 |
(143) 0x42ccf5 VMOVUPD %ZMM13,(%RDX) |
(143) 0x42ccfb CMP %R12,%RAX |
(143) 0x42ccfe JE 42ce22 |
(143) 0x42cd04 MOV %R15,0x50(%RSP) |
(143) 0x42cd09 MOV 0x70(%RSP),%R12 |
(143) 0x42cd0e MOV %R8D,0x5c(%RSP) |
(143) 0x42cd13 MOV 0x80(%RSP),%R8 |
(143) 0x42cd1b MOV %R13,0x48(%RSP) |
(143) 0x42cd20 MOV 0x78(%RSP),%R13 |
(144) 0x42cd25 VMOVUPD (%R13,%RAX,1),%ZMM14 |
(144) 0x42cd2d VMOVUPD (%R10,%RAX,1),%ZMM0 |
(144) 0x42cd34 VMOVUPD (%R12,%RAX,1),%ZMM6 |
(144) 0x42cd3b VMOVUPD (%R9,%RAX,1),%ZMM5 |
(144) 0x42cd42 VMULPD (%R14,%RAX,1),%ZMM14,%ZMM15 |
(144) 0x42cd49 MOV 0x98(%RSP),%R15 |
(144) 0x42cd51 VMULPD (%R11,%RAX,1),%ZMM0,%ZMM7 |
(144) 0x42cd58 VFMADD231PD (%RBX,%RAX,1),%ZMM6,%ZMM15 |
(144) 0x42cd5f VFMADD231PD (%R8,%RAX,1),%ZMM5,%ZMM7 |
(144) 0x42cd66 VADDPD %ZMM7,%ZMM15,%ZMM1 |
(144) 0x42cd6c VMULPD %ZMM2,%ZMM1,%ZMM8 |
(144) 0x42cd72 VMOVUPD %ZMM8,(%RDI,%RAX,1) |
(144) 0x42cd79 VMOVUPD (%RSI,%RAX,1),%ZMM9 |
(144) 0x42cd80 VSUBPD (%RCX,%RAX,1),%ZMM9,%ZMM10 |
(144) 0x42cd87 VADDPD %ZMM8,%ZMM10,%ZMM11 |
(144) 0x42cd8d VMOVUPD %ZMM11,(%RDX,%RAX,1) |
(144) 0x42cd94 VMOVUPD 0x40(%R13,%RAX,1),%ZMM12 |
(144) 0x42cd9c VMOVUPD 0x40(%R10,%RAX,1),%ZMM15 |
(144) 0x42cda4 VMOVUPD 0x40(%R12,%RAX,1),%ZMM14 |
(144) 0x42cdac VMOVUPD 0x40(%R9,%RAX,1),%ZMM0 |
(144) 0x42cdb4 VMULPD 0x40(%R14,%RAX,1),%ZMM12,%ZMM13 |
(144) 0x42cdbc VMULPD 0x40(%R11,%RAX,1),%ZMM15,%ZMM6 |
(144) 0x42cdc4 VFMADD231PD 0x40(%RBX,%RAX,1),%ZMM14,%ZMM13 |
(144) 0x42cdcc VFMADD231PD 0x40(%R8,%RAX,1),%ZMM0,%ZMM6 |
(144) 0x42cdd4 VADDPD %ZMM6,%ZMM13,%ZMM7 |
(144) 0x42cdda VMULPD %ZMM2,%ZMM7,%ZMM8 |
(144) 0x42cde0 VMOVUPD %ZMM8,0x40(%RDI,%RAX,1) |
(144) 0x42cde8 VMOVUPD 0x40(%RSI,%RAX,1),%ZMM5 |
(144) 0x42cdf0 VSUBPD 0x40(%RCX,%RAX,1),%ZMM5,%ZMM1 |
(144) 0x42cdf8 VADDPD %ZMM8,%ZMM1,%ZMM9 |
(144) 0x42cdfe VMOVUPD %ZMM9,0x40(%RDX,%RAX,1) |
(144) 0x42ce06 SUB $-0x80,%RAX |
(144) 0x42ce0a CMP %R15,%RAX |
(144) 0x42ce0d JNE 42cd25 |
(143) 0x42ce13 MOV 0x5c(%RSP),%R8D |
(143) 0x42ce18 MOV 0x50(%RSP),%R15 |
(143) 0x42ce1d MOV 0x48(%RSP),%R13 |
(143) 0x42ce22 MOV 0xf8(%RSP),%EBX |
(143) 0x42ce29 MOV %R8D,%EDX |
(143) 0x42ce2c AND $-0x8,%EDX |
(143) 0x42ce2f ADD %EDX,0xfc(%RSP) |
(143) 0x42ce36 LEA (%RDX,%RBX,1),%ECX |
(143) 0x42ce39 TEST $0x7,%R8B |
(143) 0x42ce3d JE 42d1fd |
(143) 0x42ce43 MOV %R8D,%ESI |
(143) 0x42ce46 SUB %EDX,%ESI |
(143) 0x42ce48 LEA -0x1(%RSI),%R14D |
(143) 0x42ce4c CMP $0x2,%R14D |
(143) 0x42ce50 JBE 42cf5b |
(143) 0x42ce56 MOVSXD 0xf8(%RSP),%RAX |
(143) 0x42ce5e MOV 0x90(%RSP),%R9 |
(143) 0x42ce66 MOV 0xc8(%RSP),%R11 |
(143) 0x42ce6e MOV 0xd0(%RSP),%R10 |
(143) 0x42ce76 LEA (%R9,%RAX,1),%R8 |
(143) 0x42ce7a MOV 0xc0(%RSP),%RDI |
(143) 0x42ce82 MOV 0xe0(%RSP),%RBX |
(143) 0x42ce8a LEA (%R11,%RAX,1),%R9 |
(143) 0x42ce8e LEA (%R10,%RAX,1),%R10 |
(143) 0x42ce92 ADD %RDX,%R8 |
(143) 0x42ce95 MOV 0xb0(%RSP),%R12 |
(143) 0x42ce9d ADD %RDX,%R9 |
(143) 0x42cea0 ADD %RDX,%R10 |
(143) 0x42cea3 VMOVUPD (%R13,%R8,8),%YMM12 |
(143) 0x42ceaa VMOVUPD -0x8(%R13,%R8,8),%YMM15 |
(143) 0x42ceb1 VMOVUPD (%R13,%R9,8),%YMM10 |
(143) 0x42ceb8 VMOVUPD -0x8(%R15,%R10,8),%YMM13 |
(143) 0x42cebf ADD %RAX,%RDI |
(143) 0x42cec2 LEA (%RBX,%RAX,1),%R11 |
(143) 0x42cec6 ADD %RDX,%RDI |
(143) 0x42cec9 MOV 0xf0(%RSP),%RBX |
(143) 0x42ced1 MOV 0xa0(%RSP),%R14 |
(143) 0x42ced9 LEA (%R12,%RAX,1),%R12 |
(143) 0x42cedd VMULPD (%R15,%R10,8),%YMM10,%YMM11 |
(143) 0x42cee3 ADD %RDX,%R12 |
(143) 0x42cee6 ADD %RDX,%R11 |
(143) 0x42cee9 MOV 0xb8(%RSP),%R8 |
(143) 0x42cef1 VMULPD -0x8(%R13,%R9,8),%YMM13,%YMM14 |
(143) 0x42cef8 ADD %RAX,%RBX |
(143) 0x42cefb ADD %R14,%RAX |
(143) 0x42cefe ADD %RDX,%RBX |
(143) 0x42cf01 ADD %RDX,%RAX |
(143) 0x42cf04 MOV 0xd8(%RSP),%RDX |
(143) 0x42cf0c VFMADD231PD (%R15,%RDI,8),%YMM12,%YMM11 |
(143) 0x42cf12 VFMADD231PD -0x8(%R15,%RDI,8),%YMM15,%YMM14 |
(143) 0x42cf19 MOV 0xe8(%RSP),%RDI |
(143) 0x42cf21 VADDPD %YMM14,%YMM11,%YMM6 |
(143) 0x42cf26 VMULPD %YMM4,%YMM6,%YMM0 |
(143) 0x42cf2a VMOVUPD %YMM0,(%RDX,%R12,8) |
(143) 0x42cf30 VMOVUPD (%R8,%RBX,8),%YMM7 |
(143) 0x42cf36 VSUBPD (%R8,%R11,8),%YMM7,%YMM8 |
(143) 0x42cf3c VADDPD %YMM0,%YMM8,%YMM5 |
(143) 0x42cf40 VMOVUPD %YMM5,(%RDI,%RAX,8) |
(143) 0x42cf45 TEST $0x3,%SIL |
(143) 0x42cf49 JE 42d1fd |
(143) 0x42cf4f AND $-0x4,%ESI |
(143) 0x42cf52 ADD %ESI,0xfc(%RSP) |
(143) 0x42cf59 ADD %ESI,%ECX |
(143) 0x42cf5b MOV 0xc0(%RSP),%R10 |
(143) 0x42cf63 MOV 0xd0(%RSP),%RBX |
(143) 0x42cf6b MOVSXD %ECX,%RAX |
(143) 0x42cf6e LEA -0x1(%RCX),%EDX |
(143) 0x42cf71 MOVSXD %EDX,%RDX |
(143) 0x42cf74 MOV 0xc8(%RSP),%R14 |
(143) 0x42cf7c MOV 0x90(%RSP),%R12 |
(143) 0x42cf84 LEA (%R10,%RAX,1),%R9 |
(143) 0x42cf88 ADD %RAX,%RBX |
(143) 0x42cf8b LEA (%R15,%R9,8),%RDI |
(143) 0x42cf8f LEA (%R15,%RBX,8),%R9 |
(143) 0x42cf93 MOV 0xd0(%RSP),%RBX |
(143) 0x42cf9b LEA (%R14,%RAX,1),%R11 |
(143) 0x42cf9f VMOVSD (%R9),%XMM1 |
(143) 0x42cfa4 VMOVSD (%RDI),%XMM10 |
(143) 0x42cfa8 LEA (%R12,%RAX,1),%RSI |
(143) 0x42cfac ADD %RDX,%RBX |
(143) 0x42cfaf LEA (%R13,%R11,8),%R8 |
(143) 0x42cfb4 LEA (%RDX,%R10,1),%R11 |
(143) 0x42cfb8 VMOVSD (%R15,%RBX,8),%XMM11 |
(143) 0x42cfbe LEA (%RDX,%R12,1),%R10 |
(143) 0x42cfc2 ADD %R14,%RDX |
(143) 0x42cfc5 VMOVSD (%R15,%R11,8),%XMM13 |
(143) 0x42cfcb VMULSD (%R8),%XMM1,%XMM9 |
(143) 0x42cfd0 LEA (%R13,%RSI,8),%RSI |
(143) 0x42cfd5 MOV 0xb0(%RSP),%R14 |
(143) 0x42cfdd VMULSD (%R13,%RDX,8),%XMM11,%XMM12 |
(143) 0x42cfe4 MOV 0xd8(%RSP),%R11 |
(143) 0x42cfec LEA (%R14,%RAX,1),%RDX |
(143) 0x42cff0 MOV 0xe0(%RSP),%R14 |
(143) 0x42cff8 VFMADD231SD (%RSI),%XMM10,%XMM9 |
(143) 0x42cffd VFMADD231SD (%R13,%R10,8),%XMM13,%XMM12 |
(143) 0x42d004 MOV 0xa0(%RSP),%R10 |
(143) 0x42d00c LEA (%R10,%RAX,1),%RBX |
(143) 0x42d010 MOV 0xa8(%RSP),%R10D |
(143) 0x42d018 VADDSD %XMM12,%XMM9,%XMM14 |
(143) 0x42d01d VMULSD %XMM3,%XMM14,%XMM15 |
(143) 0x42d021 VMOVSD %XMM15,(%R11,%RDX,8) |
(143) 0x42d027 MOV 0xf0(%RSP),%RDX |
(143) 0x42d02f MOV 0xfc(%RSP),%R11D |
(143) 0x42d037 ADD %RAX,%RDX |
(143) 0x42d03a ADD %R14,%RAX |
(143) 0x42d03d MOV 0xb8(%RSP),%R14 |
(143) 0x42d045 INC %R11D |
(143) 0x42d048 VMOVSD (%R14,%RDX,8),%XMM6 |
(143) 0x42d04e VSUBSD (%R14,%RAX,8),%XMM6,%XMM0 |
(143) 0x42d054 MOV 0xe8(%RSP),%RAX |
(143) 0x42d05c VADDSD %XMM15,%XMM0,%XMM7 |
(143) 0x42d061 VMOVSD %XMM7,(%RAX,%RBX,8) |
(143) 0x42d066 LEA 0x1(%RCX),%EAX |
(143) 0x42d069 CMP %R10D,%R11D |
(143) 0x42d06c JAE 42d1fd |
(143) 0x42d072 MOV 0xc0(%RSP),%R11 |
(143) 0x42d07a CLTQ |
(143) 0x42d07c VMOVSD (%R9),%XMM9 |
(143) 0x42d081 ADD $0x2,%ECX |
(143) 0x42d084 LEA (%R12,%RAX,1),%RBX |
(143) 0x42d088 VMOVSD (%RDI),%XMM11 |
(143) 0x42d08c MOV 0xb0(%RSP),%R9 |
(143) 0x42d094 LEA (%R11,%RAX,1),%R10 |
(143) 0x42d098 MOV 0xd0(%RSP),%R11 |
(143) 0x42d0a0 LEA (%R13,%RBX,8),%RDX |
(143) 0x42d0a5 MOV 0xc8(%RSP),%RBX |
(143) 0x42d0ad VMULSD (%R8),%XMM9,%XMM10 |
(143) 0x42d0b2 LEA (%R15,%R10,8),%R10 |
(143) 0x42d0b6 MOV 0xd8(%RSP),%RDI |
(143) 0x42d0be ADD %RAX,%R11 |
(143) 0x42d0c1 ADD %RAX,%RBX |
(143) 0x42d0c4 VMOVSD (%R10),%XMM1 |
(143) 0x42d0c9 MOV 0xf0(%RSP),%R8 |
(143) 0x42d0d1 LEA (%R15,%R11,8),%R11 |
(143) 0x42d0d5 LEA (%R13,%RBX,8),%RBX |
(143) 0x42d0da VMOVSD (%R11),%XMM8 |
(143) 0x42d0df VMULSD (%RBX),%XMM8,%XMM5 |
(143) 0x42d0e3 VFMADD231SD (%RSI),%XMM11,%XMM10 |
(143) 0x42d0e8 LEA (%R9,%RAX,1),%RSI |
(143) 0x42d0ec VFMADD132SD (%RDX),%XMM5,%XMM1 |
(143) 0x42d0f1 VADDSD %XMM1,%XMM10,%XMM12 |
(143) 0x42d0f5 VMULSD %XMM3,%XMM12,%XMM13 |
(143) 0x42d0f9 VMOVSD %XMM13,(%RDI,%RSI,8) |
(143) 0x42d0fe LEA (%R8,%RAX,1),%RDI |
(143) 0x42d102 MOV 0xa0(%RSP),%RSI |
(143) 0x42d10a MOV 0xe0(%RSP),%R8 |
(143) 0x42d112 VMOVSD (%R14,%RDI,8),%XMM14 |
(143) 0x42d118 MOV 0xfc(%RSP),%EDI |
(143) 0x42d11f ADD %RAX,%RSI |
(143) 0x42d122 ADD %R8,%RAX |
(143) 0x42d125 VSUBSD (%R14,%RAX,8),%XMM14,%XMM15 |
(143) 0x42d12b MOV 0xe8(%RSP),%RAX |
(143) 0x42d133 ADD $0x2,%EDI |
(143) 0x42d136 VADDSD %XMM13,%XMM15,%XMM6 |
(143) 0x42d13b VMOVSD %XMM6,(%RAX,%RSI,8) |
(143) 0x42d140 MOV 0xa8(%RSP),%ESI |
(143) 0x42d147 CMP %ESI,%EDI |
(143) 0x42d149 JAE 42d1fd |
(143) 0x42d14f MOV 0xd0(%RSP),%RDI |
(143) 0x42d157 MOVSXD %ECX,%RCX |
(143) 0x42d15a MOV 0xc8(%RSP),%R8 |
(143) 0x42d162 VMOVSD (%RBX),%XMM5 |
(143) 0x42d166 MOV 0xc0(%RSP),%RAX |
(143) 0x42d16e ADD %RCX,%R12 |
(143) 0x42d171 ADD %RCX,%R9 |
(143) 0x42d174 ADD %RCX,%RDI |
(143) 0x42d177 ADD %RCX,%R8 |
(143) 0x42d17a VMOVSD (%R10),%XMM9 |
(143) 0x42d17f MOV 0xe0(%RSP),%R10 |
(143) 0x42d187 VMOVSD (%R15,%RDI,8),%XMM0 |
(143) 0x42d18d VMULSD (%R11),%XMM5,%XMM1 |
(143) 0x42d192 ADD %RCX,%RAX |
(143) 0x42d195 VMOVSD (%R15,%RAX,8),%XMM7 |
(143) 0x42d19b ADD %RCX,%R10 |
(143) 0x42d19e MOV 0xa0(%RSP),%R15 |
(143) 0x42d1a6 VMULSD (%R13,%R8,8),%XMM0,%XMM8 |
(143) 0x42d1ad ADD %RCX,%R15 |
(143) 0x42d1b0 VFMADD231SD (%RDX),%XMM9,%XMM1 |
(143) 0x42d1b5 MOV 0xf0(%RSP),%RDX |
(143) 0x42d1bd VFMADD231SD (%R13,%R12,8),%XMM7,%XMM8 |
(143) 0x42d1c4 MOV 0xd8(%RSP),%R13 |
(143) 0x42d1cc ADD %RCX,%RDX |
(143) 0x42d1cf VADDSD %XMM1,%XMM8,%XMM10 |
(143) 0x42d1d3 VMULSD %XMM3,%XMM10,%XMM11 |
(143) 0x42d1d7 VMOVSD %XMM11,(%R13,%R9,8) |
(143) 0x42d1de VMOVSD (%R14,%RDX,8),%XMM12 |
(143) 0x42d1e4 VSUBSD (%R14,%R10,8),%XMM12,%XMM13 |
(143) 0x42d1ea MOV 0xe8(%RSP),%R14 |
(143) 0x42d1f2 VADDSD %XMM11,%XMM13,%XMM14 |
(143) 0x42d1f7 VMOVSD %XMM14,(%R14,%R15,8) |
(143) 0x42d1fd MOV 0xa8(%RSP),%R11D |
(143) 0x42d205 MOV %R11D,0xfc(%RSP) |
(143) 0x42d20d INCL 0xac(%RSP) |
(143) 0x42d214 INCQ 0x88(%RSP) |
(143) 0x42d21c MOV 0xac(%RSP),%ESI |
(143) 0x42d223 CMP %ESI,0x68(%RSP) |
(143) 0x42d227 JLE 42d250 |
(143) 0x42d229 MOV 0x60(%RSP),%ECX |
(143) 0x42d22d MOV 0xfc(%RSP),%EAX |
(143) 0x42d234 MOV 0x64(%RSP),%R12D |
(143) 0x42d239 MOV 0x6c(%RSP),%R8D |
(143) 0x42d23e SUB %EAX,%ECX |
(143) 0x42d240 MOV %R12D,0xf8(%RSP) |
(143) 0x42d248 JMP 42cab0 |
0x42d24d NOPL (%RAX) |
0x42d250 VZEROUPPER |
0x42d253 LEA -0x28(%RBP),%RSP |
0x42d257 POP %RBX |
0x42d258 POP %R12 |
0x42d25a POP %R13 |
0x42d25c POP %R14 |
0x42d25e POP %R15 |
0x42d260 POP %RBP |
0x42d261 RET |
0x42d262 NOPW (%RAX,%RAX,1) |
(143) 0x42d268 MOV 0xf8(%RSP),%ECX |
(143) 0x42d26f XOR %EDX,%EDX |
(143) 0x42d271 JMP 42ce43 |
0x42d276 INC %ECX |
0x42d278 XOR %EDX,%EDX |
0x42d27a JMP 42ca13 |
0x42d27f NOP |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.43 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.57 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 87 |
nb uops | 97 |
loop length | 325 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 14 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 8.00 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
cycles | 6.30 | 11.90 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.43-15.52 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 11.90 |
DIV/SQRT | 12.00 |
Overall L1 | 16.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42d253 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R12D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42d253 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x6c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42d276 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42d253 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x64(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x6c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0xfc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x35e72(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0xac(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R11D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42ca13 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x83> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 87 |
nb uops | 97 |
loop length | 325 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 14 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 8.00 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
cycles | 6.30 | 11.90 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.43-15.52 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 11.90 |
DIV/SQRT | 12.00 |
Overall L1 | 16.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42d253 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R12D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42d253 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x6c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42d276 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42d253 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x64(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x6c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0xfc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x35e72(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0xac(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R11D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42ca13 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x83> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 3.05 | 2.28 |
▼Loop 143 - advec_mom.cpp:169-172 - exec– | 0 | 0 |
○Loop 144 - advec_mom.cpp:170-172 - exec | 3.04 | 2.27 |