Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:136-140 [...] | Coverage: 1.36% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:136-140 [...] | Coverage: 1.36% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 136 - 140 |
-------------------------------------------------------------------------------- |
136: #pragma omp parallel for simd collapse(2) |
137: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
138: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
139: pre_vol(i, j) = volume(i, j) + (vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j)); |
140: post_vol(i, j) = pre_vol(i, j) - (vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j)); |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x426da0 PUSH %RBP |
0x426da1 MOV %RSP,%RBP |
0x426da4 PUSH %R15 |
0x426da6 PUSH %R14 |
0x426da8 PUSH %R13 |
0x426daa PUSH %R12 |
0x426dac MOV %RDI,%R12 |
0x426daf PUSH %RBX |
0x426db0 AND $-0x40,%RSP |
0x426db4 SUB $0xc0,%RSP |
0x426dbb MOV 0x30(%RDI),%EAX |
0x426dbe MOV 0x34(%RDI),%ECX |
0x426dc1 MOV 0x28(%RDI),%EDI |
0x426dc4 MOV 0x2c(%R12),%EDX |
0x426dc9 ADD $0x4,%ECX |
0x426dcc LEA -0x1(%RAX),%R15D |
0x426dd0 DEC %EDI |
0x426dd2 MOV %ECX,0x5c(%RSP) |
0x426dd6 MOV %EDI,0x58(%RSP) |
0x426dda CMP %ECX,%R15D |
0x426ddd JGE 42754b |
0x426de3 MOV %ECX,%EBX |
0x426de5 LEA 0x4(%RDX),%R14D |
0x426de9 SUB %R15D,%EBX |
0x426dec CMP %R14D,%EDI |
0x426def JGE 42754b |
0x426df5 MOV %R14D,%ESI |
0x426df8 SUB %EDI,%ESI |
0x426dfa MOV %ESI,0x78(%RSP) |
0x426dfe CALL 4046c0 <omp_get_num_threads@plt> |
0x426e03 MOV %EAX,%R13D |
0x426e06 CALL 4045b0 <omp_get_thread_num@plt> |
0x426e0b XOR %EDX,%EDX |
0x426e0d MOV %EAX,%R8D |
0x426e10 MOV 0x78(%RSP),%EAX |
0x426e14 IMUL %EBX,%EAX |
0x426e17 DIV %R13D |
0x426e1a MOV %EAX,%R13D |
0x426e1d CMP %EDX,%R8D |
0x426e20 JB 42757f |
0x426e26 IMUL %R13D,%R8D |
0x426e2a LEA (%R8,%RDX,1),%R9D |
0x426e2e LEA (%R13,%R9,1),%R10D |
0x426e33 MOV %R10D,0x54(%RSP) |
0x426e38 CMP %R10D,%R9D |
0x426e3b JAE 42754b |
0x426e41 MOV %R9D,%EAX |
0x426e44 XOR %EDX,%EDX |
0x426e46 MOV 0x58(%RSP),%R11D |
0x426e4b MOV (%R12),%RCX |
0x426e4f DIVL 0x78(%RSP) |
0x426e53 MOV 0x10(%R12),%RDI |
0x426e58 MOV 0x18(%R12),%RBX |
0x426e5d MOV %RCX,0x48(%RSP) |
0x426e62 MOV %RDI,0x40(%RSP) |
0x426e67 MOV %RBX,0x30(%RSP) |
0x426e6c MOV %R14D,%R10D |
0x426e6f MOV 0x8(%R12),%R14 |
0x426e74 MOV 0x20(%R12),%R12 |
0x426e79 MOV %R14,0x38(%RSP) |
0x426e7e MOV %R12,0x28(%RSP) |
0x426e83 ADD %EDX,%R11D |
0x426e86 LEA (%RAX,%R15,1),%R15D |
0x426e8a MOV %R11D,0xa0(%RSP) |
0x426e92 SUB %R11D,%R10D |
0x426e95 MOVSXD %R15D,%RCX |
0x426e98 NOPL (%RAX,%RAX,1) |
(114) 0x426ea0 CMP %R10D,%R13D |
(114) 0x426ea3 CMOVBE %R13D,%R10D |
(114) 0x426ea7 LEA (%R9,%R10,1),%ESI |
(114) 0x426eab MOV %R10D,%EDX |
(114) 0x426eae MOV %ESI,0x7c(%RSP) |
(114) 0x426eb2 CMP %ESI,%R9D |
(114) 0x426eb5 JAE 427560 |
(114) 0x426ebb MOV 0x40(%RSP),%R10 |
(114) 0x426ec0 MOV 0x48(%RSP),%R8 |
(114) 0x426ec5 LEA 0x1(%RCX),%RSI |
(114) 0x426ec9 MOV 0x38(%RSP),%R11 |
(114) 0x426ece MOV 0x30(%RSP),%R12 |
(114) 0x426ed3 MOV %RSI,0x60(%RSP) |
(114) 0x426ed8 MOV (%R10),%RAX |
(114) 0x426edb MOV 0x10(%R8),%R15 |
(114) 0x426edf MOV (%R8),%R14 |
(114) 0x426ee2 MOV 0x10(%R12),%R13 |
(114) 0x426ee7 IMUL %RAX,%RSI |
(114) 0x426eeb MOV 0x10(%R10),%RDI |
(114) 0x426eef MOV 0x10(%R11),%R8 |
(114) 0x426ef3 MOV %R15,0x90(%RSP) |
(114) 0x426efb MOV 0x28(%RSP),%R10 |
(114) 0x426f00 MOV (%R11),%R11 |
(114) 0x426f03 IMUL %RCX,%R14 |
(114) 0x426f07 MOV %R13,0xa8(%RSP) |
(114) 0x426f0f MOV (%R12),%R12 |
(114) 0x426f13 IMUL %RCX,%R11 |
(114) 0x426f17 MOV %RSI,%RBX |
(114) 0x426f1a MOV %RSI,0x68(%RSP) |
(114) 0x426f1f IMUL %RCX,%R12 |
(114) 0x426f23 SUB %RAX,%RBX |
(114) 0x426f26 MOV 0x10(%R10),%RAX |
(114) 0x426f2a MOV %R14,0x88(%RSP) |
(114) 0x426f32 IMUL (%R10),%RCX |
(114) 0x426f36 MOV %RBX,0x98(%RSP) |
(114) 0x426f3e MOV %R11,0x70(%RSP) |
(114) 0x426f43 MOV %R12,0x80(%RSP) |
(114) 0x426f4b MOV %RCX,0xb8(%RSP) |
(114) 0x426f53 LEA -0x1(%RDX),%ECX |
(114) 0x426f56 MOV %RAX,0xb0(%RSP) |
(114) 0x426f5e CMP $0x6,%ECX |
(114) 0x426f61 JBE 427570 |
(114) 0x426f67 MOVSXD 0xa0(%RSP),%RAX |
(114) 0x426f6f LEA (%RBX,%RAX,1),%RBX |
(114) 0x426f73 LEA 0x1(%R11,%RAX,1),%R11 |
(114) 0x426f78 LEA (%RSI,%RAX,1),%R10 |
(114) 0x426f7c LEA (%R14,%RAX,1),%R14 |
(114) 0x426f80 SAL $0x3,%R11 |
(114) 0x426f84 LEA (%RDI,%RBX,8),%RSI |
(114) 0x426f88 MOV 0xb8(%RSP),%RBX |
(114) 0x426f90 LEA (%R15,%R14,8),%R15 |
(114) 0x426f94 LEA (%R8,%R11,1),%R13 |
(114) 0x426f98 LEA -0x8(%R8,%R11,1),%R14 |
(114) 0x426f9d MOV 0xb0(%RSP),%R11 |
(114) 0x426fa5 LEA (%R12,%RAX,1),%R12 |
(114) 0x426fa9 ADD %RBX,%RAX |
(114) 0x426fac LEA (%RDI,%R10,8),%RCX |
(114) 0x426fb0 MOV 0xa8(%RSP),%R10 |
(114) 0x426fb8 LEA (%R11,%RAX,8),%RBX |
(114) 0x426fbc MOV %EDX,%R11D |
(114) 0x426fbf XOR %EAX,%EAX |
(114) 0x426fc1 SHR $0x3,%R11D |
(114) 0x426fc5 LEA (%R10,%R12,8),%R12 |
(114) 0x426fc9 SAL $0x6,%R11 |
(114) 0x426fcd LEA -0x40(%R11),%R10 |
(114) 0x426fd1 SHR $0x6,%R10 |
(114) 0x426fd5 INC %R10 |
(114) 0x426fd8 AND $0x3,%R10D |
(114) 0x426fdc JE 4270e4 |
(114) 0x426fe2 CMP $0x1,%R10 |
(114) 0x426fe6 JE 42708b |
(114) 0x426fec CMP $0x2,%R10 |
(114) 0x426ff0 JE 42703b |
(114) 0x426ff2 VMOVUPD (%RCX),%ZMM3 |
(114) 0x426ff8 VMOVUPD (%R14),%ZMM6 |
(114) 0x426ffe MOV $0x40,%EAX |
(114) 0x427003 VADDPD (%R15),%ZMM3,%ZMM0 |
(114) 0x427009 VADDPD (%RSI),%ZMM6,%ZMM1 |
(114) 0x42700f VSUBPD %ZMM1,%ZMM0,%ZMM2 |
(114) 0x427015 VADDPD (%R13),%ZMM2,%ZMM4 |
(114) 0x42701c VMOVUPD %ZMM4,(%R12) |
(114) 0x427023 VMOVUPD (%RSI),%ZMM7 |
(114) 0x427029 VSUBPD (%RCX),%ZMM7,%ZMM5 |
(114) 0x42702f VADDPD %ZMM4,%ZMM5,%ZMM8 |
(114) 0x427035 VMOVUPD %ZMM8,(%RBX) |
(114) 0x42703b VMOVUPD (%RCX,%RAX,1),%ZMM9 |
(114) 0x427042 VMOVUPD (%R14,%RAX,1),%ZMM11 |
(114) 0x427049 VADDPD (%R15,%RAX,1),%ZMM9,%ZMM10 |
(114) 0x427050 VADDPD (%RSI,%RAX,1),%ZMM11,%ZMM12 |
(114) 0x427057 VSUBPD %ZMM12,%ZMM10,%ZMM13 |
(114) 0x42705d VADDPD (%R13,%RAX,1),%ZMM13,%ZMM14 |
(114) 0x427065 VMOVUPD %ZMM14,(%R12,%RAX,1) |
(114) 0x42706c VMOVUPD (%RSI,%RAX,1),%ZMM15 |
(114) 0x427073 VSUBPD (%RCX,%RAX,1),%ZMM15,%ZMM3 |
(114) 0x42707a VADDPD %ZMM14,%ZMM3,%ZMM0 |
(114) 0x427080 VMOVUPD %ZMM0,(%RBX,%RAX,1) |
(114) 0x427087 ADD $0x40,%RAX |
(114) 0x42708b VMOVUPD (%RCX,%RAX,1),%ZMM6 |
(114) 0x427092 VMOVUPD (%R14,%RAX,1),%ZMM1 |
(114) 0x427099 VADDPD (%R15,%RAX,1),%ZMM6,%ZMM2 |
(114) 0x4270a0 VADDPD (%RSI,%RAX,1),%ZMM1,%ZMM4 |
(114) 0x4270a7 VSUBPD %ZMM4,%ZMM2,%ZMM7 |
(114) 0x4270ad VADDPD (%R13,%RAX,1),%ZMM7,%ZMM5 |
(114) 0x4270b5 VMOVUPD %ZMM5,(%R12,%RAX,1) |
(114) 0x4270bc VMOVUPD (%RSI,%RAX,1),%ZMM8 |
(114) 0x4270c3 VSUBPD (%RCX,%RAX,1),%ZMM8,%ZMM9 |
(114) 0x4270ca VADDPD %ZMM5,%ZMM9,%ZMM10 |
(114) 0x4270d0 VMOVUPD %ZMM10,(%RBX,%RAX,1) |
(114) 0x4270d7 ADD $0x40,%RAX |
(114) 0x4270db CMP %RAX,%R11 |
(114) 0x4270de JE 42723b |
(115) 0x4270e4 VMOVUPD (%RCX,%RAX,1),%ZMM11 |
(115) 0x4270eb VMOVUPD (%R14,%RAX,1),%ZMM13 |
(115) 0x4270f2 VADDPD (%R15,%RAX,1),%ZMM11,%ZMM12 |
(115) 0x4270f9 VADDPD (%RSI,%RAX,1),%ZMM13,%ZMM14 |
(115) 0x427100 VSUBPD %ZMM14,%ZMM12,%ZMM15 |
(115) 0x427106 VADDPD (%R13,%RAX,1),%ZMM15,%ZMM3 |
(115) 0x42710e VMOVUPD %ZMM3,(%R12,%RAX,1) |
(115) 0x427115 VMOVUPD (%RSI,%RAX,1),%ZMM0 |
(115) 0x42711c VSUBPD (%RCX,%RAX,1),%ZMM0,%ZMM6 |
(115) 0x427123 VADDPD %ZMM3,%ZMM6,%ZMM2 |
(115) 0x427129 VMOVUPD %ZMM2,(%RBX,%RAX,1) |
(115) 0x427130 VMOVUPD 0x40(%RCX,%RAX,1),%ZMM1 |
(115) 0x427138 VMOVUPD 0x40(%R14,%RAX,1),%ZMM4 |
(115) 0x427140 VADDPD 0x40(%R15,%RAX,1),%ZMM1,%ZMM7 |
(115) 0x427148 VADDPD 0x40(%RSI,%RAX,1),%ZMM4,%ZMM5 |
(115) 0x427150 VSUBPD %ZMM5,%ZMM7,%ZMM8 |
(115) 0x427156 VADDPD 0x40(%R13,%RAX,1),%ZMM8,%ZMM9 |
(115) 0x42715e VMOVUPD %ZMM9,0x40(%R12,%RAX,1) |
(115) 0x427166 VMOVUPD 0x40(%RSI,%RAX,1),%ZMM10 |
(115) 0x42716e VSUBPD 0x40(%RCX,%RAX,1),%ZMM10,%ZMM11 |
(115) 0x427176 VADDPD %ZMM9,%ZMM11,%ZMM12 |
(115) 0x42717c VMOVUPD %ZMM12,0x40(%RBX,%RAX,1) |
(115) 0x427184 VMOVUPD 0x80(%RCX,%RAX,1),%ZMM13 |
(115) 0x42718c VMOVUPD 0x80(%R14,%RAX,1),%ZMM15 |
(115) 0x427194 VADDPD 0x80(%R15,%RAX,1),%ZMM13,%ZMM14 |
(115) 0x42719c VADDPD 0x80(%RSI,%RAX,1),%ZMM15,%ZMM3 |
(115) 0x4271a4 VSUBPD %ZMM3,%ZMM14,%ZMM0 |
(115) 0x4271aa VADDPD 0x80(%R13,%RAX,1),%ZMM0,%ZMM6 |
(115) 0x4271b2 VMOVUPD %ZMM6,0x80(%R12,%RAX,1) |
(115) 0x4271ba VMOVUPD 0x80(%RSI,%RAX,1),%ZMM2 |
(115) 0x4271c2 VSUBPD 0x80(%RCX,%RAX,1),%ZMM2,%ZMM1 |
(115) 0x4271ca VADDPD %ZMM6,%ZMM1,%ZMM7 |
(115) 0x4271d0 VMOVUPD %ZMM7,0x80(%RBX,%RAX,1) |
(115) 0x4271d8 VMOVUPD 0xc0(%RCX,%RAX,1),%ZMM4 |
(115) 0x4271e0 VMOVUPD 0xc0(%R14,%RAX,1),%ZMM8 |
(115) 0x4271e8 VADDPD 0xc0(%R15,%RAX,1),%ZMM4,%ZMM5 |
(115) 0x4271f0 VADDPD 0xc0(%RSI,%RAX,1),%ZMM8,%ZMM9 |
(115) 0x4271f8 VSUBPD %ZMM9,%ZMM5,%ZMM10 |
(115) 0x4271fe VADDPD 0xc0(%R13,%RAX,1),%ZMM10,%ZMM11 |
(115) 0x427206 VMOVUPD %ZMM11,0xc0(%R12,%RAX,1) |
(115) 0x42720e VMOVUPD 0xc0(%RSI,%RAX,1),%ZMM12 |
(115) 0x427216 VSUBPD 0xc0(%RCX,%RAX,1),%ZMM12,%ZMM13 |
(115) 0x42721e VADDPD %ZMM11,%ZMM13,%ZMM14 |
(115) 0x427224 VMOVUPD %ZMM14,0xc0(%RBX,%RAX,1) |
(115) 0x42722c ADD $0x100,%RAX |
(115) 0x427232 CMP %RAX,%R11 |
(115) 0x427235 JNE 4270e4 |
(114) 0x42723b MOV 0xa0(%RSP),%R15D |
(114) 0x427243 MOV %EDX,%R13D |
(114) 0x427246 AND $-0x8,%R13D |
(114) 0x42724a ADD %R13D,%R9D |
(114) 0x42724d LEA (%R13,%R15,1),%ESI |
(114) 0x427252 TEST $0x7,%DL |
(114) 0x427255 JE 427516 |
(114) 0x42725b SUB %R13D,%EDX |
(114) 0x42725e LEA -0x1(%RDX),%ECX |
(114) 0x427261 CMP $0x2,%ECX |
(114) 0x427264 JBE 427332 |
(114) 0x42726a MOVSXD 0xa0(%RSP),%RAX |
(114) 0x427272 MOV 0x98(%RSP),%RBX |
(114) 0x42727a MOV 0x68(%RSP),%R14 |
(114) 0x42727f MOV 0x70(%RSP),%R15 |
(114) 0x427284 LEA (%RBX,%RAX,1),%R10 |
(114) 0x427288 MOV 0x88(%RSP),%RBX |
(114) 0x427290 LEA (%R14,%RAX,1),%R12 |
(114) 0x427294 ADD %R13,%R10 |
(114) 0x427297 LEA (%R15,%RAX,1),%R14 |
(114) 0x42729b MOV 0x90(%RSP),%R15 |
(114) 0x4272a3 ADD %R13,%R12 |
(114) 0x4272a6 LEA (%RDI,%R10,8),%RCX |
(114) 0x4272aa LEA (%RBX,%RAX,1),%R10 |
(114) 0x4272ae MOV 0xa8(%RSP),%RBX |
(114) 0x4272b6 LEA (%RDI,%R12,8),%R11 |
(114) 0x4272ba ADD %R13,%R10 |
(114) 0x4272bd LEA 0x1(%R13,%R14,1),%R12 |
(114) 0x4272c2 MOV 0x80(%RSP),%R14 |
(114) 0x4272ca VMOVUPD (%R8,%R12,8),%YMM3 |
(114) 0x4272d0 VMOVUPD (%R15,%R10,8),%YMM15 |
(114) 0x4272d6 VSUBPD (%RCX),%YMM3,%YMM6 |
(114) 0x4272da VADDPD (%R11),%YMM15,%YMM0 |
(114) 0x4272df VADDPD %YMM6,%YMM0,%YMM2 |
(114) 0x4272e3 VSUBPD -0x8(%R8,%R12,8),%YMM2,%YMM7 |
(114) 0x4272ea LEA (%R14,%RAX,1),%R12 |
(114) 0x4272ee ADD %R13,%R12 |
(114) 0x4272f1 VMOVUPD %YMM7,(%RBX,%R12,8) |
(114) 0x4272f7 VMOVUPD (%RCX),%YMM1 |
(114) 0x4272fb VSUBPD (%R11),%YMM1,%YMM4 |
(114) 0x427300 MOV 0xb8(%RSP),%R11 |
(114) 0x427308 ADD %R11,%RAX |
(114) 0x42730b VADDPD %YMM7,%YMM4,%YMM5 |
(114) 0x42730f ADD %R13,%RAX |
(114) 0x427312 MOV 0xb0(%RSP),%R13 |
(114) 0x42731a VMOVUPD %YMM5,(%R13,%RAX,8) |
(114) 0x427321 TEST $0x3,%DL |
(114) 0x427324 JE 427516 |
(114) 0x42732a AND $-0x4,%EDX |
(114) 0x42732d ADD %EDX,%R9D |
(114) 0x427330 ADD %EDX,%ESI |
(114) 0x427332 MOV 0x68(%RSP),%R15 |
(114) 0x427337 MOVSXD %ESI,%RDX |
(114) 0x42733a MOV 0x70(%RSP),%R14 |
(114) 0x42733f MOV 0x98(%RSP),%R13 |
(114) 0x427347 LEA (%R15,%RDX,1),%RAX |
(114) 0x42734b LEA (%RDI,%RAX,8),%R10 |
(114) 0x42734f LEA 0x1(%RSI),%EAX |
(114) 0x427352 CLTQ |
(114) 0x427354 LEA (%R13,%RDX,1),%RCX |
(114) 0x427359 LEA (%R14,%RAX,1),%R12 |
(114) 0x42735d LEA (%RDI,%RCX,8),%R11 |
(114) 0x427361 MOV 0x90(%RSP),%RCX |
(114) 0x427369 LEA (%R8,%R12,8),%RBX |
(114) 0x42736d MOV 0x88(%RSP),%R12 |
(114) 0x427375 MOV %RBX,0xa0(%RSP) |
(114) 0x42737d LEA (%R12,%RDX,1),%RBX |
(114) 0x427381 VMOVSD (%RCX,%RBX,8),%XMM8 |
(114) 0x427386 LEA (%R14,%RDX,1),%RCX |
(114) 0x42738a MOV 0xa0(%RSP),%RBX |
(114) 0x427392 VADDSD (%R10),%XMM8,%XMM9 |
(114) 0x427397 VMOVSD (%RBX),%XMM10 |
(114) 0x42739b MOV 0x80(%RSP),%RBX |
(114) 0x4273a3 VSUBSD (%R11),%XMM10,%XMM11 |
(114) 0x4273a8 ADD %RDX,%RBX |
(114) 0x4273ab VADDSD %XMM11,%XMM9,%XMM12 |
(114) 0x4273b0 VSUBSD (%R8,%RCX,8),%XMM12,%XMM13 |
(114) 0x4273b6 MOV 0xa8(%RSP),%RCX |
(114) 0x4273be VMOVSD %XMM13,(%RCX,%RBX,8) |
(114) 0x4273c3 MOV 0xb8(%RSP),%RBX |
(114) 0x4273cb VMOVSD (%R11),%XMM14 |
(114) 0x4273d0 MOV 0x7c(%RSP),%R11D |
(114) 0x4273d5 ADD %RBX,%RDX |
(114) 0x4273d8 VSUBSD (%R10),%XMM14,%XMM15 |
(114) 0x4273dd MOV 0xb0(%RSP),%R10 |
(114) 0x4273e5 VADDSD %XMM13,%XMM15,%XMM0 |
(114) 0x4273ea VMOVSD %XMM0,(%R10,%RDX,8) |
(114) 0x4273f0 LEA 0x1(%R9),%EDX |
(114) 0x4273f4 CMP %R11D,%EDX |
(114) 0x4273f7 JAE 427516 |
(114) 0x4273fd LEA (%RAX,%R13,1),%R13 |
(114) 0x427401 ADD %RAX,%R12 |
(114) 0x427404 LEA (%RAX,%R15,1),%RCX |
(114) 0x427408 ADD $0x2,%R9D |
(114) 0x42740c LEA (%RDI,%R13,8),%R11 |
(114) 0x427410 MOV 0x90(%RSP),%R13 |
(114) 0x427418 LEA (%RDI,%RCX,8),%R10 |
(114) 0x42741c LEA 0x2(%RSI),%EBX |
(114) 0x42741f VMOVSD (%R13,%R12,8),%XMM3 |
(114) 0x427426 MOV 0xa0(%RSP),%R12 |
(114) 0x42742e MOVSXD %EBX,%RDX |
(114) 0x427431 LEA (%R14,%RDX,1),%RCX |
(114) 0x427435 MOV 0xa8(%RSP),%R13 |
(114) 0x42743d VMOVSD (%R12),%XMM2 |
(114) 0x427443 VADDSD (%R10),%XMM3,%XMM6 |
(114) 0x427448 LEA (%R8,%RCX,8),%RBX |
(114) 0x42744c MOV 0x80(%RSP),%R12 |
(114) 0x427454 VADDSD (%R11),%XMM2,%XMM7 |
(114) 0x427459 MOV %R12,%RCX |
(114) 0x42745c ADD %RAX,%RCX |
(114) 0x42745f VSUBSD %XMM7,%XMM6,%XMM1 |
(114) 0x427463 VADDSD (%RBX),%XMM1,%XMM4 |
(114) 0x427467 VMOVSD %XMM4,(%R13,%RCX,8) |
(114) 0x42746e MOV 0xb8(%RSP),%RCX |
(114) 0x427476 VMOVSD (%R11),%XMM5 |
(114) 0x42747b MOV 0xb0(%RSP),%R11 |
(114) 0x427483 ADD %RCX,%RAX |
(114) 0x427486 VSUBSD (%R10),%XMM5,%XMM8 |
(114) 0x42748b VADDSD %XMM4,%XMM8,%XMM9 |
(114) 0x42748f VMOVSD %XMM9,(%R11,%RAX,8) |
(114) 0x427495 MOV 0x7c(%RSP),%EAX |
(114) 0x427499 CMP %EAX,%R9D |
(114) 0x42749c JAE 427516 |
(114) 0x42749e MOV 0x98(%RSP),%R9 |
(114) 0x4274a6 ADD %RDX,%R15 |
(114) 0x4274a9 MOV 0x90(%RSP),%RAX |
(114) 0x4274b1 ADD $0x3,%ESI |
(114) 0x4274b4 LEA (%RDI,%R15,8),%R10 |
(114) 0x4274b8 MOVSXD %ESI,%RSI |
(114) 0x4274bb ADD %RDX,%R12 |
(114) 0x4274be ADD %RDX,%R9 |
(114) 0x4274c1 ADD %R14,%RSI |
(114) 0x4274c4 LEA (%RDI,%R9,8),%RCX |
(114) 0x4274c8 MOV 0x88(%RSP),%RDI |
(114) 0x4274d0 VMOVSD (%RCX),%XMM12 |
(114) 0x4274d4 ADD %RDX,%RDI |
(114) 0x4274d7 VMOVSD (%RAX,%RDI,8),%XMM10 |
(114) 0x4274dc VADDSD (%RBX),%XMM12,%XMM13 |
(114) 0x4274e0 VADDSD (%R10),%XMM10,%XMM11 |
(114) 0x4274e5 VSUBSD %XMM13,%XMM11,%XMM14 |
(114) 0x4274ea VADDSD (%R8,%RSI,8),%XMM14,%XMM15 |
(114) 0x4274f0 MOV 0xb8(%RSP),%R8 |
(114) 0x4274f8 ADD %RDX,%R8 |
(114) 0x4274fb VMOVSD %XMM15,(%R13,%R12,8) |
(114) 0x427502 VMOVSD (%RCX),%XMM0 |
(114) 0x427506 VSUBSD (%R10),%XMM0,%XMM3 |
(114) 0x42750b VADDSD %XMM15,%XMM3,%XMM6 |
(114) 0x427510 VMOVSD %XMM6,(%R11,%R8,8) |
(114) 0x427516 MOV 0x7c(%RSP),%R9D |
(114) 0x42751b MOV 0x60(%RSP),%RCX |
(114) 0x427520 LEA (%RCX),%EDX |
(114) 0x427522 CMP %EDX,0x5c(%RSP) |
(114) 0x427526 JLE 427548 |
(114) 0x427528 MOV 0x54(%RSP),%R13D |
(114) 0x42752d MOV 0x58(%RSP),%EBX |
(114) 0x427531 MOV 0x78(%RSP),%R10D |
(114) 0x427536 MOV %EBX,0xa0(%RSP) |
(114) 0x42753d SUB %R9D,%R13D |
(114) 0x427540 JMP 426ea0 |
0x427545 NOPL (%RAX) |
0x427548 VZEROUPPER |
0x42754b LEA -0x28(%RBP),%RSP |
0x42754f POP %RBX |
0x427550 POP %R12 |
0x427552 POP %R13 |
0x427554 POP %R14 |
0x427556 POP %R15 |
0x427558 POP %RBP |
0x427559 RET |
0x42755a NOPW (%RAX,%RAX,1) |
(114) 0x427560 LEA 0x1(%RCX),%R13 |
(114) 0x427564 MOV %R13,0x60(%RSP) |
(114) 0x427569 JMP 42751b |
0x42756b NOPL (%RAX,%RAX,1) |
(114) 0x427570 MOV 0xa0(%RSP),%ESI |
(114) 0x427577 XOR %R13D,%R13D |
(114) 0x42757a JMP 42725b |
0x42757f INC %R13D |
0x427582 XOR %EDX,%EDX |
0x427584 JMP 426e26 |
0x427589 NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.09 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.90 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | advec_cell.cpp:136-140 |
Module | exec |
nb instructions | 83 |
nb uops | 93 |
loop length | 305 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.50 cycles |
front end | 15.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 8.00 | 6.33 | 6.33 | 9.00 | 6.07 | 5.90 | 9.00 | 9.00 | 9.00 | 5.93 | 6.33 |
cycles | 6.10 | 11.93 | 6.33 | 6.33 | 9.00 | 6.07 | 5.90 | 9.00 | 9.00 | 9.00 | 5.93 | 6.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.75-14.83 |
Stall cycles | 0.00 |
Front-end | 15.50 |
Dispatch | 11.93 |
DIV/SQRT | 12.00 |
Overall L1 | 15.50 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 11% |
all | 8% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xc0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x34(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%R12),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %ECX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42754b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x7ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RDX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42754b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x7ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x78(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42757f <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x7df> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R13D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R13,%R9,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R10D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42754b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x7ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x58(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x78(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R11D,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R11D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R15D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 426e26 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x86> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:136-140 |
Module | exec |
nb instructions | 83 |
nb uops | 93 |
loop length | 305 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.50 cycles |
front end | 15.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 8.00 | 6.33 | 6.33 | 9.00 | 6.07 | 5.90 | 9.00 | 9.00 | 9.00 | 5.93 | 6.33 |
cycles | 6.10 | 11.93 | 6.33 | 6.33 | 9.00 | 6.07 | 5.90 | 9.00 | 9.00 | 9.00 | 5.93 | 6.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.75-14.83 |
Stall cycles | 0.00 |
Front-end | 15.50 |
Dispatch | 11.93 |
DIV/SQRT | 12.00 |
Overall L1 | 15.50 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 11% |
all | 8% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xc0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x34(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%R12),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %ECX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42754b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x7ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RDX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42754b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x7ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x78(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42757f <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x7df> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R13D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R13,%R9,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R10D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42754b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x7ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x58(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x78(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R11D,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R11D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R15D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 426e26 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.4.lto_priv.0+0x86> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 1.36 | 0.45 |
▼Loop 114 - advec_cell.cpp:136-140 - exec– | 0 | 0 |
○Loop 115 - advec_cell.cpp:139-140 - exec | 1.36 | 0.45 |