Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.22% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.22% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 71 - 75 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for simd collapse(2) |
72: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
73: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
74: post_vol(i, j) = volume(i, j); |
75: pre_vol(i, j) = post_vol(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42a230 PUSH %RBP |
0x42a231 MOV %RSP,%RBP |
0x42a234 PUSH %R15 |
0x42a236 PUSH %R14 |
0x42a238 PUSH %R13 |
0x42a23a PUSH %R12 |
0x42a23c PUSH %RBX |
0x42a23d AND $-0x40,%RSP |
0x42a241 ADD $-0x80,%RSP |
0x42a245 MOV 0x28(%RDI),%EAX |
0x42a248 MOV 0x2c(%RDI),%EDX |
0x42a24b MOV 0x20(%RDI),%EBX |
0x42a24e MOV 0x24(%RDI),%ECX |
0x42a251 ADD $0x4,%EDX |
0x42a254 LEA -0x1(%RAX),%R15D |
0x42a258 LEA -0x1(%RBX),%ESI |
0x42a25b MOV %EDX,0x48(%RSP) |
0x42a25f MOV %ESI,0x44(%RSP) |
0x42a263 CMP %EDX,%R15D |
0x42a266 JGE 42a813 |
0x42a26c MOV %EDX,%EBX |
0x42a26e LEA 0x4(%RCX),%R14D |
0x42a272 SUB %R15D,%EBX |
0x42a275 CMP %R14D,%ESI |
0x42a278 JGE 42a813 |
0x42a27e MOV %RDI,%R13 |
0x42a281 MOV %R14D,%EDI |
0x42a284 SUB %ESI,%EDI |
0x42a286 MOV %EDI,0x4c(%RSP) |
0x42a28a CALL 4046c0 <omp_get_num_threads@plt> |
0x42a28f MOV %EAX,%R12D |
0x42a292 CALL 4045b0 <omp_get_thread_num@plt> |
0x42a297 XOR %EDX,%EDX |
0x42a299 MOV %EAX,%R8D |
0x42a29c MOV 0x4c(%RSP),%EAX |
0x42a2a0 IMUL %EBX,%EAX |
0x42a2a3 DIV %R12D |
0x42a2a6 MOV %EAX,%EDI |
0x42a2a8 CMP %EDX,%R8D |
0x42a2ab JB 42a834 |
0x42a2b1 IMUL %EDI,%R8D |
0x42a2b5 LEA (%R8,%RDX,1),%R11D |
0x42a2b9 LEA (%RDI,%R11,1),%R9D |
0x42a2bd MOV %R9D,0x40(%RSP) |
0x42a2c2 CMP %R9D,%R11D |
0x42a2c5 JAE 42a813 |
0x42a2cb MOV %R11D,%EAX |
0x42a2ce XOR %EDX,%EDX |
0x42a2d0 MOV 0x44(%RSP),%R10D |
0x42a2d5 MOV 0x18(%R13),%RSI |
0x42a2d9 DIVL 0x4c(%RSP) |
0x42a2dd MOV 0x10(%R13),%RBX |
0x42a2e1 MOV %RSI,0x30(%RSP) |
0x42a2e6 MOV %RBX,0x20(%RSP) |
0x42a2eb ADD %EDX,%R10D |
0x42a2ee ADD %R15D,%EAX |
0x42a2f1 MOV %R14D,%EDX |
0x42a2f4 MOV 0x8(%R13),%R15 |
0x42a2f8 MOV (%R13),%R14 |
0x42a2fc MOV %R10D,0x74(%RSP) |
0x42a301 SUB %R10D,%EDX |
0x42a304 MOVSXD %EAX,%R12 |
0x42a307 MOV %R15,0x38(%RSP) |
0x42a30c MOV %R14,0x28(%RSP) |
0x42a311 NOPL (%RAX) |
(130) 0x42a318 CMP %EDX,%EDI |
(130) 0x42a31a CMOVBE %EDI,%EDX |
(130) 0x42a31d LEA (%R11,%RDX,1),%ECX |
(130) 0x42a321 MOV %ECX,0x70(%RSP) |
(130) 0x42a325 CMP %ECX,%R11D |
(130) 0x42a328 JAE 42a7e6 |
(130) 0x42a32e MOV 0x38(%RSP),%R13 |
(130) 0x42a333 MOV 0x30(%RSP),%RDI |
(130) 0x42a338 MOV 0x20(%RSP),%R10 |
(130) 0x42a33d MOV 0x28(%RSP),%RAX |
(130) 0x42a342 MOV (%R13),%R8 |
(130) 0x42a346 MOV (%RDI),%R9 |
(130) 0x42a349 MOV 0x10(%RDI),%R14 |
(130) 0x42a34d MOV (%R10),%RDI |
(130) 0x42a350 IMUL %R12,%R8 |
(130) 0x42a354 MOV 0x10(%R13),%R15 |
(130) 0x42a358 MOV 0x10(%R10),%RSI |
(130) 0x42a35c IMUL %R12,%R9 |
(130) 0x42a360 MOV (%RAX),%R13 |
(130) 0x42a363 MOV 0x10(%RAX),%RBX |
(130) 0x42a367 LEA -0x1(%RDX),%EAX |
(130) 0x42a36a IMUL %R12,%RDI |
(130) 0x42a36e MOV %RSI,0x78(%RSP) |
(130) 0x42a373 MOV %R8,0x58(%RSP) |
(130) 0x42a378 IMUL %R12,%R13 |
(130) 0x42a37c MOV %R9,0x60(%RSP) |
(130) 0x42a381 MOV %RDI,0x68(%RSP) |
(130) 0x42a386 CMP $0x6,%EAX |
(130) 0x42a389 JBE 42a828 |
(130) 0x42a38f MOVSXD 0x74(%RSP),%RAX |
(130) 0x42a394 MOV 0x68(%RSP),%RSI |
(130) 0x42a399 LEA (%R9,%RAX,1),%RCX |
(130) 0x42a39d LEA (%R8,%RAX,1),%R8 |
(130) 0x42a3a1 LEA (%R14,%RCX,8),%R9 |
(130) 0x42a3a5 MOV 0x78(%RSP),%RCX |
(130) 0x42a3aa LEA 0x1(%R13,%RAX,1),%RDI |
(130) 0x42a3af ADD %RSI,%RAX |
(130) 0x42a3b2 SAL $0x3,%RDI |
(130) 0x42a3b6 LEA (%R15,%R8,8),%R10 |
(130) 0x42a3ba LEA (%RCX,%RAX,8),%RSI |
(130) 0x42a3be MOV %EDX,%ECX |
(130) 0x42a3c0 LEA (%RBX,%RDI,1),%R8 |
(130) 0x42a3c4 XOR %EAX,%EAX |
(130) 0x42a3c6 SHR $0x3,%ECX |
(130) 0x42a3c9 LEA -0x8(%RBX,%RDI,1),%RDI |
(130) 0x42a3ce SAL $0x6,%RCX |
(130) 0x42a3d2 MOV %RCX,0x50(%RSP) |
(130) 0x42a3d7 SUB $0x40,%RCX |
(130) 0x42a3db SHR $0x6,%RCX |
(130) 0x42a3df INC %RCX |
(130) 0x42a3e2 AND $0x7,%ECX |
(130) 0x42a3e5 JE 42a533 |
(130) 0x42a3eb CMP $0x1,%RCX |
(130) 0x42a3ef JE 42a501 |
(130) 0x42a3f5 CMP $0x2,%RCX |
(130) 0x42a3f9 JE 42a4da |
(130) 0x42a3ff CMP $0x3,%RCX |
(130) 0x42a403 JE 42a4b3 |
(130) 0x42a409 CMP $0x4,%RCX |
(130) 0x42a40d JE 42a48c |
(130) 0x42a40f CMP $0x5,%RCX |
(130) 0x42a413 JE 42a465 |
(130) 0x42a415 CMP $0x6,%RCX |
(130) 0x42a419 JE 42a43e |
(130) 0x42a41b VMOVUPD (%R10),%ZMM0 |
(130) 0x42a421 MOV $0x40,%EAX |
(130) 0x42a426 VMOVUPD %ZMM0,(%R9) |
(130) 0x42a42c VADDPD (%R8),%ZMM0,%ZMM1 |
(130) 0x42a432 VSUBPD (%RDI),%ZMM1,%ZMM2 |
(130) 0x42a438 VMOVUPD %ZMM2,(%RSI) |
(130) 0x42a43e VMOVUPD (%R10,%RAX,1),%ZMM3 |
(130) 0x42a445 VMOVUPD %ZMM3,(%R9,%RAX,1) |
(130) 0x42a44c VADDPD (%R8,%RAX,1),%ZMM3,%ZMM4 |
(130) 0x42a453 VSUBPD (%RDI,%RAX,1),%ZMM4,%ZMM5 |
(130) 0x42a45a VMOVUPD %ZMM5,(%RSI,%RAX,1) |
(130) 0x42a461 ADD $0x40,%RAX |
(130) 0x42a465 VMOVUPD (%R10,%RAX,1),%ZMM6 |
(130) 0x42a46c VMOVUPD %ZMM6,(%R9,%RAX,1) |
(130) 0x42a473 VADDPD (%R8,%RAX,1),%ZMM6,%ZMM7 |
(130) 0x42a47a VSUBPD (%RDI,%RAX,1),%ZMM7,%ZMM8 |
(130) 0x42a481 VMOVUPD %ZMM8,(%RSI,%RAX,1) |
(130) 0x42a488 ADD $0x40,%RAX |
(130) 0x42a48c VMOVUPD (%R10,%RAX,1),%ZMM9 |
(130) 0x42a493 VMOVUPD %ZMM9,(%R9,%RAX,1) |
(130) 0x42a49a VADDPD (%R8,%RAX,1),%ZMM9,%ZMM10 |
(130) 0x42a4a1 VSUBPD (%RDI,%RAX,1),%ZMM10,%ZMM11 |
(130) 0x42a4a8 VMOVUPD %ZMM11,(%RSI,%RAX,1) |
(130) 0x42a4af ADD $0x40,%RAX |
(130) 0x42a4b3 VMOVUPD (%R10,%RAX,1),%ZMM12 |
(130) 0x42a4ba VMOVUPD %ZMM12,(%R9,%RAX,1) |
(130) 0x42a4c1 VADDPD (%R8,%RAX,1),%ZMM12,%ZMM13 |
(130) 0x42a4c8 VSUBPD (%RDI,%RAX,1),%ZMM13,%ZMM14 |
(130) 0x42a4cf VMOVUPD %ZMM14,(%RSI,%RAX,1) |
(130) 0x42a4d6 ADD $0x40,%RAX |
(130) 0x42a4da VMOVUPD (%R10,%RAX,1),%ZMM15 |
(130) 0x42a4e1 VMOVUPD %ZMM15,(%R9,%RAX,1) |
(130) 0x42a4e8 VADDPD (%R8,%RAX,1),%ZMM15,%ZMM0 |
(130) 0x42a4ef VSUBPD (%RDI,%RAX,1),%ZMM0,%ZMM1 |
(130) 0x42a4f6 VMOVUPD %ZMM1,(%RSI,%RAX,1) |
(130) 0x42a4fd ADD $0x40,%RAX |
(130) 0x42a501 VMOVUPD (%R10,%RAX,1),%ZMM2 |
(130) 0x42a508 VMOVUPD %ZMM2,(%R9,%RAX,1) |
(130) 0x42a50f VADDPD (%R8,%RAX,1),%ZMM2,%ZMM3 |
(130) 0x42a516 VSUBPD (%RDI,%RAX,1),%ZMM3,%ZMM4 |
(130) 0x42a51d VMOVUPD %ZMM4,(%RSI,%RAX,1) |
(130) 0x42a524 ADD $0x40,%RAX |
(130) 0x42a528 CMP %RAX,0x50(%RSP) |
(130) 0x42a52d JE 42a67f |
(131) 0x42a533 VMOVUPD (%R10,%RAX,1),%ZMM5 |
(131) 0x42a53a VMOVUPD %ZMM5,(%R9,%RAX,1) |
(131) 0x42a541 VADDPD (%R8,%RAX,1),%ZMM5,%ZMM6 |
(131) 0x42a548 VSUBPD (%RDI,%RAX,1),%ZMM6,%ZMM7 |
(131) 0x42a54f VMOVUPD %ZMM7,(%RSI,%RAX,1) |
(131) 0x42a556 VMOVUPD 0x40(%R10,%RAX,1),%ZMM8 |
(131) 0x42a55e VMOVUPD %ZMM8,0x40(%R9,%RAX,1) |
(131) 0x42a566 VADDPD 0x40(%R8,%RAX,1),%ZMM8,%ZMM9 |
(131) 0x42a56e VSUBPD 0x40(%RDI,%RAX,1),%ZMM9,%ZMM10 |
(131) 0x42a576 VMOVUPD %ZMM10,0x40(%RSI,%RAX,1) |
(131) 0x42a57e VMOVUPD 0x80(%R10,%RAX,1),%ZMM11 |
(131) 0x42a586 VMOVUPD %ZMM11,0x80(%R9,%RAX,1) |
(131) 0x42a58e VADDPD 0x80(%R8,%RAX,1),%ZMM11,%ZMM12 |
(131) 0x42a596 VSUBPD 0x80(%RDI,%RAX,1),%ZMM12,%ZMM13 |
(131) 0x42a59e VMOVUPD %ZMM13,0x80(%RSI,%RAX,1) |
(131) 0x42a5a6 VMOVUPD 0xc0(%R10,%RAX,1),%ZMM14 |
(131) 0x42a5ae VMOVUPD %ZMM14,0xc0(%R9,%RAX,1) |
(131) 0x42a5b6 VADDPD 0xc0(%R8,%RAX,1),%ZMM14,%ZMM15 |
(131) 0x42a5be VSUBPD 0xc0(%RDI,%RAX,1),%ZMM15,%ZMM0 |
(131) 0x42a5c6 VMOVUPD %ZMM0,0xc0(%RSI,%RAX,1) |
(131) 0x42a5ce VMOVUPD 0x100(%R10,%RAX,1),%ZMM1 |
(131) 0x42a5d6 VMOVUPD %ZMM1,0x100(%R9,%RAX,1) |
(131) 0x42a5de VADDPD 0x100(%R8,%RAX,1),%ZMM1,%ZMM2 |
(131) 0x42a5e6 VSUBPD 0x100(%RDI,%RAX,1),%ZMM2,%ZMM3 |
(131) 0x42a5ee VMOVUPD %ZMM3,0x100(%RSI,%RAX,1) |
(131) 0x42a5f6 VMOVUPD 0x140(%R10,%RAX,1),%ZMM4 |
(131) 0x42a5fe VMOVUPD %ZMM4,0x140(%R9,%RAX,1) |
(131) 0x42a606 VADDPD 0x140(%R8,%RAX,1),%ZMM4,%ZMM5 |
(131) 0x42a60e VSUBPD 0x140(%RDI,%RAX,1),%ZMM5,%ZMM6 |
(131) 0x42a616 VMOVUPD %ZMM6,0x140(%RSI,%RAX,1) |
(131) 0x42a61e VMOVUPD 0x180(%R10,%RAX,1),%ZMM7 |
(131) 0x42a626 VMOVUPD %ZMM7,0x180(%R9,%RAX,1) |
(131) 0x42a62e VADDPD 0x180(%R8,%RAX,1),%ZMM7,%ZMM8 |
(131) 0x42a636 VSUBPD 0x180(%RDI,%RAX,1),%ZMM8,%ZMM9 |
(131) 0x42a63e VMOVUPD %ZMM9,0x180(%RSI,%RAX,1) |
(131) 0x42a646 VMOVUPD 0x1c0(%R10,%RAX,1),%ZMM10 |
(131) 0x42a64e VMOVUPD %ZMM10,0x1c0(%R9,%RAX,1) |
(131) 0x42a656 VADDPD 0x1c0(%R8,%RAX,1),%ZMM10,%ZMM11 |
(131) 0x42a65e VSUBPD 0x1c0(%RDI,%RAX,1),%ZMM11,%ZMM12 |
(131) 0x42a666 VMOVUPD %ZMM12,0x1c0(%RSI,%RAX,1) |
(131) 0x42a66e ADD $0x200,%RAX |
(131) 0x42a674 CMP %RAX,0x50(%RSP) |
(131) 0x42a679 JNE 42a533 |
(130) 0x42a67f MOV 0x74(%RSP),%R10D |
(130) 0x42a684 MOV %EDX,%R9D |
(130) 0x42a687 AND $-0x8,%R9D |
(130) 0x42a68b ADD %R9D,%R11D |
(130) 0x42a68e LEA (%R9,%R10,1),%ESI |
(130) 0x42a692 TEST $0x7,%DL |
(130) 0x42a695 JE 42a7e1 |
(130) 0x42a69b SUB %R9D,%EDX |
(130) 0x42a69e LEA -0x1(%RDX),%R8D |
(130) 0x42a6a2 CMP $0x2,%R8D |
(130) 0x42a6a6 JBE 42a710 |
(130) 0x42a6a8 MOVSXD 0x74(%RSP),%RAX |
(130) 0x42a6ad MOV 0x58(%RSP),%R10 |
(130) 0x42a6b2 MOV 0x60(%RSP),%R8 |
(130) 0x42a6b7 ADD %RAX,%R10 |
(130) 0x42a6ba LEA (%R13,%RAX,1),%RDI |
(130) 0x42a6bf ADD %R9,%R10 |
(130) 0x42a6c2 ADD %RAX,%R8 |
(130) 0x42a6c5 LEA 0x1(%R9,%RDI,1),%RCX |
(130) 0x42a6ca MOV 0x68(%RSP),%RDI |
(130) 0x42a6cf VMOVUPD (%R15,%R10,8),%YMM13 |
(130) 0x42a6d5 ADD %R9,%R8 |
(130) 0x42a6d8 ADD %RDI,%RAX |
(130) 0x42a6db VMOVUPD %YMM13,(%R14,%R8,8) |
(130) 0x42a6e1 ADD %R9,%RAX |
(130) 0x42a6e4 MOV 0x78(%RSP),%R9 |
(130) 0x42a6e9 VMOVUPD (%RBX,%RCX,8),%YMM14 |
(130) 0x42a6ee VSUBPD -0x8(%RBX,%RCX,8),%YMM14,%YMM15 |
(130) 0x42a6f4 VADDPD %YMM13,%YMM15,%YMM0 |
(130) 0x42a6f9 VMOVUPD %YMM0,(%R9,%RAX,8) |
(130) 0x42a6ff TEST $0x3,%DL |
(130) 0x42a702 JE 42a7e1 |
(130) 0x42a708 AND $-0x4,%EDX |
(130) 0x42a70b ADD %EDX,%R11D |
(130) 0x42a70e ADD %EDX,%ESI |
(130) 0x42a710 MOV 0x58(%RSP),%R9 |
(130) 0x42a715 MOVSXD %ESI,%RDX |
(130) 0x42a718 MOV 0x60(%RSP),%R10 |
(130) 0x42a71d LEA (%R9,%RDX,1),%RAX |
(130) 0x42a721 LEA (%R10,%RDX,1),%RCX |
(130) 0x42a725 VMOVSD (%R15,%RAX,8),%XMM1 |
(130) 0x42a72b LEA 0x1(%RSI),%EAX |
(130) 0x42a72e CLTQ |
(130) 0x42a730 LEA (%R13,%RAX,1),%R8 |
(130) 0x42a735 VMOVSD %XMM1,(%R14,%RCX,8) |
(130) 0x42a73b LEA (%RBX,%R8,8),%RCX |
(130) 0x42a73f MOV 0x68(%RSP),%R8 |
(130) 0x42a744 VMOVSD (%RCX),%XMM2 |
(130) 0x42a748 LEA (%R8,%RDX,1),%RDI |
(130) 0x42a74c ADD %R13,%RDX |
(130) 0x42a74f VSUBSD (%RBX,%RDX,8),%XMM2,%XMM3 |
(130) 0x42a754 MOV 0x78(%RSP),%RDX |
(130) 0x42a759 VADDSD %XMM1,%XMM3,%XMM4 |
(130) 0x42a75d VMOVSD %XMM4,(%RDX,%RDI,8) |
(130) 0x42a762 MOV 0x70(%RSP),%EDI |
(130) 0x42a766 LEA 0x1(%R11),%EDX |
(130) 0x42a76a CMP %EDI,%EDX |
(130) 0x42a76c JAE 42a7e1 |
(130) 0x42a76e LEA (%RAX,%R9,1),%RDX |
(130) 0x42a772 LEA (%RAX,%R10,1),%RDI |
(130) 0x42a776 ADD %R8,%RAX |
(130) 0x42a779 ADD $0x2,%R11D |
(130) 0x42a77d VMOVSD (%R15,%RDX,8),%XMM5 |
(130) 0x42a783 LEA 0x2(%RSI),%EDX |
(130) 0x42a786 MOVSXD %EDX,%RDX |
(130) 0x42a789 VMOVSD %XMM5,(%R14,%RDI,8) |
(130) 0x42a78f LEA (%R13,%RDX,1),%RDI |
(130) 0x42a794 LEA (%RBX,%RDI,8),%RDI |
(130) 0x42a798 VADDSD (%RDI),%XMM5,%XMM6 |
(130) 0x42a79c VSUBSD (%RCX),%XMM6,%XMM7 |
(130) 0x42a7a0 MOV 0x78(%RSP),%RCX |
(130) 0x42a7a5 VMOVSD %XMM7,(%RCX,%RAX,8) |
(130) 0x42a7aa MOV 0x70(%RSP),%EAX |
(130) 0x42a7ae CMP %EAX,%R11D |
(130) 0x42a7b1 JAE 42a7e1 |
(130) 0x42a7b3 ADD %RDX,%R9 |
(130) 0x42a7b6 ADD $0x3,%ESI |
(130) 0x42a7b9 ADD %RDX,%R10 |
(130) 0x42a7bc ADD %RDX,%R8 |
(130) 0x42a7bf VMOVSD (%R15,%R9,8),%XMM8 |
(130) 0x42a7c5 MOVSXD %ESI,%R11 |
(130) 0x42a7c8 ADD %R13,%R11 |
(130) 0x42a7cb VMOVSD %XMM8,(%R14,%R10,8) |
(130) 0x42a7d1 VADDSD (%RBX,%R11,8),%XMM8,%XMM9 |
(130) 0x42a7d7 VSUBSD (%RDI),%XMM9,%XMM10 |
(130) 0x42a7db VMOVSD %XMM10,(%RCX,%R8,8) |
(130) 0x42a7e1 MOV 0x70(%RSP),%R11D |
(130) 0x42a7e6 INC %R12 |
(130) 0x42a7e9 LEA (%R12),%R15D |
(130) 0x42a7ed CMP %R15D,0x48(%RSP) |
(130) 0x42a7f2 JLE 42a810 |
(130) 0x42a7f4 MOV 0x40(%RSP),%EDI |
(130) 0x42a7f8 MOV 0x44(%RSP),%R14D |
(130) 0x42a7fd MOV 0x4c(%RSP),%EDX |
(130) 0x42a801 MOV %R14D,0x74(%RSP) |
(130) 0x42a806 SUB %R11D,%EDI |
(130) 0x42a809 JMP 42a318 |
0x42a80e XCHG %AX,%AX |
0x42a810 VZEROUPPER |
0x42a813 LEA -0x28(%RBP),%RSP |
0x42a817 POP %RBX |
0x42a818 POP %R12 |
0x42a81a POP %R13 |
0x42a81c POP %R14 |
0x42a81e POP %R15 |
0x42a820 POP %RBP |
0x42a821 RET |
0x42a822 NOPW (%RAX,%RAX,1) |
(130) 0x42a828 MOV 0x74(%RSP),%ESI |
(130) 0x42a82c XOR %R9D,%R9D |
(130) 0x42a82f JMP 42a69b |
0x42a834 INC %EDI |
0x42a836 XOR %EDX,%EDX |
0x42a838 JMP 42a2b1 |
0x42a83d NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.17 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.81 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.24-14.38 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA -0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42a813 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42a813 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x4c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42a834 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%R11,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42a813 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x44(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x4c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R10D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42a2b1 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.24-14.38 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA -0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42a813 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42a813 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x4c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42a834 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%R11,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42a813 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x44(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x4c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R10D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42a2b1 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 2.22 | 0.74 |
▼Loop 130 - advec_mom.cpp:74-75 - exec– | 0.01 | 0 |
○Loop 131 - advec_mom.cpp:74-75 - exec | 2.22 | 0.73 |