Function: clover_pack_message_right(global_variables&, int, int, int, int, clover::Buffer2D<double>& ... | Module: exec | Source: pack_kernel.cpp:120-124 [...] | Coverage: 0.03% |
---|
Function: clover_pack_message_right(global_variables&, int, int, int, int, clover::Buffer2D<double>& ... | Module: exec | Source: pack_kernel.cpp:120-124 [...] | Coverage: 0.03% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 120 - 124 |
-------------------------------------------------------------------------------- |
120: #pragma omp parallel for simd |
121: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
122: for (int j = 0; j < depth; ++j) { |
123: int index = buffer_offset + j + k * depth; |
124: right_snd[index] = field(x_max + 1 - j, k); |
0x43c2d0 PUSH %RBP |
0x43c2d1 MOV %RSP,%RBP |
0x43c2d4 PUSH %R15 |
0x43c2d6 PUSH %R14 |
0x43c2d8 PUSH %R13 |
0x43c2da PUSH %R12 |
0x43c2dc MOV %RDI,%R12 |
0x43c2df PUSH %RBX |
0x43c2e0 AND $-0x40,%RSP |
0x43c2e4 ADD $-0x80,%RSP |
0x43c2e8 MOV 0x1c(%RDI),%EBX |
0x43c2eb MOV 0x14(%RDI),%R14D |
0x43c2ef CALL 4046c0 <omp_get_num_threads@plt> |
0x43c2f4 MOV %EAX,%R13D |
0x43c2f7 SUB %EBX,%R14D |
0x43c2fa CALL 4045b0 <omp_get_thread_num@plt> |
0x43c2ff INC %R14D |
0x43c302 MOV %EAX,%ECX |
0x43c304 MOV 0x18(%R12),%EAX |
0x43c309 ADD 0x24(%R12),%EAX |
0x43c30e LEA 0x2(%RBX,%RAX,1),%EAX |
0x43c312 SUB %R14D,%EAX |
0x43c315 CLTD |
0x43c316 IDIV %R13D |
0x43c319 CMP %EDX,%ECX |
0x43c31b JL 43c878 |
0x43c321 IMUL %EAX,%ECX |
0x43c324 ADD %ECX,%EDX |
0x43c326 ADD %EDX,%EAX |
0x43c328 CMP %EAX,%EDX |
0x43c32a JGE 43c5c8 |
0x43c330 MOVSXD 0x20(%R12),%RSI |
0x43c335 MOV 0x10(%R12),%EDI |
0x43c33a ADD %R14D,%EAX |
0x43c33d ADD %R14D,%EDX |
0x43c340 MOV (%R12),%R9 |
0x43c344 MOV 0x8(%R12),%R11 |
0x43c349 MOV %EAX,0x70(%RSP) |
0x43c34d MOV %ESI,0x28(%RSP) |
0x43c351 MOV %EDI,0x20(%RSP) |
0x43c355 TEST %EBX,%EBX |
0x43c357 JLE 43c5c8 |
0x43c35d MOV %EBX,%R12D |
0x43c360 MOV %EBX,%EAX |
0x43c362 MOV (%R9),%R14 |
0x43c365 LEA -0x1(%RBX),%R13D |
0x43c369 IMUL %EDX,%R12D |
0x43c36d SHR $0x3,%EAX |
0x43c370 MOV %R13D,0x74(%RSP) |
0x43c375 MOV 0x8(%R11),%R11 |
0x43c379 SAL $0x6,%RAX |
0x43c37d MOVSXD %EBX,%R15 |
0x43c380 MOV %EBX,%R13D |
0x43c383 MOV %R14,0x68(%RSP) |
0x43c388 LEA (,%R15,8),%RCX |
0x43c390 AND $0x7,%R13D |
0x43c394 MOV %RAX,0x30(%RSP) |
0x43c399 MOV $0x8,%R14D |
0x43c39f MOVSXD %R12D,%R8 |
0x43c3a2 LEA 0x1(%RDI),%EAX |
0x43c3a5 MOV %R13D,0x48(%RSP) |
0x43c3aa MOV 0x10(%R9),%R10 |
0x43c3ae ADD %RSI,%R8 |
0x43c3b1 MOV %EBX,%ESI |
0x43c3b3 MOV %EAX,0x24(%RSP) |
0x43c3b7 LEA (%R11,%RCX,1),%R9 |
0x43c3bb AND $-0x8,%ESI |
0x43c3be CLTQ |
0x43c3c0 MOVSXD %EDI,%RDI |
0x43c3c3 SUB %RCX,%R14 |
0x43c3c6 DEC %R13D |
0x43c3c9 MOV %ESI,0x4c(%RSP) |
0x43c3cd VMOVDQA64 0x27169(%RIP),%ZMM0 |
0x43c3d7 MOVSXD %EDX,%RDX |
0x43c3da MOV %RAX,0x40(%RSP) |
0x43c3df MOV %R9,0x38(%RSP) |
0x43c3e4 MOV %RDI,0x60(%RSP) |
0x43c3e9 MOV %R14,0x58(%RSP) |
0x43c3ee MOV %R13D,0x2c(%RSP) |
0x43c3f3 MOV %RCX,0x78(%RSP) |
0x43c3f8 NOPL (%RAX,%RAX,1) |
(219) 0x43c400 MOV 0x68(%RSP),%R9 |
(219) 0x43c405 LEA (,%R8,8),%RAX |
(219) 0x43c40d IMUL %RDX,%R9 |
(219) 0x43c411 CMPL $0x2,0x74(%RSP) |
(219) 0x43c416 JBE 43c460 |
(219) 0x43c418 MOV 0x60(%RSP),%RCX |
(219) 0x43c41d MOV 0x58(%RSP),%RSI |
(219) 0x43c422 LEA (,%R8,8),%RAX |
(219) 0x43c42a MOV 0x78(%RSP),%R14 |
(219) 0x43c42f LEA (%R11,%RAX,1),%R13 |
(219) 0x43c433 LEA 0x2(%R9,%RCX,1),%RCX |
(219) 0x43c438 SAL $0x3,%RCX |
(219) 0x43c43c ADD %RAX,%R14 |
(219) 0x43c43f LEA -0x8(%RCX,%RSI,1),%RDI |
(219) 0x43c444 ADD %R11,%R14 |
(219) 0x43c447 ADD %R10,%RDI |
(219) 0x43c44a CMP %R14,%RDI |
(219) 0x43c44d JAE 43c5e0 |
(219) 0x43c453 LEA (%R10,%RCX,1),%RSI |
(219) 0x43c457 CMP %RSI,%R13 |
(219) 0x43c45a JAE 43c5e0 |
(219) 0x43c460 MOV 0x38(%RSP),%R13 |
(219) 0x43c465 LEA (%R11,%RAX,1),%RCX |
(219) 0x43c469 MOV 0x40(%RSP),%RSI |
(219) 0x43c46e ADD %R13,%RAX |
(219) 0x43c471 ADD %RSI,%R9 |
(219) 0x43c474 MOV %RAX,%R14 |
(219) 0x43c477 LEA (%R10,%R9,8),%R9 |
(219) 0x43c47b SUB %RCX,%R14 |
(219) 0x43c47e SUB $0x8,%R14 |
(219) 0x43c482 SHR $0x3,%R14 |
(219) 0x43c486 INC %R14 |
(219) 0x43c489 AND $0x7,%R14D |
(219) 0x43c48d JE 43c53e |
(219) 0x43c493 CMP $0x1,%R14 |
(219) 0x43c497 JE 43c527 |
(219) 0x43c49d CMP $0x2,%R14 |
(219) 0x43c4a1 JE 43c515 |
(219) 0x43c4a3 CMP $0x3,%R14 |
(219) 0x43c4a7 JE 43c503 |
(219) 0x43c4a9 CMP $0x4,%R14 |
(219) 0x43c4ad JE 43c4f1 |
(219) 0x43c4af CMP $0x5,%R14 |
(219) 0x43c4b3 JE 43c4df |
(219) 0x43c4b5 CMP $0x6,%R14 |
(219) 0x43c4b9 JE 43c4cd |
(219) 0x43c4bb VMOVSD (%R9),%XMM5 |
(219) 0x43c4c0 ADD $0x8,%RCX |
(219) 0x43c4c4 SUB $0x8,%R9 |
(219) 0x43c4c8 VMOVSD %XMM5,-0x8(%RCX) |
(219) 0x43c4cd VMOVSD (%R9),%XMM6 |
(219) 0x43c4d2 ADD $0x8,%RCX |
(219) 0x43c4d6 SUB $0x8,%R9 |
(219) 0x43c4da VMOVSD %XMM6,-0x8(%RCX) |
(219) 0x43c4df VMOVSD (%R9),%XMM7 |
(219) 0x43c4e4 ADD $0x8,%RCX |
(219) 0x43c4e8 SUB $0x8,%R9 |
(219) 0x43c4ec VMOVSD %XMM7,-0x8(%RCX) |
(219) 0x43c4f1 VMOVSD (%R9),%XMM8 |
(219) 0x43c4f6 ADD $0x8,%RCX |
(219) 0x43c4fa SUB $0x8,%R9 |
(219) 0x43c4fe VMOVSD %XMM8,-0x8(%RCX) |
(219) 0x43c503 VMOVSD (%R9),%XMM9 |
(219) 0x43c508 ADD $0x8,%RCX |
(219) 0x43c50c SUB $0x8,%R9 |
(219) 0x43c510 VMOVSD %XMM9,-0x8(%RCX) |
(219) 0x43c515 VMOVSD (%R9),%XMM10 |
(219) 0x43c51a ADD $0x8,%RCX |
(219) 0x43c51e SUB $0x8,%R9 |
(219) 0x43c522 VMOVSD %XMM10,-0x8(%RCX) |
(219) 0x43c527 VMOVSD (%R9),%XMM11 |
(219) 0x43c52c ADD $0x8,%RCX |
(219) 0x43c530 SUB $0x8,%R9 |
(219) 0x43c534 VMOVSD %XMM11,-0x8(%RCX) |
(219) 0x43c539 CMP %RCX,%RAX |
(219) 0x43c53c JE 43c5b0 |
(219) 0x43c53e MOV 0x78(%RSP),%RDI |
(218) 0x43c543 VMOVSD (%R9),%XMM12 |
(218) 0x43c548 ADD $0x40,%RCX |
(218) 0x43c54c SUB $0x40,%R9 |
(218) 0x43c550 VMOVSD %XMM12,-0x40(%RCX) |
(218) 0x43c555 VMOVSD 0x38(%R9),%XMM13 |
(218) 0x43c55b VMOVSD %XMM13,-0x38(%RCX) |
(218) 0x43c560 VMOVSD 0x30(%R9),%XMM14 |
(218) 0x43c566 VMOVSD %XMM14,-0x30(%RCX) |
(218) 0x43c56b VMOVSD 0x28(%R9),%XMM15 |
(218) 0x43c571 VMOVSD %XMM15,-0x28(%RCX) |
(218) 0x43c576 VMOVSD 0x20(%R9),%XMM1 |
(218) 0x43c57c VMOVSD %XMM1,-0x20(%RCX) |
(218) 0x43c581 VMOVSD 0x18(%R9),%XMM2 |
(218) 0x43c587 VMOVSD %XMM2,-0x18(%RCX) |
(218) 0x43c58c VMOVSD 0x10(%R9),%XMM3 |
(218) 0x43c592 VMOVSD %XMM3,-0x10(%RCX) |
(218) 0x43c597 VMOVSD 0x8(%R9),%XMM4 |
(218) 0x43c59d VMOVSD %XMM4,-0x8(%RCX) |
(218) 0x43c5a2 CMP %RCX,%RAX |
(218) 0x43c5a5 JNE 43c543 |
(219) 0x43c5a7 MOV %RDI,0x78(%RSP) |
(219) 0x43c5ac NOPL (%RAX) |
(219) 0x43c5b0 INC %RDX |
(219) 0x43c5b3 ADD %EBX,%R12D |
(219) 0x43c5b6 ADD %R15,%R8 |
(219) 0x43c5b9 LEA (%RDX),%EAX |
(219) 0x43c5bb CMP %EAX,0x70(%RSP) |
(219) 0x43c5bf JG 43c400 |
0x43c5c5 VZEROUPPER |
0x43c5c8 LEA -0x28(%RBP),%RSP |
0x43c5cc POP %RBX |
0x43c5cd POP %R12 |
0x43c5cf POP %R13 |
0x43c5d1 POP %R14 |
0x43c5d3 POP %R15 |
0x43c5d5 POP %RBP |
0x43c5d6 RET |
0x43c5d7 NOPW (%RAX,%RAX,1) |
(219) 0x43c5e0 CMPL $0x6,0x74(%RSP) |
(219) 0x43c5e5 JBE 43c89f |
(219) 0x43c5eb MOV 0x30(%RSP),%R14 |
(219) 0x43c5f0 LEA -0x40(%R10,%RCX,1),%RAX |
(219) 0x43c5f5 MOV %R13,%RSI |
(219) 0x43c5f8 MOV %RAX,%RDI |
(219) 0x43c5fb SUB %R14,%RDI |
(219) 0x43c5fe MOV %RDI,0x50(%RSP) |
(219) 0x43c603 LEA -0x40(%R14),%RDI |
(219) 0x43c607 SHR $0x6,%RDI |
(219) 0x43c60b INC %RDI |
(219) 0x43c60e AND $0x7,%EDI |
(219) 0x43c611 JE 43c6e8 |
(219) 0x43c617 CMP $0x1,%RDI |
(219) 0x43c61b JE 43c6c4 |
(219) 0x43c621 CMP $0x2,%RDI |
(219) 0x43c625 JE 43c6ab |
(219) 0x43c62b CMP $0x3,%RDI |
(219) 0x43c62f JE 43c692 |
(219) 0x43c631 CMP $0x4,%RDI |
(219) 0x43c635 JE 43c679 |
(219) 0x43c637 CMP $0x5,%RDI |
(219) 0x43c63b JE 43c660 |
(219) 0x43c63d CMP $0x6,%RDI |
(219) 0x43c641 JNE 43c881 |
(219) 0x43c647 VXORPS %XMM2,%XMM2,%XMM2 |
(219) 0x43c64b VPERMPD (%RAX),%ZMM0,%ZMM2 |
(219) 0x43c651 ADD $0x40,%RSI |
(219) 0x43c655 SUB $0x40,%RAX |
(219) 0x43c659 VMOVUPD %ZMM2,-0x40(%RSI) |
(219) 0x43c660 VXORPS %XMM3,%XMM3,%XMM3 |
(219) 0x43c664 VPERMPD (%RAX),%ZMM0,%ZMM3 |
(219) 0x43c66a ADD $0x40,%RSI |
(219) 0x43c66e SUB $0x40,%RAX |
(219) 0x43c672 VMOVUPD %ZMM3,-0x40(%RSI) |
(219) 0x43c679 VXORPS %XMM4,%XMM4,%XMM4 |
(219) 0x43c67d VPERMPD (%RAX),%ZMM0,%ZMM4 |
(219) 0x43c683 ADD $0x40,%RSI |
(219) 0x43c687 SUB $0x40,%RAX |
(219) 0x43c68b VMOVUPD %ZMM4,-0x40(%RSI) |
(219) 0x43c692 VXORPS %XMM5,%XMM5,%XMM5 |
(219) 0x43c696 VPERMPD (%RAX),%ZMM0,%ZMM5 |
(219) 0x43c69c ADD $0x40,%RSI |
(219) 0x43c6a0 SUB $0x40,%RAX |
(219) 0x43c6a4 VMOVUPD %ZMM5,-0x40(%RSI) |
(219) 0x43c6ab VXORPS %XMM6,%XMM6,%XMM6 |
(219) 0x43c6af VPERMPD (%RAX),%ZMM0,%ZMM6 |
(219) 0x43c6b5 ADD $0x40,%RSI |
(219) 0x43c6b9 SUB $0x40,%RAX |
(219) 0x43c6bd VMOVUPD %ZMM6,-0x40(%RSI) |
(219) 0x43c6c4 VXORPS %XMM7,%XMM7,%XMM7 |
(219) 0x43c6c8 VPERMPD (%RAX),%ZMM0,%ZMM7 |
(219) 0x43c6ce SUB $0x40,%RAX |
(219) 0x43c6d2 ADD $0x40,%RSI |
(219) 0x43c6d6 VMOVUPD %ZMM7,-0x40(%RSI) |
(219) 0x43c6dd CMP %RAX,0x50(%RSP) |
(219) 0x43c6e2 JE 43c7a1 |
(219) 0x43c6e8 MOV 0x78(%RSP),%R13 |
(220) 0x43c6ed VXORPS %XMM8,%XMM8,%XMM8 |
(220) 0x43c6f2 VPERMPD (%RAX),%ZMM0,%ZMM8 |
(220) 0x43c6f8 SUB $0x200,%RAX |
(220) 0x43c6fe ADD $0x200,%RSI |
(220) 0x43c705 VMOVUPD %ZMM8,-0x200(%RSI) |
(220) 0x43c70c VXORPS %XMM9,%XMM9,%XMM9 |
(220) 0x43c711 VPERMPD 0x1c0(%RAX),%ZMM0,%ZMM9 |
(220) 0x43c718 VMOVUPD %ZMM9,-0x1c0(%RSI) |
(220) 0x43c71f VXORPS %XMM10,%XMM10,%XMM10 |
(220) 0x43c724 VPERMPD 0x180(%RAX),%ZMM0,%ZMM10 |
(220) 0x43c72b VMOVUPD %ZMM10,-0x180(%RSI) |
(220) 0x43c732 VXORPS %XMM11,%XMM11,%XMM11 |
(220) 0x43c737 VPERMPD 0x140(%RAX),%ZMM0,%ZMM11 |
(220) 0x43c73e VMOVUPD %ZMM11,-0x140(%RSI) |
(220) 0x43c745 VXORPS %XMM12,%XMM12,%XMM12 |
(220) 0x43c74a VPERMPD 0x100(%RAX),%ZMM0,%ZMM12 |
(220) 0x43c751 VMOVUPD %ZMM12,-0x100(%RSI) |
(220) 0x43c758 VXORPS %XMM13,%XMM13,%XMM13 |
(220) 0x43c75d VPERMPD 0xc0(%RAX),%ZMM0,%ZMM13 |
(220) 0x43c764 VMOVUPD %ZMM13,-0xc0(%RSI) |
(220) 0x43c76b VXORPS %XMM14,%XMM14,%XMM14 |
(220) 0x43c770 VPERMPD 0x80(%RAX),%ZMM0,%ZMM14 |
(220) 0x43c777 VMOVUPD %ZMM14,-0x80(%RSI) |
(220) 0x43c77e VXORPS %XMM15,%XMM15,%XMM15 |
(220) 0x43c783 VPERMPD 0x40(%RAX),%ZMM0,%ZMM15 |
(220) 0x43c78a VMOVUPD %ZMM15,-0x40(%RSI) |
(220) 0x43c791 CMP %RAX,0x50(%RSP) |
(220) 0x43c796 JNE 43c6ed |
(219) 0x43c79c MOV %R13,0x78(%RSP) |
(219) 0x43c7a1 CMP %EBX,0x4c(%RSP) |
(219) 0x43c7a5 JE 43c5b0 |
(219) 0x43c7ab CMPL $0x2,0x2c(%RSP) |
(219) 0x43c7b0 MOV 0x48(%RSP),%R14D |
(219) 0x43c7b5 JBE 43c8ac |
(219) 0x43c7bb MOV 0x4c(%RSP),%R13D |
(219) 0x43c7c0 MOV %R13D,%EAX |
(219) 0x43c7c3 MOV %RAX,%RSI |
(219) 0x43c7c6 ADD %R10,%RCX |
(219) 0x43c7c9 ADD %R8,%RAX |
(219) 0x43c7cc NEG %RSI |
(219) 0x43c7cf VXORPS %XMM1,%XMM1,%XMM1 |
(219) 0x43c7d3 VPERMPD $0x1b,-0x20(%RCX,%RSI,8),%YMM1 |
(219) 0x43c7db VMOVUPD %YMM1,(%R11,%RAX,8) |
(219) 0x43c7e1 MOV %R14D,%EAX |
(219) 0x43c7e4 AND $-0x4,%EAX |
(219) 0x43c7e7 ADD %R13D,%EAX |
(219) 0x43c7ea AND $0x3,%R14D |
(219) 0x43c7ee JE 43c5b0 |
(219) 0x43c7f4 MOV 0x24(%RSP),%EDI |
(219) 0x43c7f8 MOV 0x28(%RSP),%R13D |
(219) 0x43c7fd MOV %EDI,%ECX |
(219) 0x43c7ff LEA (%R13,%RAX,1),%ESI |
(219) 0x43c804 SUB %EAX,%ECX |
(219) 0x43c806 ADD %R12D,%ESI |
(219) 0x43c809 MOVSXD %ECX,%R14 |
(219) 0x43c80c MOVSXD %ESI,%RCX |
(219) 0x43c80f ADD %R9,%R14 |
(219) 0x43c812 VMOVSD (%R10,%R14,8),%XMM2 |
(219) 0x43c818 LEA 0x1(%RAX),%R14D |
(219) 0x43c81c VMOVSD %XMM2,(%R11,%RCX,8) |
(219) 0x43c822 CMP %R14D,%EBX |
(219) 0x43c825 JLE 43c5b0 |
(219) 0x43c82b MOV 0x20(%RSP),%ESI |
(219) 0x43c82f ADD %R13D,%R14D |
(219) 0x43c832 ADD %R12D,%R14D |
(219) 0x43c835 SUB %EAX,%ESI |
(219) 0x43c837 MOVSXD %R14D,%R14 |
(219) 0x43c83a ADD $0x2,%EAX |
(219) 0x43c83d MOVSXD %ESI,%RCX |
(219) 0x43c840 ADD %R9,%RCX |
(219) 0x43c843 VMOVSD (%R10,%RCX,8),%XMM3 |
(219) 0x43c849 VMOVSD %XMM3,(%R11,%R14,8) |
(219) 0x43c84f CMP %EAX,%EBX |
(219) 0x43c851 JLE 43c5b0 |
(219) 0x43c857 SUB %EAX,%EDI |
(219) 0x43c859 ADD %R13D,%EAX |
(219) 0x43c85c MOVSXD %EDI,%RDI |
(219) 0x43c85f ADD %R12D,%EAX |
(219) 0x43c862 ADD %R9,%RDI |
(219) 0x43c865 CLTQ |
(219) 0x43c867 VMOVSD (%R10,%RDI,8),%XMM4 |
(219) 0x43c86d VMOVSD %XMM4,(%R11,%RAX,8) |
(219) 0x43c873 JMP 43c5b0 |
0x43c878 INC %EAX |
0x43c87a XOR %EDX,%EDX |
0x43c87c JMP 43c321 |
(219) 0x43c881 VXORPS %XMM1,%XMM1,%XMM1 |
(219) 0x43c885 VPERMPD (%RAX),%ZMM0,%ZMM1 |
(219) 0x43c88b ADD $0x40,%RSI |
(219) 0x43c88f SUB $0x40,%RAX |
(219) 0x43c893 VMOVUPD %ZMM1,(%R13) |
(219) 0x43c89a JMP 43c647 |
(219) 0x43c89f MOV %EBX,%R14D |
(219) 0x43c8a2 XOR %EAX,%EAX |
(219) 0x43c8a4 XOR %R13D,%R13D |
(219) 0x43c8a7 JMP 43c7c3 |
(219) 0x43c8ac MOV 0x4c(%RSP),%EAX |
(219) 0x43c8b0 JMP 43c7f4 |
0x43c8b5 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○96.30 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○3.24 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | pack_kernel.cpp:120-124 |
Module | exec |
nb instructions | 95 |
nb uops | 101 |
loop length | 351 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 16 |
micro-operation queue | 16.83 cycles |
front end | 16.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.90 | 7.80 | 6.33 | 6.33 | 11.50 | 7.87 | 7.70 | 11.50 | 11.50 | 11.50 | 7.73 | 6.33 |
cycles | 7.90 | 12.03 | 6.33 | 6.33 | 11.50 | 7.87 | 7.70 | 11.50 | 11.50 | 11.50 | 7.73 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 16.35 |
Stall cycles | 0.00 |
Front-end | 16.83 |
Dispatch | 12.03 |
DIV/SQRT | 6.00 |
Overall L1 | 16.83 |
all | 4% |
load | 20% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 6% |
all | 11% |
load | 28% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 8% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 43c878 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x5a8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43c5c8 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x20(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R12),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c5c8 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R9),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RBX),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
IMUL %EDX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x3,%EAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R13D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R11),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOVSXD %EBX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R15,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R12D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA 0x1(%RDI),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R13D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%RCX,1),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOVSXD %EDI,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0x27169(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13D,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43c321 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:120-124 |
Module | exec |
nb instructions | 95 |
nb uops | 101 |
loop length | 351 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 16 |
micro-operation queue | 16.83 cycles |
front end | 16.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.90 | 7.80 | 6.33 | 6.33 | 11.50 | 7.87 | 7.70 | 11.50 | 11.50 | 11.50 | 7.73 | 6.33 |
cycles | 7.90 | 12.03 | 6.33 | 6.33 | 11.50 | 7.87 | 7.70 | 11.50 | 11.50 | 11.50 | 7.73 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 16.35 |
Stall cycles | 0.00 |
Front-end | 16.83 |
Dispatch | 12.03 |
DIV/SQRT | 6.00 |
Overall L1 | 16.83 |
all | 4% |
load | 20% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 6% |
all | 11% |
load | 28% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 8% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 43c878 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x5a8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43c5c8 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x20(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R12),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c5c8 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R9),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RBX),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
IMUL %EDX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x3,%EAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R13D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R11),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOVSXD %EBX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R15,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R12D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA 0x1(%RDI),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R13D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%RCX,1),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOVSXD %EDI,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0x27169(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13D,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43c321 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clover_pack_message_right(global_variables&, int, int, int, int, clover::Buffer2D | 0.03 | 0.01 |
▼Loop 219 - pack_kernel.cpp:120-124 - exec– | 0.03 | 0.02 |
○Loop 220 - pack_kernel.cpp:122-124 - exec | 0 | 0 |
○Loop 218 - pack_kernel.cpp:122-124 - exec | 0 | 0 |