Function: clover_unpack_message_right(global_variables&, int, int, int, int, clover::Buffer2D<double ... | Module: exec | Source: pack_kernel.cpp:156-160 [...] | Coverage: 0.01% |
---|
Function: clover_unpack_message_right(global_variables&, int, int, int, int, clover::Buffer2D<double ... | Module: exec | Source: pack_kernel.cpp:156-160 [...] | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 156 - 160 |
-------------------------------------------------------------------------------- |
156: #pragma omp parallel for simd |
157: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
158: for (int j = 0; j < depth; ++j) { |
159: int index = buffer_offset + j + k * depth; |
160: field(x_max + x_inc + j + 2, k) = right_rcv[index]; |
0x4389a0 PUSH %RBP |
0x4389a1 MOV %RSP,%RBP |
0x4389a4 PUSH %R15 |
0x4389a6 PUSH %R14 |
0x4389a8 PUSH %R13 |
0x4389aa PUSH %R12 |
0x4389ac MOV %RDI,%R12 |
0x4389af PUSH %RBX |
0x4389b0 AND $-0x40,%RSP |
0x4389b4 ADD $-0x80,%RSP |
0x4389b8 MOV 0x1c(%RDI),%EBX |
0x4389bb MOV 0x14(%RDI),%R14D |
0x4389bf CALL 4046c0 <omp_get_num_threads@plt> |
0x4389c4 MOV %EAX,%R13D |
0x4389c7 SUB %EBX,%R14D |
0x4389ca CALL 4045b0 <omp_get_thread_num@plt> |
0x4389cf INC %R14D |
0x4389d2 MOV %EAX,%ECX |
0x4389d4 MOV 0x18(%R12),%EAX |
0x4389d9 ADD 0x28(%R12),%EAX |
0x4389de LEA 0x2(%RBX,%RAX,1),%EAX |
0x4389e2 SUB %R14D,%EAX |
0x4389e5 CLTD |
0x4389e6 IDIV %R13D |
0x4389e9 CMP %EDX,%ECX |
0x4389eb JL 438e73 |
0x4389f1 IMUL %EAX,%ECX |
0x4389f4 ADD %ECX,%EDX |
0x4389f6 ADD %EDX,%EAX |
0x4389f8 CMP %EAX,%EDX |
0x4389fa JGE 438c3a |
0x438a00 MOVSXD 0x20(%R12),%R10 |
0x438a05 ADD %R14D,%EAX |
0x438a08 ADD %R14D,%EDX |
0x438a0b MOVSXD 0x24(%R12),%R15 |
0x438a10 MOVSXD 0x10(%R12),%RSI |
0x438a15 MOV 0x8(%R12),%RDI |
0x438a1a MOV %EAX,0x7c(%RSP) |
0x438a1e MOV %R10D,0x40(%RSP) |
0x438a23 MOV (%R12),%R14 |
0x438a27 TEST %EBX,%EBX |
0x438a29 JLE 438c3a |
0x438a2f LEA -0x1(%RBX),%EAX |
0x438a32 MOV %EBX,%R12D |
0x438a35 MOVSXD %EBX,%R11 |
0x438a38 MOV 0x8(%RDI),%R9 |
0x438a3c IMUL %EDX,%R12D |
0x438a40 MOV %EAX,0x78(%RSP) |
0x438a44 MOV %EBX,%EDI |
0x438a46 LEA (%R15,%RSI,1),%EAX |
0x438a4a LEA (%RSI,%R15,1),%R15 |
0x438a4e MOV %EBX,%ESI |
0x438a50 MOV (%R14),%RCX |
0x438a53 MOV %R11,0x68(%RSP) |
0x438a58 AND $0x7,%ESI |
0x438a5b LEA (,%R11,8),%R13 |
0x438a63 SHR $0x3,%EDI |
0x438a66 MOV 0x10(%R14),%R11 |
0x438a6a MOV %EBX,%R14D |
0x438a6d MOV %EAX,0x3c(%RSP) |
0x438a71 SAL $0x6,%RDI |
0x438a75 CLTQ |
0x438a77 MOV %ESI,0x50(%RSP) |
0x438a7b AND $-0x8,%R14D |
0x438a7f DEC %ESI |
0x438a81 MOVSXD %R12D,%R8 |
0x438a84 MOV %RCX,0x70(%RSP) |
0x438a89 ADD %R8,%R10 |
0x438a8c MOVSXD %EDX,%RDX |
0x438a8f MOV %RDI,0x60(%RSP) |
0x438a94 LEA (,%R10,8),%R8 |
0x438a9c MOV %R14D,0x54(%RSP) |
0x438aa1 MOV %RAX,0x48(%RSP) |
0x438aa6 MOV %R15,0x58(%RSP) |
0x438aab MOV %ESI,0x44(%RSP) |
0x438aaf NOP |
(200) 0x438ab0 MOV 0x70(%RSP),%RDI |
(200) 0x438ab5 MOV 0x78(%RSP),%R15D |
(200) 0x438aba IMUL %RDX,%RDI |
(200) 0x438abe CMP $0x2,%R15D |
(200) 0x438ac2 JBE 438ae7 |
(200) 0x438ac4 MOV 0x58(%RSP),%RCX |
(200) 0x438ac9 LEA 0x8(%R9,%R8,1),%RSI |
(200) 0x438ace LEA 0x2(%RDI,%RCX,1),%R14 |
(200) 0x438ad3 LEA (%R11,%R14,8),%RCX |
(200) 0x438ad7 MOV %RCX,%RAX |
(200) 0x438ada SUB %RSI,%RAX |
(200) 0x438add CMP $0x30,%RAX |
(200) 0x438ae1 JA 438c50 |
(200) 0x438ae7 MOV 0x48(%RSP),%R15 |
(200) 0x438aec XOR %ESI,%ESI |
(200) 0x438aee LEA (%R9,%R8,1),%R14 |
(200) 0x438af2 ADD %R15,%RDI |
(200) 0x438af5 LEA (%R11,%RDI,8),%RCX |
(200) 0x438af9 LEA -0x8(%R13),%RDI |
(200) 0x438afd SHR $0x3,%RDI |
(200) 0x438b01 INC %RDI |
(200) 0x438b04 AND $0x7,%EDI |
(200) 0x438b07 JE 438ba5 |
(200) 0x438b0d CMP $0x1,%RDI |
(200) 0x438b11 JE 438b90 |
(200) 0x438b13 CMP $0x2,%RDI |
(200) 0x438b17 JE 438b80 |
(200) 0x438b19 CMP $0x3,%RDI |
(200) 0x438b1d JE 438b70 |
(200) 0x438b1f CMP $0x4,%RDI |
(200) 0x438b23 JE 438b60 |
(200) 0x438b25 CMP $0x5,%RDI |
(200) 0x438b29 JE 438b50 |
(200) 0x438b2b CMP $0x6,%RDI |
(200) 0x438b2f JE 438b40 |
(200) 0x438b31 VMOVSD (%R14),%XMM7 |
(200) 0x438b36 MOV $0x8,%ESI |
(200) 0x438b3b VMOVSD %XMM7,0x10(%RCX) |
(200) 0x438b40 VMOVSD (%R14,%RSI,1),%XMM6 |
(200) 0x438b46 VMOVSD %XMM6,0x10(%RCX,%RSI,1) |
(200) 0x438b4c ADD $0x8,%RSI |
(200) 0x438b50 VMOVSD (%R14,%RSI,1),%XMM3 |
(200) 0x438b56 VMOVSD %XMM3,0x10(%RCX,%RSI,1) |
(200) 0x438b5c ADD $0x8,%RSI |
(200) 0x438b60 VMOVSD (%R14,%RSI,1),%XMM4 |
(200) 0x438b66 VMOVSD %XMM4,0x10(%RCX,%RSI,1) |
(200) 0x438b6c ADD $0x8,%RSI |
(200) 0x438b70 VMOVSD (%R14,%RSI,1),%XMM5 |
(200) 0x438b76 VMOVSD %XMM5,0x10(%RCX,%RSI,1) |
(200) 0x438b7c ADD $0x8,%RSI |
(200) 0x438b80 VMOVSD (%R14,%RSI,1),%XMM8 |
(200) 0x438b86 VMOVSD %XMM8,0x10(%RCX,%RSI,1) |
(200) 0x438b8c ADD $0x8,%RSI |
(200) 0x438b90 VMOVSD (%R14,%RSI,1),%XMM9 |
(200) 0x438b96 VMOVSD %XMM9,0x10(%RCX,%RSI,1) |
(200) 0x438b9c ADD $0x8,%RSI |
(200) 0x438ba0 CMP %R13,%RSI |
(200) 0x438ba3 JE 438c18 |
(199) 0x438ba5 VMOVSD (%R14,%RSI,1),%XMM10 |
(199) 0x438bab VMOVSD %XMM10,0x10(%RCX,%RSI,1) |
(199) 0x438bb1 VMOVSD 0x8(%RSI,%R14,1),%XMM11 |
(199) 0x438bb8 VMOVSD %XMM11,0x18(%RCX,%RSI,1) |
(199) 0x438bbe VMOVSD 0x10(%RSI,%R14,1),%XMM12 |
(199) 0x438bc5 VMOVSD %XMM12,0x20(%RCX,%RSI,1) |
(199) 0x438bcb VMOVSD 0x18(%RSI,%R14,1),%XMM13 |
(199) 0x438bd2 VMOVSD %XMM13,0x28(%RCX,%RSI,1) |
(199) 0x438bd8 VMOVSD 0x20(%RSI,%R14,1),%XMM14 |
(199) 0x438bdf VMOVSD %XMM14,0x30(%RCX,%RSI,1) |
(199) 0x438be5 VMOVSD 0x28(%RSI,%R14,1),%XMM15 |
(199) 0x438bec VMOVSD %XMM15,0x38(%RCX,%RSI,1) |
(199) 0x438bf2 VMOVSD 0x30(%RSI,%R14,1),%XMM1 |
(199) 0x438bf9 VMOVSD %XMM1,0x40(%RCX,%RSI,1) |
(199) 0x438bff VMOVSD 0x38(%RSI,%R14,1),%XMM0 |
(199) 0x438c06 VMOVSD %XMM0,0x48(%RCX,%RSI,1) |
(199) 0x438c0c ADD $0x40,%RSI |
(199) 0x438c10 CMP %R13,%RSI |
(199) 0x438c13 JNE 438ba5 |
(200) 0x438c15 NOPL (%RAX) |
(200) 0x438c18 MOV 0x68(%RSP),%RAX |
(200) 0x438c1d INC %RDX |
(200) 0x438c20 ADD %EBX,%R12D |
(200) 0x438c23 ADD %R13,%R8 |
(200) 0x438c26 LEA (%RDX),%R15D |
(200) 0x438c29 ADD %RAX,%R10 |
(200) 0x438c2c CMP %R15D,0x7c(%RSP) |
(200) 0x438c31 JG 438ab0 |
0x438c37 VZEROUPPER |
0x438c3a LEA -0x28(%RBP),%RSP |
0x438c3e POP %RBX |
0x438c3f POP %R12 |
0x438c41 POP %R13 |
0x438c43 POP %R14 |
0x438c45 POP %R15 |
0x438c47 POP %RBP |
0x438c48 RET |
0x438c49 NOPL (%RAX) |
(200) 0x438c50 CMP $0x6,%R15D |
(200) 0x438c54 JBE 438e92 |
(200) 0x438c5a MOV 0x60(%RSP),%RSI |
(200) 0x438c5f LEA (%R9,%R8,1),%R15 |
(200) 0x438c63 XOR %EAX,%EAX |
(200) 0x438c65 SUB $0x40,%RSI |
(200) 0x438c69 SHR $0x6,%RSI |
(200) 0x438c6d INC %RSI |
(200) 0x438c70 AND $0x7,%ESI |
(200) 0x438c73 JE 438d18 |
(200) 0x438c79 CMP $0x1,%RSI |
(200) 0x438c7d JE 438cfb |
(200) 0x438c7f CMP $0x2,%RSI |
(200) 0x438c83 JE 438ce9 |
(200) 0x438c85 CMP $0x3,%RSI |
(200) 0x438c89 JE 438cd7 |
(200) 0x438c8b CMP $0x4,%RSI |
(200) 0x438c8f JE 438cc5 |
(200) 0x438c91 CMP $0x5,%RSI |
(200) 0x438c95 JE 438cb3 |
(200) 0x438c97 CMP $0x6,%RSI |
(200) 0x438c9b JNE 438e7c |
(200) 0x438ca1 VMOVUPD (%R15,%RAX,1),%ZMM0 |
(200) 0x438ca8 VMOVUPD %ZMM0,(%RCX,%RAX,1) |
(200) 0x438caf ADD $0x40,%RAX |
(200) 0x438cb3 VMOVUPD (%R15,%RAX,1),%ZMM2 |
(200) 0x438cba VMOVUPD %ZMM2,(%RCX,%RAX,1) |
(200) 0x438cc1 ADD $0x40,%RAX |
(200) 0x438cc5 VMOVUPD (%R15,%RAX,1),%ZMM7 |
(200) 0x438ccc VMOVUPD %ZMM7,(%RCX,%RAX,1) |
(200) 0x438cd3 ADD $0x40,%RAX |
(200) 0x438cd7 VMOVUPD (%R15,%RAX,1),%ZMM6 |
(200) 0x438cde VMOVUPD %ZMM6,(%RCX,%RAX,1) |
(200) 0x438ce5 ADD $0x40,%RAX |
(200) 0x438ce9 VMOVUPD (%R15,%RAX,1),%ZMM3 |
(200) 0x438cf0 VMOVUPD %ZMM3,(%RCX,%RAX,1) |
(200) 0x438cf7 ADD $0x40,%RAX |
(200) 0x438cfb VMOVUPD (%R15,%RAX,1),%ZMM4 |
(200) 0x438d02 VMOVUPD %ZMM4,(%RCX,%RAX,1) |
(200) 0x438d09 ADD $0x40,%RAX |
(200) 0x438d0d CMP %RAX,0x60(%RSP) |
(200) 0x438d12 JE 438da7 |
(201) 0x438d18 VMOVUPD (%R15,%RAX,1),%ZMM5 |
(201) 0x438d1f VMOVUPD %ZMM5,(%RCX,%RAX,1) |
(201) 0x438d26 VMOVUPD 0x40(%R15,%RAX,1),%ZMM8 |
(201) 0x438d2e VMOVUPD %ZMM8,0x40(%RCX,%RAX,1) |
(201) 0x438d36 VMOVUPD 0x80(%R15,%RAX,1),%ZMM9 |
(201) 0x438d3e VMOVUPD %ZMM9,0x80(%RCX,%RAX,1) |
(201) 0x438d46 VMOVUPD 0xc0(%R15,%RAX,1),%ZMM10 |
(201) 0x438d4e VMOVUPD %ZMM10,0xc0(%RCX,%RAX,1) |
(201) 0x438d56 VMOVUPD 0x100(%R15,%RAX,1),%ZMM11 |
(201) 0x438d5e VMOVUPD %ZMM11,0x100(%RCX,%RAX,1) |
(201) 0x438d66 VMOVUPD 0x140(%R15,%RAX,1),%ZMM12 |
(201) 0x438d6e VMOVUPD %ZMM12,0x140(%RCX,%RAX,1) |
(201) 0x438d76 VMOVUPD 0x180(%R15,%RAX,1),%ZMM13 |
(201) 0x438d7e VMOVUPD %ZMM13,0x180(%RCX,%RAX,1) |
(201) 0x438d86 VMOVUPD 0x1c0(%R15,%RAX,1),%ZMM14 |
(201) 0x438d8e VMOVUPD %ZMM14,0x1c0(%RCX,%RAX,1) |
(201) 0x438d96 ADD $0x200,%RAX |
(201) 0x438d9c CMP %RAX,0x60(%RSP) |
(201) 0x438da1 JNE 438d18 |
(200) 0x438da7 CMP %EBX,0x54(%RSP) |
(200) 0x438dab JE 438c18 |
(200) 0x438db1 CMPL $0x2,0x44(%RSP) |
(200) 0x438db6 MOV 0x50(%RSP),%ECX |
(200) 0x438dba JBE 438e9e |
(200) 0x438dc0 MOV 0x54(%RSP),%R15D |
(200) 0x438dc5 MOV %R15D,%EAX |
(200) 0x438dc8 LEA (%R10,%RAX,1),%RSI |
(200) 0x438dcc ADD %R14,%RAX |
(200) 0x438dcf VMOVUPD (%R9,%RSI,8),%YMM15 |
(200) 0x438dd5 VMOVUPD %YMM15,(%R11,%RAX,8) |
(200) 0x438ddb MOV %ECX,%EAX |
(200) 0x438ddd AND $-0x4,%EAX |
(200) 0x438de0 ADD %R15D,%EAX |
(200) 0x438de3 AND $0x3,%ECX |
(200) 0x438de6 JE 438c18 |
(200) 0x438dec MOV 0x40(%RSP),%R14D |
(200) 0x438df1 LEA (%R14,%RAX,1),%ECX |
(200) 0x438df5 ADD %R12D,%ECX |
(200) 0x438df8 MOVSXD %ECX,%R15 |
(200) 0x438dfb VMOVSD (%R9,%R15,8),%XMM1 |
(200) 0x438e01 MOV 0x3c(%RSP),%R15D |
(200) 0x438e06 LEA 0x2(%RAX,%R15,1),%ESI |
(200) 0x438e0b MOVSXD %ESI,%RCX |
(200) 0x438e0e LEA 0x1(%RAX),%ESI |
(200) 0x438e11 ADD %RDI,%RCX |
(200) 0x438e14 VMOVSD %XMM1,(%R11,%RCX,8) |
(200) 0x438e1a CMP %ESI,%EBX |
(200) 0x438e1c JLE 438c18 |
(200) 0x438e22 LEA (%R14,%RSI,1),%ECX |
(200) 0x438e26 LEA 0x2(%RSI,%R15,1),%ESI |
(200) 0x438e2b ADD $0x2,%EAX |
(200) 0x438e2e ADD %R12D,%ECX |
(200) 0x438e31 MOVSXD %ECX,%RCX |
(200) 0x438e34 VMOVSD (%R9,%RCX,8),%XMM0 |
(200) 0x438e3a MOVSXD %ESI,%RCX |
(200) 0x438e3d ADD %RDI,%RCX |
(200) 0x438e40 VMOVSD %XMM0,(%R11,%RCX,8) |
(200) 0x438e46 CMP %EAX,%EBX |
(200) 0x438e48 JLE 438c18 |
(200) 0x438e4e LEA (%R14,%RAX,1),%R14D |
(200) 0x438e52 LEA 0x2(%RAX,%R15,1),%EAX |
(200) 0x438e57 ADD %R12D,%R14D |
(200) 0x438e5a CLTQ |
(200) 0x438e5c MOVSXD %R14D,%RSI |
(200) 0x438e5f ADD %RDI,%RAX |
(200) 0x438e62 VMOVSD (%R9,%RSI,8),%XMM2 |
(200) 0x438e68 VMOVSD %XMM2,(%R11,%RAX,8) |
(200) 0x438e6e JMP 438c18 |
0x438e73 INC %EAX |
0x438e75 XOR %EDX,%EDX |
0x438e77 JMP 4389f1 |
(200) 0x438e7c VMOVUPD (%R15),%ZMM1 |
(200) 0x438e82 MOV $0x40,%EAX |
(200) 0x438e87 VMOVUPD %ZMM1,(%RCX) |
(200) 0x438e8d JMP 438ca1 |
(200) 0x438e92 MOV %EBX,%ECX |
(200) 0x438e94 XOR %EAX,%EAX |
(200) 0x438e96 XOR %R15D,%R15D |
(200) 0x438e99 JMP 438dc8 |
(200) 0x438e9e MOV 0x54(%RSP),%EAX |
(200) 0x438ea2 JMP 438dec |
0x438ea7 NOPW (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○95.70 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○4.30 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | pack_kernel.cpp:156-160 |
Module | exec |
nb instructions | 90 |
nb uops | 96 |
loop length | 315 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 16.00 cycles |
front end | 16.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.30 | 7.20 | 6.33 | 6.33 | 10.00 | 7.20 | 7.10 | 10.00 | 10.00 | 10.00 | 7.20 | 6.33 |
cycles | 7.30 | 11.30 | 6.33 | 6.33 | 10.00 | 7.20 | 7.10 | 10.00 | 10.00 | 10.00 | 7.20 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.50-15.51 |
Stall cycles | 0.00 |
Front-end | 16.00 |
Dispatch | 11.30 |
DIV/SQRT | 6.00 |
Overall L1 | 16.00 |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 9% |
load | 12% |
store | 8% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x28(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 438e73 <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x4d3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 438c3a <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x29a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x20(%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x24(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 438c3a <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x29a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %EBX,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x8(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EDX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R15,%RSI,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RSI,%R15,1),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $0x7,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (,%R11,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x10(%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R12D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R8,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4389f1 <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:156-160 |
Module | exec |
nb instructions | 90 |
nb uops | 96 |
loop length | 315 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 16.00 cycles |
front end | 16.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.30 | 7.20 | 6.33 | 6.33 | 10.00 | 7.20 | 7.10 | 10.00 | 10.00 | 10.00 | 7.20 | 6.33 |
cycles | 7.30 | 11.30 | 6.33 | 6.33 | 10.00 | 7.20 | 7.10 | 10.00 | 10.00 | 10.00 | 7.20 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.50-15.51 |
Stall cycles | 0.00 |
Front-end | 16.00 |
Dispatch | 11.30 |
DIV/SQRT | 6.00 |
Overall L1 | 16.00 |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 9% |
load | 12% |
store | 8% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x28(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 438e73 <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x4d3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 438c3a <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x29a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x20(%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x24(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 438c3a <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x29a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %EBX,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x8(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EDX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R15,%RSI,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RSI,%R15,1),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $0x7,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (,%R11,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x10(%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R12D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R8,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4389f1 <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clover_unpack_message_right(global_variables&, int, int, int, int, clover::Buffer2D<double>&, clover::Buffer1D<double>&, int, int, int, int, int, int, int) | 0.01 | 0.01 |
▼Loop 200 - pack_kernel.cpp:156-160 - exec | 0.01 | 0.02 |
○Loop 201 - pack_kernel.cpp:158-160 - exec | 0 | 0 |
○Loop 199 - pack_kernel.cpp:158-160 - exec | 0 | 0 |