Function: clover_pack_message_left(global_variables&, int, int, int, int, clover::Buffer2D<double>&, clover::Buffer1D<double>&, int, int, int, int, int, int, int) | Module: exec | Source: pack_kernel.cpp:55-59 [...] | Coverage: 0.03% |
---|
Function: clover_pack_message_left(global_variables&, int, int, int, int, clover::Buffer2D<double>&, clover::Buffer1D<double>&, int, int, int, int, int, int, int) | Module: exec | Source: pack_kernel.cpp:55-59 [...] | Coverage: 0.03% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 55 - 59 |
-------------------------------------------------------------------------------- |
55: #pragma omp parallel for simd |
56: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
57: for (int j = 0; j < depth; ++j) { |
58: int index = buffer_offset + j + k * depth; |
59: left_snd[index] = field(x_min + x_inc - 1 + j + 2, k); |
0x43bdc0 PUSH %RBP |
0x43bdc1 MOV %RSP,%RBP |
0x43bdc4 PUSH %R15 |
0x43bdc6 PUSH %R14 |
0x43bdc8 PUSH %R13 |
0x43bdca PUSH %R12 |
0x43bdcc MOV %RDI,%R12 |
0x43bdcf PUSH %RBX |
0x43bdd0 AND $-0x40,%RSP |
0x43bdd4 ADD $-0x80,%RSP |
0x43bdd8 MOV 0x1c(%RDI),%EBX |
0x43bddb MOV 0x14(%RDI),%R14D |
0x43bddf CALL 4046c0 <omp_get_num_threads@plt> |
0x43bde4 MOV %EAX,%R13D |
0x43bde7 SUB %EBX,%R14D |
0x43bdea CALL 4045b0 <omp_get_thread_num@plt> |
0x43bdef INC %R14D |
0x43bdf2 MOV %EAX,%ECX |
0x43bdf4 MOV 0x18(%R12),%EAX |
0x43bdf9 ADD 0x28(%R12),%EAX |
0x43bdfe LEA 0x2(%RBX,%RAX,1),%EAX |
0x43be02 SUB %R14D,%EAX |
0x43be05 CLTD |
0x43be06 IDIV %R13D |
0x43be09 CMP %EDX,%ECX |
0x43be0b JL 43c296 |
0x43be11 IMUL %EAX,%ECX |
0x43be14 ADD %EDX,%ECX |
0x43be16 ADD %ECX,%EAX |
0x43be18 CMP %EAX,%ECX |
0x43be1a JGE 43c060 |
0x43be20 MOV 0x20(%R12),%R10D |
0x43be25 ADD %R14D,%EAX |
0x43be28 MOVSXD 0x24(%R12),%R15 |
0x43be2d ADD %R14D,%ECX |
0x43be30 MOVSXD 0x10(%R12),%RSI |
0x43be35 MOV (%R12),%RDI |
0x43be39 MOV %EAX,0x78(%RSP) |
0x43be3d MOV %R10D,0x40(%RSP) |
0x43be42 MOV 0x8(%R12),%R13 |
0x43be47 TEST %EBX,%EBX |
0x43be49 JLE 43c060 |
0x43be4f MOV %EBX,%R12D |
0x43be52 MOV (%RDI),%RAX |
0x43be55 MOVSXD %EBX,%R11 |
0x43be58 MOVSXD %R10D,%R8 |
0x43be5b IMUL %ECX,%R12D |
0x43be5f MOV %R11,0x68(%RSP) |
0x43be64 MOV 0x10(%RDI),%R9 |
0x43be68 LEA (,%R11,8),%R14 |
0x43be70 MOV %RAX,0x70(%RSP) |
0x43be75 MOV %EBX,%EDI |
0x43be77 LEA (%R15,%RSI,1),%EAX |
0x43be7b LEA (%RSI,%R15,1),%R15 |
0x43be7f MOV %EBX,%ESI |
0x43be81 LEA -0x1(%RBX),%EDX |
0x43be84 SHR $0x3,%EDI |
0x43be87 MOV %R15,0x58(%RSP) |
0x43be8c MOVSXD %R12D,%R11 |
0x43be8f AND $0x7,%ESI |
0x43be92 MOV %EDX,0x7c(%RSP) |
0x43be96 SAL $0x6,%RDI |
0x43be9a ADD %R8,%R11 |
0x43be9d MOV %EBX,%R8D |
0x43bea0 MOVSXD %EAX,%RDX |
0x43bea3 MOV %ESI,0x50(%RSP) |
0x43bea7 AND $-0x8,%R8D |
0x43beab DEC %EAX |
0x43bead DEC %ESI |
0x43beaf MOV %RDI,0x60(%RSP) |
0x43beb4 MOV %R8D,0x54(%RSP) |
0x43beb9 MOV 0x8(%R13),%R13 |
0x43bebd LEA (,%R11,8),%R10 |
0x43bec5 MOVSXD %ECX,%RCX |
0x43bec8 MOV %RDX,0x48(%RSP) |
0x43becd MOV %EAX,0x3c(%RSP) |
0x43bed1 MOV %ESI,0x44(%RSP) |
0x43bed5 NOPL (%RAX) |
(216) 0x43bed8 MOV 0x70(%RSP),%RDI |
(216) 0x43bedd LEA (%R13,%R10,1),%RAX |
(216) 0x43bee2 IMUL %RCX,%RDI |
(216) 0x43bee6 CMPL $0x2,0x7c(%RSP) |
(216) 0x43beeb JBE 43bf12 |
(216) 0x43beed MOV 0x58(%RSP),%R8 |
(216) 0x43bef2 MOV %RAX,%RSI |
(216) 0x43bef5 LEA (%R8,%RDI,1),%R15 |
(216) 0x43bef9 LEA 0x10(,%R15,8),%RDX |
(216) 0x43bf01 LEA (%R9,%RDX,1),%R8 |
(216) 0x43bf05 SUB %R8,%RSI |
(216) 0x43bf08 CMP $0x30,%RSI |
(216) 0x43bf0c JA 43c070 |
(216) 0x43bf12 MOV 0x48(%RSP),%R8 |
(216) 0x43bf17 XOR %EDX,%EDX |
(216) 0x43bf19 ADD %R8,%RDI |
(216) 0x43bf1c LEA (%R9,%RDI,8),%R15 |
(216) 0x43bf20 LEA -0x8(%R14),%RDI |
(216) 0x43bf24 SHR $0x3,%RDI |
(216) 0x43bf28 INC %RDI |
(216) 0x43bf2b AND $0x7,%EDI |
(216) 0x43bf2e JE 43bfcc |
(216) 0x43bf34 CMP $0x1,%RDI |
(216) 0x43bf38 JE 43bfb7 |
(216) 0x43bf3a CMP $0x2,%RDI |
(216) 0x43bf3e JE 43bfa7 |
(216) 0x43bf40 CMP $0x3,%RDI |
(216) 0x43bf44 JE 43bf97 |
(216) 0x43bf46 CMP $0x4,%RDI |
(216) 0x43bf4a JE 43bf87 |
(216) 0x43bf4c CMP $0x5,%RDI |
(216) 0x43bf50 JE 43bf77 |
(216) 0x43bf52 CMP $0x6,%RDI |
(216) 0x43bf56 JE 43bf67 |
(216) 0x43bf58 VMOVSD 0x8(%R15),%XMM7 |
(216) 0x43bf5e MOV $0x8,%EDX |
(216) 0x43bf63 VMOVSD %XMM7,(%RAX) |
(216) 0x43bf67 VMOVSD 0x8(%R15,%RDX,1),%XMM6 |
(216) 0x43bf6e VMOVSD %XMM6,(%RAX,%RDX,1) |
(216) 0x43bf73 ADD $0x8,%RDX |
(216) 0x43bf77 VMOVSD 0x8(%R15,%RDX,1),%XMM3 |
(216) 0x43bf7e VMOVSD %XMM3,(%RAX,%RDX,1) |
(216) 0x43bf83 ADD $0x8,%RDX |
(216) 0x43bf87 VMOVSD 0x8(%R15,%RDX,1),%XMM4 |
(216) 0x43bf8e VMOVSD %XMM4,(%RAX,%RDX,1) |
(216) 0x43bf93 ADD $0x8,%RDX |
(216) 0x43bf97 VMOVSD 0x8(%R15,%RDX,1),%XMM5 |
(216) 0x43bf9e VMOVSD %XMM5,(%RAX,%RDX,1) |
(216) 0x43bfa3 ADD $0x8,%RDX |
(216) 0x43bfa7 VMOVSD 0x8(%R15,%RDX,1),%XMM8 |
(216) 0x43bfae VMOVSD %XMM8,(%RAX,%RDX,1) |
(216) 0x43bfb3 ADD $0x8,%RDX |
(216) 0x43bfb7 VMOVSD 0x8(%R15,%RDX,1),%XMM9 |
(216) 0x43bfbe VMOVSD %XMM9,(%RAX,%RDX,1) |
(216) 0x43bfc3 ADD $0x8,%RDX |
(216) 0x43bfc7 CMP %R14,%RDX |
(216) 0x43bfca JE 43c040 |
(215) 0x43bfcc VMOVSD 0x8(%R15,%RDX,1),%XMM10 |
(215) 0x43bfd3 VMOVSD %XMM10,(%RAX,%RDX,1) |
(215) 0x43bfd8 VMOVSD 0x10(%R15,%RDX,1),%XMM11 |
(215) 0x43bfdf VMOVSD %XMM11,0x8(%RDX,%RAX,1) |
(215) 0x43bfe5 VMOVSD 0x18(%R15,%RDX,1),%XMM12 |
(215) 0x43bfec VMOVSD %XMM12,0x10(%RDX,%RAX,1) |
(215) 0x43bff2 VMOVSD 0x20(%R15,%RDX,1),%XMM13 |
(215) 0x43bff9 VMOVSD %XMM13,0x18(%RDX,%RAX,1) |
(215) 0x43bfff VMOVSD 0x28(%R15,%RDX,1),%XMM14 |
(215) 0x43c006 VMOVSD %XMM14,0x20(%RDX,%RAX,1) |
(215) 0x43c00c VMOVSD 0x30(%R15,%RDX,1),%XMM15 |
(215) 0x43c013 VMOVSD %XMM15,0x28(%RDX,%RAX,1) |
(215) 0x43c019 VMOVSD 0x38(%R15,%RDX,1),%XMM1 |
(215) 0x43c020 VMOVSD %XMM1,0x30(%RDX,%RAX,1) |
(215) 0x43c026 VMOVSD 0x40(%R15,%RDX,1),%XMM0 |
(215) 0x43c02d ADD $0x40,%RDX |
(215) 0x43c031 VMOVSD %XMM0,-0x8(%RDX,%RAX,1) |
(215) 0x43c037 CMP %R14,%RDX |
(215) 0x43c03a JNE 43bfcc |
(216) 0x43c03c NOPL (%RAX) |
(216) 0x43c040 MOV 0x68(%RSP),%RAX |
(216) 0x43c045 INC %RCX |
(216) 0x43c048 ADD %EBX,%R12D |
(216) 0x43c04b ADD %R14,%R10 |
(216) 0x43c04e LEA (%RCX),%ESI |
(216) 0x43c050 ADD %RAX,%R11 |
(216) 0x43c053 CMP %ESI,0x78(%RSP) |
(216) 0x43c057 JG 43bed8 |
0x43c05d VZEROUPPER |
0x43c060 LEA -0x28(%RBP),%RSP |
0x43c064 POP %RBX |
0x43c065 POP %R12 |
0x43c067 POP %R13 |
0x43c069 POP %R14 |
0x43c06b POP %R15 |
0x43c06d POP %RBP |
0x43c06e RET |
0x43c06f NOP |
(216) 0x43c070 CMPL $0x6,0x7c(%RSP) |
(216) 0x43c075 JBE 43c2b5 |
(216) 0x43c07b MOV 0x60(%RSP),%RSI |
(216) 0x43c080 LEA -0x8(%R9,%RDX,1),%R8 |
(216) 0x43c085 XOR %EDX,%EDX |
(216) 0x43c087 SUB $0x40,%RSI |
(216) 0x43c08b SHR $0x6,%RSI |
(216) 0x43c08f INC %RSI |
(216) 0x43c092 AND $0x7,%ESI |
(216) 0x43c095 JE 43c13a |
(216) 0x43c09b CMP $0x1,%RSI |
(216) 0x43c09f JE 43c11d |
(216) 0x43c0a1 CMP $0x2,%RSI |
(216) 0x43c0a5 JE 43c10b |
(216) 0x43c0a7 CMP $0x3,%RSI |
(216) 0x43c0ab JE 43c0f9 |
(216) 0x43c0ad CMP $0x4,%RSI |
(216) 0x43c0b1 JE 43c0e7 |
(216) 0x43c0b3 CMP $0x5,%RSI |
(216) 0x43c0b7 JE 43c0d5 |
(216) 0x43c0b9 CMP $0x6,%RSI |
(216) 0x43c0bd JNE 43c29f |
(216) 0x43c0c3 VMOVUPD (%R8,%RDX,1),%ZMM0 |
(216) 0x43c0ca VMOVUPD %ZMM0,(%RAX,%RDX,1) |
(216) 0x43c0d1 ADD $0x40,%RDX |
(216) 0x43c0d5 VMOVUPD (%R8,%RDX,1),%ZMM2 |
(216) 0x43c0dc VMOVUPD %ZMM2,(%RAX,%RDX,1) |
(216) 0x43c0e3 ADD $0x40,%RDX |
(216) 0x43c0e7 VMOVUPD (%R8,%RDX,1),%ZMM7 |
(216) 0x43c0ee VMOVUPD %ZMM7,(%RAX,%RDX,1) |
(216) 0x43c0f5 ADD $0x40,%RDX |
(216) 0x43c0f9 VMOVUPD (%R8,%RDX,1),%ZMM6 |
(216) 0x43c100 VMOVUPD %ZMM6,(%RAX,%RDX,1) |
(216) 0x43c107 ADD $0x40,%RDX |
(216) 0x43c10b VMOVUPD (%R8,%RDX,1),%ZMM3 |
(216) 0x43c112 VMOVUPD %ZMM3,(%RAX,%RDX,1) |
(216) 0x43c119 ADD $0x40,%RDX |
(216) 0x43c11d VMOVUPD (%R8,%RDX,1),%ZMM4 |
(216) 0x43c124 VMOVUPD %ZMM4,(%RAX,%RDX,1) |
(216) 0x43c12b ADD $0x40,%RDX |
(216) 0x43c12f CMP %RDX,0x60(%RSP) |
(216) 0x43c134 JE 43c1ca |
(217) 0x43c13a VMOVUPD (%R8,%RDX,1),%ZMM5 |
(217) 0x43c141 VMOVUPD %ZMM5,(%RAX,%RDX,1) |
(217) 0x43c148 VMOVUPD 0x40(%R8,%RDX,1),%ZMM8 |
(217) 0x43c150 VMOVUPD %ZMM8,0x40(%RDX,%RAX,1) |
(217) 0x43c158 VMOVUPD 0x80(%R8,%RDX,1),%ZMM9 |
(217) 0x43c160 VMOVUPD %ZMM9,0x80(%RDX,%RAX,1) |
(217) 0x43c168 VMOVUPD 0xc0(%R8,%RDX,1),%ZMM10 |
(217) 0x43c170 VMOVUPD %ZMM10,0xc0(%RDX,%RAX,1) |
(217) 0x43c178 VMOVUPD 0x100(%R8,%RDX,1),%ZMM11 |
(217) 0x43c180 VMOVUPD %ZMM11,0x100(%RDX,%RAX,1) |
(217) 0x43c188 VMOVUPD 0x140(%R8,%RDX,1),%ZMM12 |
(217) 0x43c190 VMOVUPD %ZMM12,0x140(%RDX,%RAX,1) |
(217) 0x43c198 VMOVUPD 0x180(%R8,%RDX,1),%ZMM13 |
(217) 0x43c1a0 VMOVUPD %ZMM13,0x180(%RDX,%RAX,1) |
(217) 0x43c1a8 VMOVUPD 0x1c0(%R8,%RDX,1),%ZMM14 |
(217) 0x43c1b0 ADD $0x200,%RDX |
(217) 0x43c1b7 VMOVUPD %ZMM14,-0x40(%RDX,%RAX,1) |
(217) 0x43c1bf CMP %RDX,0x60(%RSP) |
(217) 0x43c1c4 JNE 43c13a |
(216) 0x43c1ca CMP %EBX,0x54(%RSP) |
(216) 0x43c1ce JE 43c040 |
(216) 0x43c1d4 CMPL $0x2,0x44(%RSP) |
(216) 0x43c1d9 MOV 0x50(%RSP),%EDX |
(216) 0x43c1dd JBE 43c2c0 |
(216) 0x43c1e3 MOV 0x54(%RSP),%ESI |
(216) 0x43c1e7 MOV %ESI,%EAX |
(216) 0x43c1e9 LEA 0x1(%R15,%RAX,1),%R15 |
(216) 0x43c1ee ADD %R11,%RAX |
(216) 0x43c1f1 VMOVUPD (%R9,%R15,8),%YMM15 |
(216) 0x43c1f7 VMOVUPD %YMM15,(%R13,%RAX,8) |
(216) 0x43c1fe MOV %EDX,%EAX |
(216) 0x43c200 AND $-0x4,%EAX |
(216) 0x43c203 ADD %ESI,%EAX |
(216) 0x43c205 AND $0x3,%EDX |
(216) 0x43c208 JE 43c040 |
(216) 0x43c20e MOV 0x3c(%RSP),%R15D |
(216) 0x43c213 LEA 0x2(%RAX,%R15,1),%R8D |
(216) 0x43c218 MOVSXD %R8D,%RSI |
(216) 0x43c21b MOV 0x40(%RSP),%R8D |
(216) 0x43c220 ADD %RDI,%RSI |
(216) 0x43c223 LEA (%R8,%RAX,1),%EDX |
(216) 0x43c227 VMOVSD (%R9,%RSI,8),%XMM1 |
(216) 0x43c22d ADD %R12D,%EDX |
(216) 0x43c230 MOVSXD %EDX,%RSI |
(216) 0x43c233 LEA 0x1(%RAX),%EDX |
(216) 0x43c236 VMOVSD %XMM1,(%R13,%RSI,8) |
(216) 0x43c23d CMP %EDX,%EBX |
(216) 0x43c23f JLE 43c040 |
(216) 0x43c245 LEA 0x2(%RDX,%R15,1),%ESI |
(216) 0x43c24a ADD %R8D,%EDX |
(216) 0x43c24d ADD $0x2,%EAX |
(216) 0x43c250 MOVSXD %ESI,%RSI |
(216) 0x43c253 ADD %R12D,%EDX |
(216) 0x43c256 ADD %RDI,%RSI |
(216) 0x43c259 MOVSXD %EDX,%RDX |
(216) 0x43c25c VMOVSD (%R9,%RSI,8),%XMM0 |
(216) 0x43c262 VMOVSD %XMM0,(%R13,%RDX,8) |
(216) 0x43c269 CMP %EAX,%EBX |
(216) 0x43c26b JLE 43c040 |
(216) 0x43c271 LEA 0x2(%RAX,%R15,1),%R15D |
(216) 0x43c276 ADD %R8D,%EAX |
(216) 0x43c279 MOVSXD %R15D,%RSI |
(216) 0x43c27c ADD %R12D,%EAX |
(216) 0x43c27f ADD %RDI,%RSI |
(216) 0x43c282 CLTQ |
(216) 0x43c284 VMOVSD (%R9,%RSI,8),%XMM2 |
(216) 0x43c28a VMOVSD %XMM2,(%R13,%RAX,8) |
(216) 0x43c291 JMP 43c040 |
0x43c296 INC %EAX |
0x43c298 XOR %EDX,%EDX |
0x43c29a JMP 43be11 |
(216) 0x43c29f VMOVUPD (%R8),%ZMM1 |
(216) 0x43c2a5 MOV $0x40,%EDX |
(216) 0x43c2aa VMOVUPD %ZMM1,(%RAX) |
(216) 0x43c2b0 JMP 43c0c3 |
(216) 0x43c2b5 MOV %EBX,%EDX |
(216) 0x43c2b7 XOR %EAX,%EAX |
(216) 0x43c2b9 XOR %ESI,%ESI |
(216) 0x43c2bb JMP 43c1e9 |
(216) 0x43c2c0 MOV 0x54(%RSP),%EAX |
(216) 0x43c2c4 JMP 43c20e |
0x43c2c9 NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○93.87 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
Path / |
Source file and lines | pack_kernel.cpp:55-59 |
Module | exec |
nb instructions | 92 |
nb uops | 98 |
loop length | 315 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.70 | 7.60 | 6.33 | 6.33 | 10.00 | 7.67 | 7.50 | 10.00 | 10.00 | 10.00 | 7.53 | 6.33 |
cycles | 7.70 | 11.67 | 6.33 | 6.33 | 10.00 | 7.67 | 7.50 | 10.00 | 10.00 | 10.00 | 7.53 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.84-15.83 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 11.67 |
DIV/SQRT | 6.00 |
Overall L1 | 16.33 |
Vectorization ratios |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
Vector efficiency ratios |
all | 8% |
load | 8% |
store | 8% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x28(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 43c296 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x4d6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43c060 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x24(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R12),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c060 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD %R10D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
IMUL %ECX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R11,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R15,%RSI,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RSI,%R15,1),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RBX),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x3,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R12D,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
AND $0x7,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EBX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %EAX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x8,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R11,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43be11 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:55-59 |
Module | exec |
nb instructions | 92 |
nb uops | 98 |
loop length | 315 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.70 | 7.60 | 6.33 | 6.33 | 10.00 | 7.67 | 7.50 | 10.00 | 10.00 | 10.00 | 7.53 | 6.33 |
cycles | 7.70 | 11.67 | 6.33 | 6.33 | 10.00 | 7.67 | 7.50 | 10.00 | 10.00 | 10.00 | 7.53 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.84-15.83 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 11.67 |
DIV/SQRT | 6.00 |
Overall L1 | 16.33 |
Vectorization ratios |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
Vector efficiency ratios |
all | 8% |
load | 8% |
store | 8% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x28(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 43c296 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x4d6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43c060 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x24(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R12),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c060 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD %R10D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
IMUL %ECX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R11,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R15,%RSI,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RSI,%R15,1),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RBX),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x3,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R12D,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
AND $0x7,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EBX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %EAX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x8,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R11,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43be11 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clover_pack_message_left(global_variables&, int, int, int, int, clover::Buffer2D<double>&, clover::Buffer1D<double>&, int, int, int, int, int, int, int) | 0.03 | 0.01 |
▼Loop 216 - pack_kernel.cpp:55-59 - exec– | 0.03 | 0.02 |
○Loop 215 - pack_kernel.cpp:57-59 - exec | 0 | 0 |
○Loop 217 - pack_kernel.cpp:57-59 - exec | 0 | 0 |