Function: clover_unpack_message_left(global_variables&, int, int, int, int, clover::Buffer2D<double> ... | Module: exec | Source: pack_kernel.cpp:88-92 [...] | Coverage: 0.03% |
---|
Function: clover_unpack_message_left(global_variables&, int, int, int, int, clover::Buffer2D<double> ... | Module: exec | Source: pack_kernel.cpp:88-92 [...] | Coverage: 0.03% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 88 - 92 |
-------------------------------------------------------------------------------- |
88: #pragma omp parallel for simd |
89: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
90: for (int j = 0; j < depth; ++j) { |
91: int index = buffer_offset + j + k * depth; |
92: field(x_min - j, k) = left_rcv[index]; |
0x438eb0 PUSH %RBP |
0x438eb1 MOV %RSP,%RBP |
0x438eb4 PUSH %R15 |
0x438eb6 PUSH %R14 |
0x438eb8 PUSH %R13 |
0x438eba PUSH %R12 |
0x438ebc MOV %RDI,%R12 |
0x438ebf PUSH %RBX |
0x438ec0 AND $-0x40,%RSP |
0x438ec4 ADD $-0x80,%RSP |
0x438ec8 MOV 0x1c(%RDI),%EBX |
0x438ecb MOV 0x14(%RDI),%R14D |
0x438ecf CALL 4046c0 <omp_get_num_threads@plt> |
0x438ed4 MOV %EAX,%R13D |
0x438ed7 SUB %EBX,%R14D |
0x438eda CALL 4045b0 <omp_get_thread_num@plt> |
0x438edf INC %R14D |
0x438ee2 MOV %EAX,%ECX |
0x438ee4 MOV 0x18(%R12),%EAX |
0x438ee9 ADD 0x24(%R12),%EAX |
0x438eee LEA 0x2(%RBX,%RAX,1),%EAX |
0x438ef2 SUB %R14D,%EAX |
0x438ef5 CLTD |
0x438ef6 IDIV %R13D |
0x438ef9 CMP %EDX,%ECX |
0x438efb JL 439429 |
0x438f01 IMUL %EAX,%ECX |
0x438f04 ADD %EDX,%ECX |
0x438f06 ADD %ECX,%EAX |
0x438f08 CMP %EAX,%ECX |
0x438f0a JGE 43917d |
0x438f10 MOV 0x20(%R12),%EDI |
0x438f15 MOV 0x10(%R12),%R9D |
0x438f1a ADD %R14D,%EAX |
0x438f1d ADD %R14D,%ECX |
0x438f20 MOV 0x8(%R12),%R8 |
0x438f25 MOV (%R12),%R14 |
0x438f29 MOV %EAX,0x78(%RSP) |
0x438f2d MOV %EDI,0x38(%RSP) |
0x438f31 MOV %R9D,0x34(%RSP) |
0x438f36 TEST %EBX,%EBX |
0x438f38 JLE 43917d |
0x438f3e MOV %EBX,%R12D |
0x438f41 MOV (%R14),%R13 |
0x438f44 MOV 0x8(%R8),%R11 |
0x438f48 MOVSXD %EDI,%RSI |
0x438f4b IMUL %ECX,%R12D |
0x438f4f MOV 0x10(%R14),%R8 |
0x438f53 MOVSXD %EBX,%R15 |
0x438f56 MOV %EBX,%EDX |
0x438f58 MOV %R13,0x70(%RSP) |
0x438f5d MOV %EBX,%R13D |
0x438f60 SHR $0x3,%EDX |
0x438f63 MOV %EBX,%EDI |
0x438f65 AND $0x7,%R13D |
0x438f69 MOV %R15,0x68(%RSP) |
0x438f6e MOVSXD %R9D,%R14 |
0x438f71 SAL $0x3,%R15 |
0x438f75 MOVSXD %R12D,%R10 |
0x438f78 MOV %R8,%R9 |
0x438f7b MOV %R13D,0x48(%RSP) |
0x438f80 LEA -0x1(%RBX),%EAX |
0x438f83 ADD %RSI,%R10 |
0x438f86 MOV $0x8,%ESI |
0x438f8b SAL $0x6,%RDX |
0x438f8f AND $-0x8,%EDI |
0x438f92 SUB %R15,%R9 |
0x438f95 SUB %R15,%RSI |
0x438f98 DEC %R13D |
0x438f9b MOV %EAX,0x7c(%RSP) |
0x438f9f MOV %RDX,0x50(%RSP) |
0x438fa4 VMOVDQA64 0x2a592(%RIP),%ZMM0 |
0x438fae MOVSXD %ECX,%RCX |
0x438fb1 MOV %EDI,0x4c(%RSP) |
0x438fb5 MOV %R9,0x40(%RSP) |
0x438fba MOV %RSI,0x60(%RSP) |
0x438fbf MOV %R13D,0x3c(%RSP) |
0x438fc4 NOPL (%RAX) |
(203) 0x438fc8 MOV 0x70(%RSP),%RAX |
(203) 0x438fcd LEA (,%R10,8),%RDX |
(203) 0x438fd5 IMUL %RCX,%RAX |
(203) 0x438fd9 CMPL $0x2,0x7c(%RSP) |
(203) 0x438fde JBE 43901e |
(203) 0x438fe0 MOV 0x60(%RSP),%RDI |
(203) 0x438fe5 LEA (%R14,%RAX,1),%RSI |
(203) 0x438fe9 LEA (,%R10,8),%RDX |
(203) 0x438ff1 SAL $0x3,%RSI |
(203) 0x438ff5 LEA (%R11,%RDX,1),%R13 |
(203) 0x438ff9 LEA (%RDI,%RSI,1),%R9 |
(203) 0x438ffd LEA (%R15,%RDX,1),%RDI |
(203) 0x439001 ADD %R8,%R9 |
(203) 0x439004 ADD %R11,%RDI |
(203) 0x439007 CMP %RDI,%R9 |
(203) 0x43900a JAE 439190 |
(203) 0x439010 LEA 0x8(%R8,%RSI,1),%R9 |
(203) 0x439015 CMP %R9,%R13 |
(203) 0x439018 JAE 439190 |
(203) 0x43901e LEA (%R14,%RAX,1),%RDI |
(203) 0x439022 MOV 0x40(%RSP),%RAX |
(203) 0x439027 ADD %R11,%RDX |
(203) 0x43902a SAL $0x3,%RDI |
(203) 0x43902e LEA (%R8,%RDI,1),%R13 |
(203) 0x439032 ADD %RAX,%RDI |
(203) 0x439035 MOV %R13,%RSI |
(203) 0x439038 SUB %RDI,%RSI |
(203) 0x43903b SUB $0x8,%RSI |
(203) 0x43903f SHR $0x3,%RSI |
(203) 0x439043 INC %RSI |
(203) 0x439046 AND $0x7,%ESI |
(203) 0x439049 JE 4390fa |
(203) 0x43904f CMP $0x1,%RSI |
(203) 0x439053 JE 4390e3 |
(203) 0x439059 CMP $0x2,%RSI |
(203) 0x43905d JE 4390d1 |
(203) 0x43905f CMP $0x3,%RSI |
(203) 0x439063 JE 4390bf |
(203) 0x439065 CMP $0x4,%RSI |
(203) 0x439069 JE 4390ad |
(203) 0x43906b CMP $0x5,%RSI |
(203) 0x43906f JE 43909b |
(203) 0x439071 CMP $0x6,%RSI |
(203) 0x439075 JE 439089 |
(203) 0x439077 VMOVSD (%RDX),%XMM5 |
(203) 0x43907b SUB $0x8,%R13 |
(203) 0x43907f ADD $0x8,%RDX |
(203) 0x439083 VMOVSD %XMM5,0x8(%R13) |
(203) 0x439089 VMOVSD (%RDX),%XMM6 |
(203) 0x43908d SUB $0x8,%R13 |
(203) 0x439091 ADD $0x8,%RDX |
(203) 0x439095 VMOVSD %XMM6,0x8(%R13) |
(203) 0x43909b VMOVSD (%RDX),%XMM7 |
(203) 0x43909f SUB $0x8,%R13 |
(203) 0x4390a3 ADD $0x8,%RDX |
(203) 0x4390a7 VMOVSD %XMM7,0x8(%R13) |
(203) 0x4390ad VMOVSD (%RDX),%XMM8 |
(203) 0x4390b1 SUB $0x8,%R13 |
(203) 0x4390b5 ADD $0x8,%RDX |
(203) 0x4390b9 VMOVSD %XMM8,0x8(%R13) |
(203) 0x4390bf VMOVSD (%RDX),%XMM9 |
(203) 0x4390c3 SUB $0x8,%R13 |
(203) 0x4390c7 ADD $0x8,%RDX |
(203) 0x4390cb VMOVSD %XMM9,0x8(%R13) |
(203) 0x4390d1 VMOVSD (%RDX),%XMM10 |
(203) 0x4390d5 SUB $0x8,%R13 |
(203) 0x4390d9 ADD $0x8,%RDX |
(203) 0x4390dd VMOVSD %XMM10,0x8(%R13) |
(203) 0x4390e3 VMOVSD (%RDX),%XMM11 |
(203) 0x4390e7 SUB $0x8,%R13 |
(203) 0x4390eb ADD $0x8,%RDX |
(203) 0x4390ef VMOVSD %XMM11,0x8(%R13) |
(203) 0x4390f5 CMP %R13,%RDI |
(203) 0x4390f8 JE 439160 |
(202) 0x4390fa VMOVSD (%RDX),%XMM12 |
(202) 0x4390fe SUB $0x40,%R13 |
(202) 0x439102 ADD $0x40,%RDX |
(202) 0x439106 VMOVSD %XMM12,0x40(%R13) |
(202) 0x43910c VMOVSD -0x38(%RDX),%XMM13 |
(202) 0x439111 VMOVSD %XMM13,0x38(%R13) |
(202) 0x439117 VMOVSD -0x30(%RDX),%XMM14 |
(202) 0x43911c VMOVSD %XMM14,0x30(%R13) |
(202) 0x439122 VMOVSD -0x28(%RDX),%XMM15 |
(202) 0x439127 VMOVSD %XMM15,0x28(%R13) |
(202) 0x43912d VMOVSD -0x20(%RDX),%XMM1 |
(202) 0x439132 VMOVSD %XMM1,0x20(%R13) |
(202) 0x439138 VMOVSD -0x18(%RDX),%XMM2 |
(202) 0x43913d VMOVSD %XMM2,0x18(%R13) |
(202) 0x439143 VMOVSD -0x10(%RDX),%XMM3 |
(202) 0x439148 VMOVSD %XMM3,0x10(%R13) |
(202) 0x43914e VMOVSD -0x8(%RDX),%XMM4 |
(202) 0x439153 VMOVSD %XMM4,0x8(%R13) |
(202) 0x439159 CMP %R13,%RDI |
(202) 0x43915c JNE 4390fa |
(203) 0x43915e XCHG %AX,%AX |
(203) 0x439160 MOV 0x68(%RSP),%R9 |
(203) 0x439165 INC %RCX |
(203) 0x439168 ADD %EBX,%R12D |
(203) 0x43916b LEA (%RCX),%EDX |
(203) 0x43916d ADD %R9,%R10 |
(203) 0x439170 CMP %EDX,0x78(%RSP) |
(203) 0x439174 JG 438fc8 |
0x43917a VZEROUPPER |
0x43917d LEA -0x28(%RBP),%RSP |
0x439181 POP %RBX |
0x439182 POP %R12 |
0x439184 POP %R13 |
0x439186 POP %R14 |
0x439188 POP %R15 |
0x43918a POP %RBP |
0x43918b RET |
0x43918c NOPL (%RAX) |
(203) 0x439190 CMPL $0x6,0x7c(%RSP) |
(203) 0x439195 JBE 439451 |
(203) 0x43919b MOV 0x50(%RSP),%R9 |
(203) 0x4391a0 MOV %R13,%RDX |
(203) 0x4391a3 LEA -0x38(%R8,%RSI,1),%RDI |
(203) 0x4391a8 ADD %R13,%R9 |
(203) 0x4391ab MOV %R9,0x58(%RSP) |
(203) 0x4391b0 MOV 0x50(%RSP),%R9 |
(203) 0x4391b5 SUB $0x40,%R9 |
(203) 0x4391b9 SHR $0x6,%R9 |
(203) 0x4391bd INC %R9 |
(203) 0x4391c0 AND $0x7,%R9D |
(203) 0x4391c4 JE 43929e |
(203) 0x4391ca CMP $0x1,%R9 |
(203) 0x4391ce JE 439277 |
(203) 0x4391d4 CMP $0x2,%R9 |
(203) 0x4391d8 JE 43925e |
(203) 0x4391de CMP $0x3,%R9 |
(203) 0x4391e2 JE 439245 |
(203) 0x4391e4 CMP $0x4,%R9 |
(203) 0x4391e8 JE 43922c |
(203) 0x4391ea CMP $0x5,%R9 |
(203) 0x4391ee JE 439213 |
(203) 0x4391f0 CMP $0x6,%R9 |
(203) 0x4391f4 JNE 439432 |
(203) 0x4391fa VXORPS %XMM2,%XMM2,%XMM2 |
(203) 0x4391fe VPERMPD (%RDX),%ZMM0,%ZMM2 |
(203) 0x439204 SUB $0x40,%RDI |
(203) 0x439208 ADD $0x40,%RDX |
(203) 0x43920c VMOVUPD %ZMM2,0x40(%RDI) |
(203) 0x439213 VXORPS %XMM3,%XMM3,%XMM3 |
(203) 0x439217 VPERMPD (%RDX),%ZMM0,%ZMM3 |
(203) 0x43921d SUB $0x40,%RDI |
(203) 0x439221 ADD $0x40,%RDX |
(203) 0x439225 VMOVUPD %ZMM3,0x40(%RDI) |
(203) 0x43922c VXORPS %XMM4,%XMM4,%XMM4 |
(203) 0x439230 VPERMPD (%RDX),%ZMM0,%ZMM4 |
(203) 0x439236 SUB $0x40,%RDI |
(203) 0x43923a ADD $0x40,%RDX |
(203) 0x43923e VMOVUPD %ZMM4,0x40(%RDI) |
(203) 0x439245 VXORPS %XMM5,%XMM5,%XMM5 |
(203) 0x439249 VPERMPD (%RDX),%ZMM0,%ZMM5 |
(203) 0x43924f SUB $0x40,%RDI |
(203) 0x439253 ADD $0x40,%RDX |
(203) 0x439257 VMOVUPD %ZMM5,0x40(%RDI) |
(203) 0x43925e VXORPS %XMM6,%XMM6,%XMM6 |
(203) 0x439262 VPERMPD (%RDX),%ZMM0,%ZMM6 |
(203) 0x439268 SUB $0x40,%RDI |
(203) 0x43926c ADD $0x40,%RDX |
(203) 0x439270 VMOVUPD %ZMM6,0x40(%RDI) |
(203) 0x439277 VXORPS %XMM7,%XMM7,%XMM7 |
(203) 0x43927b VPERMPD (%RDX),%ZMM0,%ZMM7 |
(203) 0x439281 MOV 0x58(%RSP),%R13 |
(203) 0x439286 ADD $0x40,%RDX |
(203) 0x43928a SUB $0x40,%RDI |
(203) 0x43928e VMOVUPD %ZMM7,0x40(%RDI) |
(203) 0x439295 CMP %R13,%RDX |
(203) 0x439298 JE 439351 |
(204) 0x43929e VXORPS %XMM8,%XMM8,%XMM8 |
(204) 0x4392a3 VPERMPD (%RDX),%ZMM0,%ZMM8 |
(204) 0x4392a9 MOV 0x58(%RSP),%R9 |
(204) 0x4392ae ADD $0x200,%RDX |
(204) 0x4392b5 SUB $0x200,%RDI |
(204) 0x4392bc VMOVUPD %ZMM8,0x200(%RDI) |
(204) 0x4392c3 VXORPS %XMM9,%XMM9,%XMM9 |
(204) 0x4392c8 VPERMPD -0x1c0(%RDX),%ZMM0,%ZMM9 |
(204) 0x4392cf VMOVUPD %ZMM9,0x1c0(%RDI) |
(204) 0x4392d6 VXORPS %XMM10,%XMM10,%XMM10 |
(204) 0x4392db VPERMPD -0x180(%RDX),%ZMM0,%ZMM10 |
(204) 0x4392e2 VMOVUPD %ZMM10,0x180(%RDI) |
(204) 0x4392e9 VXORPS %XMM11,%XMM11,%XMM11 |
(204) 0x4392ee VPERMPD -0x140(%RDX),%ZMM0,%ZMM11 |
(204) 0x4392f5 VMOVUPD %ZMM11,0x140(%RDI) |
(204) 0x4392fc VXORPS %XMM12,%XMM12,%XMM12 |
(204) 0x439301 VPERMPD -0x100(%RDX),%ZMM0,%ZMM12 |
(204) 0x439308 VMOVUPD %ZMM12,0x100(%RDI) |
(204) 0x43930f VXORPS %XMM13,%XMM13,%XMM13 |
(204) 0x439314 VPERMPD -0xc0(%RDX),%ZMM0,%ZMM13 |
(204) 0x43931b VMOVUPD %ZMM13,0xc0(%RDI) |
(204) 0x439322 VXORPS %XMM14,%XMM14,%XMM14 |
(204) 0x439327 VPERMPD -0x80(%RDX),%ZMM0,%ZMM14 |
(204) 0x43932e VMOVUPD %ZMM14,0x80(%RDI) |
(204) 0x439335 VXORPS %XMM15,%XMM15,%XMM15 |
(204) 0x43933a VPERMPD -0x40(%RDX),%ZMM0,%ZMM15 |
(204) 0x439341 VMOVUPD %ZMM15,0x40(%RDI) |
(204) 0x439348 CMP %R9,%RDX |
(204) 0x43934b JNE 43929e |
(203) 0x439351 MOV 0x4c(%RSP),%EDX |
(203) 0x439355 CMP %EDX,%EBX |
(203) 0x439357 JE 439160 |
(203) 0x43935d CMPL $0x2,0x3c(%RSP) |
(203) 0x439362 MOV 0x48(%RSP),%EDI |
(203) 0x439366 JBE 43945d |
(203) 0x43936c MOV 0x4c(%RSP),%R13D |
(203) 0x439371 MOV %R13D,%EDX |
(203) 0x439374 LEA (%RDX,%R10,1),%R9 |
(203) 0x439378 ADD %R8,%RSI |
(203) 0x43937b NEG %RDX |
(203) 0x43937e VXORPS %XMM1,%XMM1,%XMM1 |
(203) 0x439382 VPERMPD $0x1b,(%R11,%R9,8),%YMM1 |
(203) 0x439389 VMOVUPD %YMM1,-0x18(%RSI,%RDX,8) |
(203) 0x43938f MOV %EDI,%EDX |
(203) 0x439391 AND $-0x4,%EDX |
(203) 0x439394 ADD %R13D,%EDX |
(203) 0x439397 AND $0x3,%EDI |
(203) 0x43939a JE 439160 |
(203) 0x4393a0 MOV 0x38(%RSP),%R13D |
(203) 0x4393a5 MOV 0x34(%RSP),%R9D |
(203) 0x4393aa LEA (%R13,%RDX,1),%ESI |
(203) 0x4393af ADD %R12D,%ESI |
(203) 0x4393b2 MOVSXD %ESI,%RDI |
(203) 0x4393b5 MOV %R9D,%ESI |
(203) 0x4393b8 SUB %EDX,%ESI |
(203) 0x4393ba VMOVSD (%R11,%RDI,8),%XMM2 |
(203) 0x4393c0 MOVSXD %ESI,%RDI |
(203) 0x4393c3 ADD %RAX,%RDI |
(203) 0x4393c6 VMOVSD %XMM2,(%R8,%RDI,8) |
(203) 0x4393cc LEA 0x1(%RDX),%EDI |
(203) 0x4393cf CMP %EDI,%EBX |
(203) 0x4393d1 JLE 439160 |
(203) 0x4393d7 LEA (%R13,%RDI,1),%ESI |
(203) 0x4393dc ADD $0x2,%EDX |
(203) 0x4393df ADD %R12D,%ESI |
(203) 0x4393e2 MOVSXD %ESI,%RSI |
(203) 0x4393e5 VMOVSD (%R11,%RSI,8),%XMM3 |
(203) 0x4393eb MOV %R9D,%ESI |
(203) 0x4393ee SUB %EDI,%ESI |
(203) 0x4393f0 MOVSXD %ESI,%RDI |
(203) 0x4393f3 ADD %RAX,%RDI |
(203) 0x4393f6 VMOVSD %XMM3,(%R8,%RDI,8) |
(203) 0x4393fc CMP %EDX,%EBX |
(203) 0x4393fe JLE 439160 |
(203) 0x439404 LEA (%R13,%RDX,1),%R13D |
(203) 0x439409 SUB %EDX,%R9D |
(203) 0x43940c ADD %R12D,%R13D |
(203) 0x43940f MOVSXD %R9D,%RDX |
(203) 0x439412 MOVSXD %R13D,%RSI |
(203) 0x439415 ADD %RAX,%RDX |
(203) 0x439418 VMOVSD (%R11,%RSI,8),%XMM4 |
(203) 0x43941e VMOVSD %XMM4,(%R8,%RDX,8) |
(203) 0x439424 JMP 439160 |
0x439429 INC %EAX |
0x43942b XOR %EDX,%EDX |
0x43942d JMP 438f01 |
(203) 0x439432 VXORPS %XMM1,%XMM1,%XMM1 |
(203) 0x439436 VPERMPD (%R13),%ZMM0,%ZMM1 |
(203) 0x43943d ADD $0x40,%RDX |
(203) 0x439441 SUB $0x40,%RDI |
(203) 0x439445 VMOVUPD %ZMM1,0x40(%RDI) |
(203) 0x43944c JMP 4391fa |
(203) 0x439451 MOV %EBX,%EDI |
(203) 0x439453 XOR %EDX,%EDX |
(203) 0x439455 XOR %R13D,%R13D |
(203) 0x439458 JMP 439374 |
(203) 0x43945d MOV 0x4c(%RSP),%EDX |
(203) 0x439461 JMP 4393a0 |
0x439466 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○92.44 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
Path / |
Source file and lines | pack_kernel.cpp:88-92 |
Module | exec |
nb instructions | 92 |
nb uops | 98 |
loop length | 321 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 13 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 6.33 | 6.33 | 10.00 | 8.07 | 8.00 | 10.00 | 10.00 | 10.00 | 7.93 | 6.33 |
cycles | 8.00 | 11.87 | 6.33 | 6.33 | 10.00 | 8.07 | 8.00 | 10.00 | 10.00 | 10.00 | 7.93 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.83 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 11.87 |
DIV/SQRT | 6.00 |
Overall L1 | 16.33 |
all | 4% |
load | 25% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 5% |
all | 11% |
load | 32% |
store | 8% |
mul | 6% |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 439429 <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x579> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43917d <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43917d <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EDI,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
IMUL %ECX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x10(%R14),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EBX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%EDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R15,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R9D,%R14 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SAL $0x3,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOVSXD %R12D,%R10 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RSI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
AND $-0x8,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R15,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0x2a592(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 438f01 <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:88-92 |
Module | exec |
nb instructions | 92 |
nb uops | 98 |
loop length | 321 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 13 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 6.33 | 6.33 | 10.00 | 8.07 | 8.00 | 10.00 | 10.00 | 10.00 | 7.93 | 6.33 |
cycles | 8.00 | 11.87 | 6.33 | 6.33 | 10.00 | 8.07 | 8.00 | 10.00 | 10.00 | 10.00 | 7.93 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.83 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 11.87 |
DIV/SQRT | 6.00 |
Overall L1 | 16.33 |
all | 4% |
load | 25% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 5% |
all | 11% |
load | 32% |
store | 8% |
mul | 6% |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 439429 <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x579> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43917d <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43917d <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EDI,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
IMUL %ECX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x10(%R14),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EBX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%EDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R15,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R9D,%R14 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SAL $0x3,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOVSXD %R12D,%R10 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RSI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
AND $-0x8,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R15,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0x2a592(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 438f01 <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clover_unpack_message_left(global_variables&, int, int, int, int, clover::Buffer2D | 0.03 | 0.01 |
▼Loop 203 - pack_kernel.cpp:88-92 - exec– | 0.03 | 0.02 |
○Loop 204 - pack_kernel.cpp:90-92 - exec | 0 | 0 |
○Loop 202 - pack_kernel.cpp:90-92 - exec | 0 | 0 |