Function: clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100 | Module: exec | Source: pack_kernel.f90:155-163 | Coverage: 0.01% |
---|
Function: clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100 | Module: exec | Source: pack_kernel.f90:155-163 | Coverage: 0.01% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-861-0321/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/pack_kernel.f90: 155 - 163 |
-------------------------------------------------------------------------------- |
155: !$OMP PARALLEL DO PRIVATE(index) |
156: DO k=y_min-depth,y_max+y_inc+depth |
157: !$OMP SIMD |
158: DO j=1,depth |
159: index= buffer_offset + j+(k+depth-1)*depth |
160: right_snd_buffer(index)=field(x_max+1-j,k) |
161: ENDDO |
162: ENDDO |
163: !$OMP END PARALLEL DO |
0x441b70 PUSH %RBP |
0x441b71 MOV %RSP,%RBP |
0x441b74 PUSH %R15 |
0x441b76 PUSH %R14 |
0x441b78 PUSH %R13 |
0x441b7a PUSH %R12 |
0x441b7c PUSH %RBX |
0x441b7d SUB $0x58,%RSP |
0x441b81 MOV %R8,-0x48(%RBP) |
0x441b85 MOV %RCX,-0x58(%RBP) |
0x441b89 MOV 0x28(%RBP),%EAX |
0x441b8c MOVL $0,-0x40(%RBP) |
0x441b93 TEST %EAX,%EAX |
0x441b95 JS 441bf6 |
0x441b97 MOV %RDX,%R12 |
0x441b9a MOV (%RDI),%ESI |
0x441b9c MOVL $0,-0x30(%RBP) |
0x441ba3 MOV %EAX,-0x2c(%RBP) |
0x441ba6 MOVL $0x1,-0x3c(%RBP) |
0x441bad SUB $0x8,%RSP |
0x441bb1 LEA -0x3c(%RBP),%RAX |
0x441bb5 LEA -0x40(%RBP),%RCX |
0x441bb9 LEA -0x30(%RBP),%R8 |
0x441bbd LEA -0x2c(%RBP),%R9 |
0x441bc1 MOV $0x5735a0,%EDI |
0x441bc6 MOV %ESI,-0x34(%RBP) |
0x441bc9 MOV $0x22,%EDX |
0x441bce PUSH $0x1 |
0x441bd0 PUSH $0x1 |
0x441bd2 PUSH %RAX |
0x441bd3 CALL 404670 <__kmpc_for_static_init_4@plt> |
0x441bd8 ADD $0x20,%RSP |
0x441bdc MOV -0x30(%RBP),%EAX |
0x441bdf MOV -0x2c(%RBP),%EDX |
0x441be2 SUB %EAX,%EDX |
0x441be4 MOV %EDX,-0x38(%RBP) |
0x441be7 JAE 441c05 |
0x441be9 MOV $0x5735c0,%EDI |
0x441bee MOV -0x34(%RBP),%ESI |
0x441bf1 CALL 404230 <__kmpc_for_static_fini@plt> |
0x441bf6 ADD $0x58,%RSP |
0x441bfa POP %RBX |
0x441bfb POP %R12 |
0x441bfd POP %R13 |
0x441bff POP %R14 |
0x441c01 POP %R15 |
0x441c03 POP %RBP |
0x441c04 RET |
0x441c05 MOV %RAX,%RCX |
0x441c08 MOV -0x48(%RBP),%RAX |
0x441c0c MOV (%RAX),%EAX |
0x441c0e LEA (%RCX,%R12,1),%R8D |
0x441c12 DEC %R8D |
0x441c15 XOR %R10D,%R10D |
0x441c18 ADD %R12D,%ECX |
0x441c1b MOV %RCX,-0x50(%RBP) |
0x441c1f MOVDQA 0xebaf9(%RIP),%XMM0 |
0x441c27 PCMPEQD %XMM1,%XMM1 |
0x441c2b MOVDQA 0xebabd(%RIP),%XMM2 |
0x441c33 MOVDQA 0xeba15(%RIP),%XMM3 |
0x441c3b JMP 441c52 |
0x441c3d NOPL (%RAX) |
(438) 0x441c40 MOV %EBX,%EAX |
(438) 0x441c42 LEA 0x1(%R10),%ECX |
(438) 0x441c46 INC %R8D |
(438) 0x441c49 CMP -0x38(%RBP),%R10D |
(438) 0x441c4d MOV %ECX,%R10D |
(438) 0x441c50 JE 441be9 |
(438) 0x441c52 TEST %EAX,%EAX |
(438) 0x441c54 JLE 441c42 |
(438) 0x441c56 MOV -0x50(%RBP),%RCX |
(438) 0x441c5a ADD %R10D,%ECX |
(438) 0x441c5d MOV -0x58(%RBP),%RDX |
(438) 0x441c61 MOVSXD (%RDX),%RDI |
(438) 0x441c64 MOV -0x48(%RBP),%RDX |
(438) 0x441c68 MOV (%RDX),%EBX |
(438) 0x441c6a MOV 0x10(%RBP),%RSI |
(438) 0x441c6e MOV (%RSI),%R11 |
(438) 0x441c71 MOV 0x38(%RSI),%R12 |
(438) 0x441c75 MOV 0x18(%RBP),%RDX |
(438) 0x441c79 MOV (%RDX),%R13D |
(438) 0x441c7c MOV 0x50(%RSI),%R14 |
(438) 0x441c80 MOV 0x137089(%RIP),%R15 |
(438) 0x441c87 MOV 0x1370ba(%RIP),%RDX |
(438) 0x441c8e MOV %EAX,%R9D |
(438) 0x441c91 MOV %R9,%RSI |
(438) 0x441c94 MOVSXD %ECX,%RAX |
(438) 0x441c97 MOV $-0x4,%ECX |
(438) 0x441c9c AND %RCX,%RSI |
(438) 0x441c9f MOV %RDI,-0x80(%RBP) |
(438) 0x441ca3 JE 441ef0 |
(438) 0x441ca9 MOV %R10,-0x70(%RBP) |
(438) 0x441cad MOV %R8,-0x78(%RBP) |
(438) 0x441cb1 LEA (%RBX,%R8,1),%ECX |
(438) 0x441cb5 MOV %RBX,-0x68(%RBP) |
(438) 0x441cb9 IMUL %EBX,%ECX |
(438) 0x441cbc MOVSXD %ECX,%RCX |
(438) 0x441cbf ADD %RDI,%RCX |
(438) 0x441cc2 MOVQ %R11,%XMM4 |
(438) 0x441cc7 MOV %R12,-0x60(%RBP) |
(438) 0x441ccb MOVQ %R12,%XMM5 |
(438) 0x441cd0 PSHUFD $0x44,%XMM5,%XMM5 |
(438) 0x441cd5 MOVDQA %XMM5,%XMM6 |
(438) 0x441cd9 PSRLQ $0x20,%XMM6 |
(438) 0x441cde MOVQ %R15,%XMM7 |
(438) 0x441ce3 PSHUFD $0x44,%XMM7,%XMM7 |
(438) 0x441ce8 MOVQ %RDX,%XMM8 |
(438) 0x441ced PSHUFD $0x44,%XMM8,%XMM8 |
(438) 0x441cf3 MOVDQA %XMM8,%XMM9 |
(438) 0x441cf8 PSRLQ $0x20,%XMM9 |
(438) 0x441cfe MOV %R13D,%EBX |
(438) 0x441d01 XOR %R10D,%R10D |
(438) 0x441d04 NOPW %CS:(%RAX,%RAX,1) |
(440) 0x441d10 LEA 0x1(%RAX),%RDI |
(440) 0x441d14 IMUL %R14,%RDI |
(440) 0x441d18 MOVQ %RDI,%XMM10 |
(440) 0x441d1d PADDQ %XMM4,%XMM10 |
(440) 0x441d22 PSHUFD $0x44,%XMM10,%XMM10 |
(440) 0x441d28 MOVD %EBX,%XMM11 |
(440) 0x441d2d PSHUFD $0,%XMM11,%XMM11 |
(440) 0x441d33 PADDD %XMM0,%XMM11 |
(440) 0x441d38 PXOR %XMM13,%XMM13 |
(440) 0x441d3d PCMPGTD %XMM11,%XMM13 |
(440) 0x441d42 PSHUFD $-0x12,%XMM11,%XMM12 |
(440) 0x441d48 PUNPCKLDQ %XMM13,%XMM11 |
(440) 0x441d4d PXOR %XMM13,%XMM13 |
(440) 0x441d52 PCMPGTD %XMM12,%XMM13 |
(440) 0x441d57 PUNPCKLDQ %XMM13,%XMM12 |
(440) 0x441d5c PSUBQ %XMM1,%XMM12 |
(440) 0x441d61 PSUBQ %XMM1,%XMM11 |
(440) 0x441d66 MOVDQA %XMM6,%XMM13 |
(440) 0x441d6b PMULUDQ %XMM11,%XMM13 |
(440) 0x441d70 MOVDQA %XMM5,%XMM14 |
(440) 0x441d75 PMULUDQ %XMM11,%XMM14 |
(440) 0x441d7a PSRLQ $0x20,%XMM11 |
(440) 0x441d80 PMULUDQ %XMM5,%XMM11 |
(440) 0x441d85 PADDQ %XMM13,%XMM11 |
(440) 0x441d8a PSLLQ $0x20,%XMM11 |
(440) 0x441d90 MOVDQA %XMM6,%XMM13 |
(440) 0x441d95 PMULUDQ %XMM12,%XMM13 |
(440) 0x441d9a MOVDQA %XMM5,%XMM15 |
(440) 0x441d9f PMULUDQ %XMM12,%XMM15 |
(440) 0x441da4 PSRLQ $0x20,%XMM12 |
(440) 0x441daa PMULUDQ %XMM5,%XMM12 |
(440) 0x441daf PADDQ %XMM13,%XMM12 |
(440) 0x441db4 PSLLQ $0x20,%XMM12 |
(440) 0x441dba PADDQ %XMM10,%XMM15 |
(440) 0x441dbf PADDQ %XMM12,%XMM15 |
(440) 0x441dc4 PADDQ %XMM10,%XMM14 |
(440) 0x441dc9 PADDQ %XMM11,%XMM14 |
(440) 0x441dce MOVQ %XMM14,%RDI |
(440) 0x441dd3 MOVSD (%RDI),%XMM10 |
(440) 0x441dd8 PSHUFD $-0x12,%XMM14,%XMM11 |
(440) 0x441dde MOVQ %XMM11,%R12 |
(440) 0x441de3 MOVQ %XMM15,%R8 |
(440) 0x441de8 PSHUFD $-0x12,%XMM15,%XMM11 |
(440) 0x441dee LEA (%RCX,%R10,1),%RDI |
(440) 0x441df2 MOVQ %RDI,%XMM12 |
(440) 0x441df7 MOVQ %XMM11,%RDI |
(440) 0x441dfc PSHUFD $0x44,%XMM12,%XMM11 |
(440) 0x441e02 MOVDQA %XMM11,%XMM12 |
(440) 0x441e07 PADDQ %XMM2,%XMM12 |
(440) 0x441e0c PADDQ %XMM3,%XMM11 |
(440) 0x441e11 MOVDQA %XMM9,%XMM13 |
(440) 0x441e16 PMULUDQ %XMM11,%XMM13 |
(440) 0x441e1b MOVDQA %XMM8,%XMM14 |
(440) 0x441e20 PMULUDQ %XMM11,%XMM14 |
(440) 0x441e25 PSRLQ $0x20,%XMM11 |
(440) 0x441e2b PMULUDQ %XMM8,%XMM11 |
(440) 0x441e30 PADDQ %XMM13,%XMM11 |
(440) 0x441e35 MOVDQA %XMM9,%XMM13 |
(440) 0x441e3a PMULUDQ %XMM12,%XMM13 |
(440) 0x441e3f MOVDQA %XMM8,%XMM15 |
(440) 0x441e44 PMULUDQ %XMM12,%XMM15 |
(440) 0x441e49 PSRLQ $0x20,%XMM12 |
(440) 0x441e4f PMULUDQ %XMM8,%XMM12 |
(440) 0x441e54 PADDQ %XMM13,%XMM12 |
(440) 0x441e59 MOVSD (%R12),%XMM13 |
(440) 0x441e5f PSLLQ $0x20,%XMM12 |
(440) 0x441e65 PADDQ %XMM7,%XMM15 |
(440) 0x441e6a PADDQ %XMM12,%XMM15 |
(440) 0x441e6f MOVSD (%R8),%XMM12 |
(440) 0x441e74 PSLLQ $0x20,%XMM11 |
(440) 0x441e7a PADDQ %XMM7,%XMM14 |
(440) 0x441e7f PADDQ %XMM11,%XMM14 |
(440) 0x441e84 MOVSD (%RDI),%XMM11 |
(440) 0x441e89 MOVQ %XMM14,%RDI |
(440) 0x441e8e MOVSD %XMM10,(%RDI) |
(440) 0x441e93 PSHUFD $-0x12,%XMM14,%XMM10 |
(440) 0x441e99 MOVQ %XMM10,%RDI |
(440) 0x441e9e MOVSD %XMM13,(%RDI) |
(440) 0x441ea3 MOVQ %XMM15,%RDI |
(440) 0x441ea8 MOVSD %XMM12,(%RDI) |
(440) 0x441ead PSHUFD $-0x12,%XMM15,%XMM10 |
(440) 0x441eb3 MOVQ %XMM10,%RDI |
(440) 0x441eb8 MOVSD %XMM11,(%RDI) |
(440) 0x441ebd ADD $0x4,%R10 |
(440) 0x441ec1 ADD $-0x4,%EBX |
(440) 0x441ec4 CMP %RSI,%R10 |
(440) 0x441ec7 JB 441d10 |
(438) 0x441ecd CMP %R9,%RSI |
(438) 0x441ed0 MOV -0x78(%RBP),%R8 |
(438) 0x441ed4 MOV -0x70(%RBP),%R10 |
(438) 0x441ed8 MOV -0x68(%RBP),%RBX |
(438) 0x441edc MOV -0x60(%RBP),%R12 |
(438) 0x441ee0 JE 441c40 |
(438) 0x441ee6 JMP 441ef2 |
0x441ee8 NOPL (%RAX,%RAX,1) |
(438) 0x441ef0 XOR %ESI,%ESI |
(438) 0x441ef2 SUB %RSI,%R9 |
(438) 0x441ef5 SUB %ESI,%R13D |
(438) 0x441ef8 MOV -0x80(%RBP),%RDI |
(438) 0x441efc ADD %RSI,%RDI |
(438) 0x441eff LEA (%RBX,%R8,1),%ECX |
(438) 0x441f03 IMUL %EBX,%ECX |
(438) 0x441f06 MOVSXD %ECX,%RCX |
(438) 0x441f09 ADD %RDI,%RCX |
(438) 0x441f0c IMUL %RDX,%RCX |
(438) 0x441f10 ADD %RCX,%R15 |
(438) 0x441f13 INC %RAX |
(438) 0x441f16 IMUL %RAX,%R14 |
(438) 0x441f1a ADD %R14,%R11 |
(438) 0x441f1d NOPL (%RAX) |
(439) 0x441f20 MOVSXD %R13D,%R13 |
(439) 0x441f23 LEA 0x1(%R13),%RAX |
(439) 0x441f27 IMUL %R12,%RAX |
(439) 0x441f2b MOVQ (%RAX,%R11,1),%XMM4 |
(439) 0x441f31 MOVQ %XMM4,(%R15) |
(439) 0x441f36 DEC %R13D |
(439) 0x441f39 ADD %RDX,%R15 |
(439) 0x441f3c DEC %R9 |
(439) 0x441f3f JNE 441f20 |
(438) 0x441f41 JMP 441c40 |
0x441f46 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | pack_kernel.f90:155-163 |
Module | exec |
nb instructions | 64 |
nb uops | 63 |
loop length | 226 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.75 | 4.75 | 4.75 | 4.75 | 3.00 | 7.00 | 7.00 | 7.00 | 0.25 | 0.25 | 0.25 | 0.25 | 0.00 | 0.00 |
cycles | 4.75 | 4.75 | 4.75 | 4.75 | 3.00 | 7.00 | 7.00 | 7.00 | 0.25 | 0.25 | 0.25 | 0.25 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 10.50 |
Dispatch | 7.00 |
Overall L1 | 10.50 |
Vectorization ratios | |
all | 16% |
load | 50% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector efficiency ratios | |
all | 11% |
load | 15% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x28(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JS 441bf6 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0x86> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0x1,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x3c(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x30(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x2c(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x5735a0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x30(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x2c(%RBP),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %EAX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JAE 441c05 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0x95> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV $0x5735c0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RCX,%R12,1),%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD %R12D,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVDQA 0xebaf9(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PCMPEQD %XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA 0xebabd(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA 0xeba15(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
JMP 441c52 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0xe2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | pack_kernel.f90:155-163 |
Module | exec |
nb instructions | 64 |
nb uops | 63 |
loop length | 226 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.75 | 4.75 | 4.75 | 4.75 | 3.00 | 7.00 | 7.00 | 7.00 | 0.25 | 0.25 | 0.25 | 0.25 | 0.00 | 0.00 |
cycles | 4.75 | 4.75 | 4.75 | 4.75 | 3.00 | 7.00 | 7.00 | 7.00 | 0.25 | 0.25 | 0.25 | 0.25 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 10.50 |
Dispatch | 7.00 |
Overall L1 | 10.50 |
Vectorization ratios | |
all | 16% |
load | 50% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector efficiency ratios | |
all | 11% |
load | 15% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x28(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JS 441bf6 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0x86> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0x1,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x3c(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x30(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x2c(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x5735a0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x30(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x2c(%RBP),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %EAX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JAE 441c05 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0x95> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV $0x5735c0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x34(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RCX,%R12,1),%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD %R12D,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVDQA 0xebaf9(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
PCMPEQD %XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
MOVDQA 0xebabd(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA 0xeba15(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
JMP 441c52 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0xe2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100– | 0.01 | 0 |
▼Loop 438 - pack_kernel.f90:156-160 - exec– | 0 | 0.01 |
○Loop 439 - pack_kernel.f90:158-160 - exec | 0.01 | 0.01 |
○Loop 440 - pack_kernel.f90:158-160 - exec | 0 | 0 |