Loop Id: 43 | Module: exec | Source: :0-0 | Coverage: 0.01% |
---|
Loop Id: 43 | Module: exec | Source: :0-0 | Coverage: 0.01% |
---|
0x40e30b MOV -0x1b0(%RBP),%R9 |
0x40e312 MOV -0x230(%RBP),%RDI |
0x40e319 LEA (%R9,%R9,4),%RSI |
0x40e31d MOV 0x48(%RDI),%R13 |
0x40e321 MOV %R9D,-0x1f0(%RBP) |
0x40e328 SAL $0x3,%RSI |
0x40e32c ADD %RSI,%R13 |
0x40e32f ADD 0x60(%RDI),%RSI |
0x40e333 MOV %R13,-0x150(%RBP) |
0x40e33a MOV %RSI,-0x1d0(%RBP) |
0x40e341 TEST %RAX,%RAX |
0x40e344 JE 40e64a |
0x40e34a LEA (%R9,%R9,2),%R10 |
0x40e34e XOR %R13D,%R13D |
0x40e351 LEA (,%R10,8),%R8 |
0x40e359 MOV %R8,-0x210(%RBP) |
0x40e360 JMP 40e36e |
(44) 0x40e362 INC %R13 |
(44) 0x40e365 CMP %RAX,%R13 |
(44) 0x40e368 JAE 40e628 |
(44) 0x40e36e MOV -0x150(%RBP),%RDX |
(44) 0x40e375 LEA (,%R13,8),%RSI |
(44) 0x40e37d MOV 0x18(%RDX),%RDI |
(44) 0x40e381 ADD %RSI,%RDI |
(44) 0x40e384 VMOVSD (%RDI),%XMM0 |
(44) 0x40e388 VCOMISD 0x1440(%RBX),%XMM0 |
(44) 0x40e390 JAE 40e362 |
(44) 0x40e392 MOV 0x2830(%RBX),%R14 |
(44) 0x40e399 SUB 0x2828(%RBX),%R14 |
(44) 0x40e3a0 MOV $-0x5555555555555555,%RAX |
(44) 0x40e3aa SAR $0x3,%R14 |
(44) 0x40e3ae IMUL %RAX,%R14 |
(44) 0x40e3b2 TEST %R14D,%R14D |
(44) 0x40e3b5 JLE 40e5c0 |
(44) 0x40e3bb MOV -0x1d0(%RBP),%R10 |
(44) 0x40e3c2 MOV -0x2e0(%RBP),%R9 |
(44) 0x40e3c9 DEC %R14D |
(44) 0x40e3cc MOV -0x210(%RBP),%RCX |
(44) 0x40e3d3 MOVSXD 0x8(%R10),%RAX |
(44) 0x40e3d7 MOV 0x18(%R10),%RDX |
(44) 0x40e3db LEA (%R14,%R14,2),%R10 |
(44) 0x40e3df ADD 0x40(%R9),%RCX |
(44) 0x40e3e3 LEA (%R15,%R10,8),%R14 |
(44) 0x40e3e7 LEA (%R10,%R10,2),%R10 |
(44) 0x40e3eb LEA (%RAX,%R13,1),%R9 |
(44) 0x40e3ef ADD %RDX,%RSI |
(44) 0x40e3f2 ADD %R9,%RAX |
(44) 0x40e3f5 LEA (%RDX,%R9,8),%R8 |
(44) 0x40e3f9 LEA (%RDX,%RAX,8),%R9 |
(44) 0x40e3fd MOV -0x120(%RBP),%RAX |
(44) 0x40e404 MOV %R15,%RDX |
(44) 0x40e407 AND $0x3,%R10D |
(44) 0x40e40b JE 40e588 |
(44) 0x40e411 VMOVSD (%RSI),%XMM2 |
(44) 0x40e415 VMOVDDUP %XMM0,%XMM6 |
(44) 0x40e419 ADD $0x18,%RAX |
(44) 0x40e41d LEA 0x18(%R15),%RDX |
(44) 0x40e421 VMOVSD (%R9),%XMM1 |
(44) 0x40e426 VADDSD 0x10(%RCX),%XMM1,%XMM5 |
(44) 0x40e42b VMOVHPD (%R8),%XMM2,%XMM7 |
(44) 0x40e430 VFMADD231SD 0x10(%R15),%XMM0,%XMM5 |
(44) 0x40e436 VADDPD (%RCX),%XMM7,%XMM4 |
(44) 0x40e43a VFMADD231PD (%R15),%XMM6,%XMM4 |
(44) 0x40e43f VMOVSD %XMM5,-0x8(%RAX) |
(44) 0x40e444 VMOVUPD %XMM4,-0x18(%RAX) |
(44) 0x40e449 VMOVSD (%RDI),%XMM0 |
(44) 0x40e44d CMP $0x1,%R10 |
(44) 0x40e451 JE 40e588 |
(44) 0x40e457 CMP $0x2,%R10 |
(44) 0x40e45b JE 40e499 |
(44) 0x40e45d VMOVSD (%RSI),%XMM15 |
(44) 0x40e461 VMOVSD (%R9),%XMM10 |
(44) 0x40e466 VMOVDDUP %XMM0,%XMM8 |
(44) 0x40e46a ADD $0x18,%RAX |
(44) 0x40e46e VADDSD 0x10(%RCX),%XMM10,%XMM12 |
(44) 0x40e473 VFMADD231SD 0x10(%RDX),%XMM0,%XMM12 |
(44) 0x40e479 VMOVHPD (%R8),%XMM15,%XMM13 |
(44) 0x40e47e VADDPD (%RCX),%XMM13,%XMM11 |
(44) 0x40e482 VFMADD231PD (%RDX),%XMM8,%XMM11 |
(44) 0x40e487 LEA 0x30(%R15),%RDX |
(44) 0x40e48b VMOVSD %XMM12,-0x8(%RAX) |
(44) 0x40e490 VMOVUPD %XMM11,-0x18(%RAX) |
(44) 0x40e495 VMOVSD (%RDI),%XMM0 |
(44) 0x40e499 VMOVSD (%RSI),%XMM9 |
(44) 0x40e49d VMOVDDUP %XMM0,%XMM3 |
(44) 0x40e4a1 ADD $0x18,%RAX |
(44) 0x40e4a5 ADD $0x18,%RDX |
(44) 0x40e4a9 VMOVSD (%R9),%XMM14 |
(44) 0x40e4ae VADDSD 0x10(%RCX),%XMM14,%XMM2 |
(44) 0x40e4b3 VMOVHPD (%R8),%XMM9,%XMM1 |
(44) 0x40e4b8 VFMADD231SD -0x8(%RDX),%XMM0,%XMM2 |
(44) 0x40e4be VADDPD (%RCX),%XMM1,%XMM6 |
(44) 0x40e4c2 VFMADD231PD -0x18(%RDX),%XMM3,%XMM6 |
(44) 0x40e4c8 VMOVSD %XMM2,-0x8(%RAX) |
(44) 0x40e4cd VMOVUPD %XMM6,-0x18(%RAX) |
(44) 0x40e4d2 VMOVSD (%RDI),%XMM0 |
(44) 0x40e4d6 JMP 40e588 |
(45) 0x40e4db VMOVSD (%RDI),%XMM13 |
(45) 0x40e4df VMOVSD (%RSI),%XMM14 |
(45) 0x40e4e3 ADD $0x60,%RAX |
(45) 0x40e4e7 ADD $0x60,%RDX |
(45) 0x40e4eb VMOVSD (%R9),%XMM11 |
(45) 0x40e4f0 VADDSD 0x10(%RCX),%XMM11,%XMM1 |
(45) 0x40e4f5 VMOVHPD (%R8),%XMM14,%XMM3 |
(45) 0x40e4fa VMOVDDUP %XMM13,%XMM12 |
(45) 0x40e4ff VADDPD (%RCX),%XMM3,%XMM9 |
(45) 0x40e503 VFMADD132SD -0x38(%RDX),%XMM1,%XMM13 |
(45) 0x40e509 VFMADD231PD -0x48(%RDX),%XMM12,%XMM9 |
(45) 0x40e50f VMOVSD %XMM13,-0x38(%RAX) |
(45) 0x40e514 VMOVUPD %XMM9,-0x48(%RAX) |
(45) 0x40e519 VMOVSD (%RDI),%XMM6 |
(45) 0x40e51d VMOVSD (%RSI),%XMM0 |
(45) 0x40e521 VMOVSD (%R9),%XMM7 |
(45) 0x40e526 VADDSD 0x10(%RCX),%XMM7,%XMM10 |
(45) 0x40e52b VMOVHPD (%R8),%XMM0,%XMM4 |
(45) 0x40e530 VMOVDDUP %XMM6,%XMM2 |
(45) 0x40e534 VADDPD (%RCX),%XMM4,%XMM5 |
(45) 0x40e538 VFMADD132SD -0x20(%RDX),%XMM10,%XMM6 |
(45) 0x40e53e VFMADD231PD -0x30(%RDX),%XMM2,%XMM5 |
(45) 0x40e544 VMOVSD %XMM6,-0x20(%RAX) |
(45) 0x40e549 VMOVUPD %XMM5,-0x30(%RAX) |
(45) 0x40e54e VMOVSD (%RDI),%XMM8 |
(45) 0x40e552 VMOVSD (%RSI),%XMM11 |
(45) 0x40e556 VMOVSD (%R9),%XMM15 |
(45) 0x40e55b VADDSD 0x10(%RCX),%XMM15,%XMM3 |
(45) 0x40e560 VMOVHPD (%R8),%XMM11,%XMM12 |
(45) 0x40e565 VMOVDDUP %XMM8,%XMM13 |
(45) 0x40e56a VADDPD (%RCX),%XMM12,%XMM14 |
(45) 0x40e56e VFMADD132SD -0x8(%RDX),%XMM3,%XMM8 |
(45) 0x40e574 VFMADD231PD -0x18(%RDX),%XMM13,%XMM14 |
(45) 0x40e57a VMOVSD %XMM8,-0x8(%RAX) |
(45) 0x40e57f VMOVUPD %XMM14,-0x18(%RAX) |
(45) 0x40e584 VMOVSD (%RDI),%XMM0 |
(45) 0x40e588 VMOVSD (%RSI),%XMM5 |
(45) 0x40e58c VMOVSD (%R9),%XMM7 |
(45) 0x40e591 VMOVDDUP %XMM0,%XMM4 |
(45) 0x40e595 VADDSD 0x10(%RCX),%XMM7,%XMM15 |
(45) 0x40e59a VFMADD132SD 0x10(%RDX),%XMM15,%XMM0 |
(45) 0x40e5a0 VMOVHPD (%R8),%XMM5,%XMM10 |
(45) 0x40e5a5 VADDPD (%RCX),%XMM10,%XMM8 |
(45) 0x40e5a9 VFMADD231PD (%RDX),%XMM4,%XMM8 |
(45) 0x40e5ae VMOVSD %XMM0,0x10(%RAX) |
(45) 0x40e5b3 VMOVUPD %XMM8,(%RAX) |
(45) 0x40e5b7 CMP %RDX,%R14 |
(45) 0x40e5ba JNE 40e4db |
(44) 0x40e5c0 MOV 0x18(%R11),%R11 |
(44) 0x40e5c4 MOV 0x2840(%RBX),%RDI |
(44) 0x40e5cb MOV %R13D,%R9D |
(44) 0x40e5ce MOV $0x1,%R8D |
(44) 0x40e5d4 MOV -0x1f0(%RBP),%EDX |
(44) 0x40e5da LEA -0x120(%RBP),%RCX |
(44) 0x40e5e1 MOVSXD (%R11,%R13,4),%RSI |
(44) 0x40e5e5 MOV (%RDI,%RSI,8),%R14 |
(44) 0x40e5e9 MOV -0x2e0(%RBP),%RSI |
(44) 0x40e5f0 MOV %R14,%RDI |
(44) 0x40e5f3 CALL 404190 <_ZN11qmcplusplus18VirtualParticleSet9makeMovesERKNS_11ParticleSetEiRKSt6vectorINS_10TinyVectorIdLj3EEESaIS6_EEbi@plt> |
(44) 0x40e5f8 MOV -0x158(%RBP),%RDI |
(44) 0x40e5ff LEA -0x100(%RBP),%RDX |
(44) 0x40e606 MOV %R14,%RSI |
(44) 0x40e609 CALL 404360 <_ZN11qmcplusplus12WaveFunction14evaluateRatiosERNS_18VirtualParticleSetERSt6vectorIdSaIdEE@plt> |
(44) 0x40e60e MOV 0x2858(%RBX),%R11 |
(44) 0x40e615 INC %R13 |
(44) 0x40e618 MOV 0x260(%R11),%RAX |
(44) 0x40e61f CMP %RAX,%R13 |
(44) 0x40e622 JB 40e36e |
0x40e628 MOV -0x2e0(%RBP),%RCX |
0x40e62f INCQ -0x1b0(%RBP) |
0x40e636 MOV -0x1b0(%RBP),%R13 |
0x40e63d CMP 0x260(%RCX),%R13 |
0x40e644 JB 40e30b |
*** This panel is intentionally left blank because the analyzed object has no debug symbols. ***
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○96.81 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○3.19 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 12.14 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.44 |
Bottlenecks | micro-operation queue |
Function | main._omp_fn.1 |
Source | |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 3.83 |
CQA cycles if no scalar integer | 3.83 |
CQA cycles if FP arith vectorized | 3.83 |
CQA cycles if fully vectorized | 0.32 |
Front-end cycles | 3.83 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 1.50 |
P1 cycles | 1.40 |
P2 cycles | 2.67 |
P3 cycles | 2.67 |
P4 cycles | 2.50 |
P5 cycles | 1.40 |
P6 cycles | 1.50 |
P7 cycles | 2.50 |
P8 cycles | 2.50 |
P9 cycles | 2.50 |
P10 cycles | 1.20 |
P11 cycles | 2.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 3.99 |
Stall cycles (UFS) | 0.00 |
Nb insns | 22.00 |
Nb uops | 23.00 |
Nb loads | 8.00 |
Nb stores | 5.00 |
Nb stack references | 7.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 26.09 |
Bytes prefetched | 0.00 |
Bytes loaded | 64.00 |
Bytes stored | 36.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.72 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 11.25 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 12.14 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.44 |
Bottlenecks | micro-operation queue |
Function | main._omp_fn.1 |
Source | |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 3.83 |
CQA cycles if no scalar integer | 3.83 |
CQA cycles if FP arith vectorized | 3.83 |
CQA cycles if fully vectorized | 0.32 |
Front-end cycles | 3.83 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 1.50 |
P1 cycles | 1.40 |
P2 cycles | 2.67 |
P3 cycles | 2.67 |
P4 cycles | 2.50 |
P5 cycles | 1.40 |
P6 cycles | 1.50 |
P7 cycles | 2.50 |
P8 cycles | 2.50 |
P9 cycles | 2.50 |
P10 cycles | 1.20 |
P11 cycles | 2.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 3.99 |
Stall cycles (UFS) | 0.00 |
Nb insns | 22.00 |
Nb uops | 23.00 |
Nb loads | 8.00 |
Nb stores | 5.00 |
Nb stack references | 7.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 26.09 |
Bytes prefetched | 0.00 |
Bytes loaded | 64.00 |
Bytes stored | 36.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.72 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 11.25 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Path / |
Function | main._omp_fn.1 |
Source file and lines | |
Module | exec |
nb instructions | 22 |
nb uops | 23 |
loop length | 121 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 3.83 cycles |
front end | 3.83 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.40 | 2.67 | 2.67 | 2.50 | 1.40 | 1.50 | 2.50 | 2.50 | 2.50 | 1.20 | 2.67 |
cycles | 1.50 | 1.40 | 2.67 | 2.67 | 2.50 | 1.40 | 1.50 | 2.50 | 2.50 | 2.50 | 1.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 3.99 |
Stall cycles | 0.00 |
Front-end | 3.83 |
Dispatch | 2.67 |
Overall L1 | 3.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x1b0(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x230(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R9,4),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x48(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,-0x1f0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RSI,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD 0x60(%RDI),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R13,-0x150(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0x1d0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 40e64a <main._omp_fn.1+0x525a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R9,%R9,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R10,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x210(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 40e36e <main._omp_fn.1+0x4f7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x2e0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INCQ -0x1b0(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV -0x1b0(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP 0x260(%RCX),%R13 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JB 40e30b <main._omp_fn.1+0x4f1b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | main._omp_fn.1 |
Source file and lines | |
Module | exec |
nb instructions | 22 |
nb uops | 23 |
loop length | 121 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 3.83 cycles |
front end | 3.83 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.40 | 2.67 | 2.67 | 2.50 | 1.40 | 1.50 | 2.50 | 2.50 | 2.50 | 1.20 | 2.67 |
cycles | 1.50 | 1.40 | 2.67 | 2.67 | 2.50 | 1.40 | 1.50 | 2.50 | 2.50 | 2.50 | 1.20 | 2.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 3.99 |
Stall cycles | 0.00 |
Front-end | 3.83 |
Dispatch | 2.67 |
Overall L1 | 3.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x1b0(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x230(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R9,4),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x48(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,-0x1f0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RSI,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD 0x60(%RDI),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R13,-0x150(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0x1d0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 40e64a <main._omp_fn.1+0x525a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R9,%R9,2),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R10,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x210(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 40e36e <main._omp_fn.1+0x4f7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x2e0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INCQ -0x1b0(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV -0x1b0(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP 0x260(%RCX),%R13 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JB 40e30b <main._omp_fn.1+0x4f1b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |