Function: main.A | Module: exec | Source: main.c:50-191 [...] | Coverage: 0.17% |
---|
Function: main.A | Module: exec | Source: main.c:50-191 [...] | Coverage: 0.17% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/171-094-7986/intel/HACCmk/build/HACCmk/src/main.c: 50 - 191 |
-------------------------------------------------------------------------------- |
50: { |
[...] |
73: printf( "count is set %d\n", count ); |
74: printf( "Total MPI ranks %d\n", nprocs ); |
75: } |
76: |
77: if (argc == 2 && strncmp(argv[1], "-s", 2) == 0) |
78: NN = 15000; |
79: |
80: printf( "N is set %ld\n", NN ); |
81: |
82: #pragma omp parallel |
[...] |
97: for ( n = 400; n < NN; n = n + 20 ) |
[...] |
103: dx1 = 1.0f/(float)n; |
104: dy1 = 2.0f/(float)n; |
105: dz1 = 3.0f/(float)n; |
106: xx[0] = 0.f; |
107: yy[0] = 0.f; |
108: zz[0] = 0.f; |
109: mass[0] = 2.f; |
110: |
111: for ( i = 1; i < n; i++ ) |
112: { |
113: xx[i] = xx[i-1] + dx1; |
114: yy[i] = yy[i-1] + dy1; |
115: zz[i] = zz[i-1] + dz1; |
116: mass[i] = (float)i * 0.01f + xx[i]; |
117: } |
118: |
119: for ( i = 0; i < n; i++ ) |
120: { |
121: vx1[i] = 0.f; |
122: vy1[i] = 0.f; |
123: vz1[i] = 0.f; |
[...] |
136: t1 = mysecond(); |
137: #endif |
138: |
139: #pragma omp parallel for private( dx1, dy1, dz1 ) |
[...] |
152: t2 = mysecond(); |
[...] |
166: t3 = (t2 - t1) * 1e6; |
167: #endif |
168: |
169: elapsed = elapsed + t3; |
[...] |
185: printf( "\nKernel elapsed time, s: %18.8lf\n", elapsed*1e-6 ); |
[...] |
191: return 0; |
0x402560 PUSH %RBP |
0x402561 MOV %RSP,%RBP |
0x402564 PUSH %R15 |
0x402566 PUSH %R14 |
0x402568 PUSH %R12 |
0x40256a PUSH %RBX |
0x40256b SUB $0x20,%RSP |
0x40256f MOV %RSI,%R14 |
0x402572 STMXCSR -0x24(%RBP) |
0x402576 MOV %EDI,%R15D |
0x402579 ORL $0x8040,-0x24(%RBP) |
0x402580 LDMXCSR -0x24(%RBP) |
0x402584 MOV $0x40a327,%EDI |
0x402589 MOV $0xbb8,%ESI |
0x40258e XOR %EAX,%EAX |
0x402590 CALL 402030 <printf@plt> |
0x402595 MOV $0x40a338,%EDI |
0x40259a MOV $0x1,%ESI |
0x40259f XOR %EAX,%EAX |
0x4025a1 CALL 402030 <printf@plt> |
0x4025a6 MOV $0x186a0,%EBX |
0x4025ab CMP $0x2,%R15D |
0x4025af JNE 4025cf |
0x4025b1 MOV 0x8(%R14),%RDI |
0x4025b5 MOV $0x40a34c,%ESI |
0x4025ba MOV $0x2,%EDX |
0x4025bf CALL 4020b0 <strncmp@plt> |
0x4025c4 TEST %EAX,%EAX |
0x4025c6 MOV $0x3a98,%EAX |
0x4025cb CMOVE %RAX,%RBX |
0x4025cf MOV $0x40a34f,%EDI |
0x4025d4 MOV %RBX,%RSI |
0x4025d7 XOR %EAX,%EAX |
0x4025d9 CALL 402030 <printf@plt> |
0x4025de MOV $0x40e0e0,%EDI |
0x4025e3 MOV $0x4028d0,%EDX |
0x4025e8 MOV $0x1,%ESI |
0x4025ed XOR %ECX,%ECX |
0x4025ef XOR %EAX,%EAX |
0x4025f1 CALL 402110 <__kmpc_fork_call@plt> |
0x4025f6 ADD $-0x191,%EBX |
0x4025fc MOV $-0x33333333,%R14D |
0x402602 IMUL %RBX,%R14 |
0x402606 SHR $0x24,%R14 |
0x40260a INC %R14D |
0x40260d XORPD %XMM0,%XMM0 |
0x402611 MOV $0x190,%R15D |
0x402617 MOVSS 0x79f5(%RIP),%XMM7 |
0x40261f MOVDQA 0x7a38(%RIP),%XMM12 |
0x402628 MOVAPS 0x7a40(%RIP),%XMM13 |
0x402630 XOR %R12D,%R12D |
0x402633 NOPW %CS:(%RAX,%RAX,1) |
(2) 0x402640 MOVSD %XMM0,-0x38(%RBP) |
(2) 0x402645 LEA (%R12,%R12,4),%RBX |
(2) 0x402649 SAL $0x4,%RBX |
(2) 0x40264d ADD $0x640,%RBX |
(2) 0x402654 XORPS %XMM1,%XMM1 |
(2) 0x402657 CVTSI2SS %R15D,%XMM1 |
(2) 0x40265c MOVSS 0x79ac(%RIP),%XMM0 |
(2) 0x402664 DIVSS %XMM1,%XMM0 |
(2) 0x402668 MOVAPS %XMM0,%XMM1 |
(2) 0x40266b SHUFPS $0,%XMM0,%XMM1 |
(2) 0x40266f MOVAPS %XMM1,%XMM2 |
(2) 0x402672 MULPS 0x79c7(%RIP),%XMM2 |
(2) 0x402679 MOVL $0,0xbb2d(%RIP) |
(2) 0x402683 MOVL $0,0x6d5a3(%RIP) |
(2) 0x40268d MOVL $0,0xcf019(%RIP) |
(2) 0x402697 MOVL $0x40000000,0x130a8f(%RIP) |
(2) 0x4026a1 XORPS %XMM4,%XMM4 |
(2) 0x4026a4 XORPS %XMM3,%XMM3 |
(2) 0x4026a7 MOV $-0x3,%RAX |
(2) 0x4026ae XCHG %AX,%AX |
(0) 0x4026b0 ADDSS %XMM0,%XMM3 |
(0) 0x4026b4 MOVSS %XMM3,0x40e1c0(,%RAX,4) |
(0) 0x4026bd ADDPS %XMM2,%XMM4 |
(0) 0x4026c0 MOVAPS %XMM4,%XMM5 |
(0) 0x4026c3 SHUFPS $0x55,%XMM4,%XMM5 |
(0) 0x4026c7 MOVSS %XMM5,0x46fc40(,%RAX,4) |
(0) 0x4026d0 MOVSS %XMM4,0x4d16c0(,%RAX,4) |
(0) 0x4026d9 LEA 0x4(%RAX),%ECX |
(0) 0x4026dc XORPS %XMM6,%XMM6 |
(0) 0x4026df CVTSI2SS %ECX,%XMM6 |
(0) 0x4026e3 MULSS %XMM7,%XMM6 |
(0) 0x4026e7 ADDSS %XMM3,%XMM6 |
(0) 0x4026eb MOVSS %XMM6,0x533140(,%RAX,4) |
(0) 0x4026f4 INC %RAX |
(0) 0x4026f7 JNE 4026b0 |
(2) 0x4026f9 MOVAPS %XMM1,%XMM6 |
(2) 0x4026fc MOVAPS 0x794c(%RIP),%XMM10 |
(2) 0x402704 MULPS %XMM10,%XMM6 |
(2) 0x402708 SHUFPS $0,%XMM3,%XMM3 |
(2) 0x40270c ADDPS %XMM6,%XMM3 |
(2) 0x40270f MOVSS 0x7900(%RIP),%XMM11 |
(2) 0x402718 MULSS %XMM11,%XMM0 |
(2) 0x40271d MOVAPS %XMM2,%XMM6 |
(2) 0x402720 SHUFPS $0x55,%XMM2,%XMM6 |
(2) 0x402724 MOVAPS %XMM6,%XMM7 |
(2) 0x402727 MULPS %XMM10,%XMM7 |
(2) 0x40272b ADDPS %XMM7,%XMM5 |
(2) 0x40272e MOVAPS %XMM6,%XMM7 |
(2) 0x402731 MULSS %XMM11,%XMM7 |
(2) 0x402736 MOVAPS %XMM2,%XMM8 |
(2) 0x40273a SHUFPS $0,%XMM2,%XMM8 |
(2) 0x40273f MOVAPS %XMM8,%XMM9 |
(2) 0x402743 MULPS %XMM10,%XMM9 |
(2) 0x402747 SHUFPS $0,%XMM4,%XMM4 |
(2) 0x40274b ADDPS %XMM9,%XMM4 |
(2) 0x40274f MULSS %XMM11,%XMM2 |
(2) 0x402754 LEA -0x2(%R15),%RAX |
(2) 0x402758 SHUFPS $0,%XMM0,%XMM0 |
(2) 0x40275c SHUFPS $0,%XMM7,%XMM7 |
(2) 0x402760 SHUFPS $0,%XMM2,%XMM2 |
(2) 0x402764 MOV $0x3,%ECX |
(2) 0x402769 MOVAPS %XMM3,%XMM9 |
(2) 0x40276d MOVAPS %XMM5,%XMM10 |
(2) 0x402771 MOVAPS %XMM4,%XMM11 |
(2) 0x402775 NOPW %CS:(%RAX,%RAX,1) |
(1) 0x402780 ADDPS %XMM0,%XMM9 |
(1) 0x402784 ADDPS %XMM1,%XMM3 |
(1) 0x402787 MOVUPS %XMM3,0x40e1b4(,%RCX,4) |
(1) 0x40278f ADDPS %XMM7,%XMM10 |
(1) 0x402793 ADDPS %XMM6,%XMM5 |
(1) 0x402796 MOVUPS %XMM5,0x46fc34(,%RCX,4) |
(1) 0x40279e ADDPS %XMM2,%XMM11 |
(1) 0x4027a2 ADDPS %XMM8,%XMM4 |
(1) 0x4027a6 MOVUPS %XMM4,0x4d16b4(,%RCX,4) |
(1) 0x4027ae MOVD %ECX,%XMM4 |
(1) 0x4027b2 PSHUFD $0,%XMM4,%XMM4 |
(1) 0x4027b7 PADDD %XMM12,%XMM4 |
(1) 0x4027bc CVTDQ2PS %XMM4,%XMM4 |
(1) 0x4027bf MULPS %XMM13,%XMM4 |
(1) 0x4027c3 ADDPS %XMM3,%XMM4 |
(1) 0x4027c6 MOVUPS %XMM4,0x533134(,%RCX,4) |
(1) 0x4027ce ADD $0x4,%RCX |
(1) 0x4027d2 MOVAPS %XMM11,%XMM4 |
(1) 0x4027d6 MOVAPS %XMM10,%XMM5 |
(1) 0x4027da MOVAPS %XMM9,%XMM3 |
(1) 0x4027de CMP %RAX,%RCX |
(1) 0x4027e1 JLE 402780 |
(2) 0x4027e3 MOV $0x594bb0,%EDI |
(2) 0x4027e8 XOR %ESI,%ESI |
(2) 0x4027ea MOV %RBX,%RDX |
(2) 0x4027ed CALL 405260 <_intel_fast_memset> |
(2) 0x4027f2 MOV $0x5f6630,%EDI |
(2) 0x4027f7 XOR %ESI,%ESI |
(2) 0x4027f9 MOV %RBX,%RDX |
(2) 0x4027fc CALL 405260 <_intel_fast_memset> |
(2) 0x402801 MOV $0x6580b0,%EDI |
(2) 0x402806 XOR %ESI,%ESI |
(2) 0x402808 MOV %RBX,%RDX |
(2) 0x40280b CALL 405260 <_intel_fast_memset> |
(2) 0x402810 XOR %EAX,%EAX |
(2) 0x402812 CALL 403000 <mysecond> |
(2) 0x402817 MOVSD %XMM0,-0x30(%RBP) |
(2) 0x40281c SUB $0x8,%RSP |
(2) 0x402820 MOV $0x40e140,%EDI |
(2) 0x402825 MOV $0x402900,%EDX |
(2) 0x40282a MOV $0x3e6b851f,%ECX |
(2) 0x40282f MOV $0x3f000000,%R8D |
(2) 0x402835 MOV $0x3cf5c28f,%R9D |
(2) 0x40283b MOV $0x6,%ESI |
(2) 0x402840 XOR %EAX,%EAX |
(2) 0x402842 PUSH $0xbb7 |
(2) 0x402847 PUSH $0 |
(2) 0x402849 PUSH %R15 |
(2) 0x40284b CALL 402110 <__kmpc_fork_call@plt> |
(2) 0x402850 ADD $0x20,%RSP |
(2) 0x402854 XOR %EAX,%EAX |
(2) 0x402856 CALL 403000 <mysecond> |
(2) 0x40285b MOVAPS 0x780d(%RIP),%XMM13 |
(2) 0x402863 MOVDQA 0x77f4(%RIP),%XMM12 |
(2) 0x40286c SUBSD -0x30(%RBP),%XMM0 |
(2) 0x402871 MULSD 0x7807(%RIP),%XMM0 |
(2) 0x402879 MOVSD -0x38(%RBP),%XMM1 |
(2) 0x40287e ADDSD %XMM0,%XMM1 |
(2) 0x402882 MOVAPD %XMM1,%XMM0 |
(2) 0x402886 ADD $0x14,%R15 |
(2) 0x40288a INC %R12 |
(2) 0x40288d CMP %R14,%R12 |
(2) 0x402890 MOVSS 0x777c(%RIP),%XMM7 |
(2) 0x402898 JNE 402640 |
0x40289e MULSD 0x77e2(%RIP),%XMM0 |
0x4028a6 MOV $0x40a378,%EDI |
0x4028ab MOV $0x1,%AL |
0x4028ad CALL 402030 <printf@plt> |
0x4028b2 XOR %EAX,%EAX |
0x4028b4 ADD $0x20,%RSP |
0x4028b8 POP %RBX |
0x4028b9 POP %R12 |
0x4028bb POP %R14 |
0x4028bd POP %R15 |
0x4028bf POP %RBP |
0x4028c0 RET |
0x4028c1 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | main.c:50-191 |
Module | exec |
nb instructions | 65 |
nb uops | 71 |
loop length | 274 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 11.83 cycles |
front end | 11.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.50 | 7.50 | 7.50 | 7.50 | 4.00 | 4.33 | 4.33 | 4.33 | 1.50 | 1.50 | 0.00 | 0.00 | 0.50 | 0.50 |
cycles | 7.50 | 7.50 | 7.50 | 7.50 | 4.00 | 4.33 | 4.33 | 4.33 | 1.50 | 1.50 | 0.00 | 0.00 | 0.50 | 0.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 11.83 |
Dispatch | 7.50 |
Overall L1 | 11.83 |
all | 4% |
load | 33% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 33% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 10% |
load | 33% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 7% |
load | 14% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 14% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 14% |
store | 6% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 7% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
STMXCSR -0x24(%RBP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 20 | 15 |
MOV %EDI,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ORL $0x8040,-0x24(%RBP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDMXCSR -0x24(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 2 |
MOV $0x40a327,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 402030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x40a338,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 402030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x2,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 4025cf <main.A+0x6f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x8(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV $0x40a34c,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 4020b0 <strncmp@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x3a98,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVE %RAX,%RBX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x40a34f,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 402030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x40e0e0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x4028d0,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 402110 <__kmpc_fork_call@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $-0x191,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $-0x33333333,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
IMUL %RBX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x24,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INC %R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XORPD %XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x190,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSS 0x79f5(%RIP),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVDQA 0x7a38(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVAPS 0x7a40(%RIP),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MULSD 0x77e2(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV $0x40a378,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%AL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 402030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | main.c:50-191 |
Module | exec |
nb instructions | 65 |
nb uops | 71 |
loop length | 274 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 11.83 cycles |
front end | 11.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.50 | 7.50 | 7.50 | 7.50 | 4.00 | 4.33 | 4.33 | 4.33 | 1.50 | 1.50 | 0.00 | 0.00 | 0.50 | 0.50 |
cycles | 7.50 | 7.50 | 7.50 | 7.50 | 4.00 | 4.33 | 4.33 | 4.33 | 1.50 | 1.50 | 0.00 | 0.00 | 0.50 | 0.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 11.83 |
Dispatch | 7.50 |
Overall L1 | 11.83 |
all | 4% |
load | 33% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 33% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 10% |
load | 33% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 7% |
load | 14% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 14% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 14% |
store | 6% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 7% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
STMXCSR -0x24(%RBP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0.50 | 0.50 | 20 | 15 |
MOV %EDI,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ORL $0x8040,-0x24(%RBP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDMXCSR -0x24(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 2 |
MOV $0x40a327,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 402030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x40a338,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 402030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x186a0,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x2,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 4025cf <main.A+0x6f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x8(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV $0x40a34c,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 4020b0 <strncmp@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x3a98,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVE %RAX,%RBX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x40a34f,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 402030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x40e0e0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x4028d0,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 402110 <__kmpc_fork_call@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $-0x191,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $-0x33333333,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
IMUL %RBX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x24,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INC %R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XORPD %XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x190,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSS 0x79f5(%RIP),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVDQA 0x7a38(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVAPS 0x7a40(%RIP),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MULSD 0x77e2(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV $0x40a378,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%AL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 402030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼main.A– | 0.17 | 0.02 |
▼Loop 2 - main.c:77-169 - exec– | 0 | 0 |
○Loop 1 - main.c:111-116 - exec | 0.17 | 4.18 |
○Loop 0 - main.c:111-116 - exec | 0 | 0 |