Function: main | Module: exec | Source: main.c:50-192 [...] | Coverage: 0.07% |
---|
Function: main | Module: exec | Source: main.c:50-192 [...] | Coverage: 0.07% |
---|
/scratch_na/users/xoserete/qaas_runs/171-418-0442/intel/HACCmk/build/HACCmk/src/main.c: 50 - 192 |
-------------------------------------------------------------------------------- |
50: { |
[...] |
57: double t3, elapsed = 0.0, validation, final, t1, t2; |
[...] |
63: long NN = N; |
[...] |
73: printf( "count is set %d\n", count ); |
74: printf( "Total MPI ranks %d\n", nprocs ); |
75: } |
76: |
77: if (argc == 2 && strncmp(argv[1], "-s", 2) == 0) |
78: NN = 15000; |
79: |
80: printf( "N is set %ld\n", NN ); |
81: |
82: #pragma omp parallel |
[...] |
97: for ( n = 400; n < NN; n = n + 20 ) |
[...] |
103: dx1 = 1.0f/(float)n; |
104: dy1 = 2.0f/(float)n; |
105: dz1 = 3.0f/(float)n; |
106: xx[0] = 0.f; |
107: yy[0] = 0.f; |
108: zz[0] = 0.f; |
109: mass[0] = 2.f; |
110: |
111: for ( i = 1; i < n; i++ ) |
112: { |
113: xx[i] = xx[i-1] + dx1; |
114: yy[i] = yy[i-1] + dy1; |
115: zz[i] = zz[i-1] + dz1; |
116: mass[i] = (float)i * 0.01f + xx[i]; |
117: } |
118: |
119: for ( i = 0; i < n; i++ ) |
120: { |
121: vx1[i] = 0.f; |
122: vy1[i] = 0.f; |
123: vz1[i] = 0.f; |
[...] |
139: #pragma omp parallel for private( dx1, dy1, dz1 ) |
[...] |
166: t3 = (t2 - t1) * 1e6; |
167: #endif |
168: |
169: elapsed = elapsed + t3; |
[...] |
185: printf( "\nKernel elapsed time, s: %18.8lf\n", elapsed*1e-6 ); |
[...] |
192: } |
/scratch_na/users/xoserete/qaas_runs/171-418-0442/intel/HACCmk/build/HACCmk/src/mysecond.c: 22 - 23 |
-------------------------------------------------------------------------------- |
22: i = gettimeofday(&tp,&tzp); |
23: return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); |
0x4010a0 PUSH %RBP |
0x4010a1 XOR %EAX,%EAX |
0x4010a3 MOV %RSP,%RBP |
0x4010a6 PUSH %R15 |
0x4010a8 PUSH %R14 |
0x4010aa PUSH %R13 |
0x4010ac MOV %RSI,%R13 |
0x4010af MOV $0xbb8,%ESI |
0x4010b4 PUSH %R12 |
0x4010b6 MOV $0x186a0,%R12D |
0x4010bc PUSH %RBX |
0x4010bd MOV %EDI,%EBX |
0x4010bf MOV $0x40301f,%EDI |
0x4010c4 SUB $0x48,%RSP |
0x4010c8 CALL 401050 <printf@plt> |
0x4010cd MOV $0x1,%ESI |
0x4010d2 MOV $0x403030,%EDI |
0x4010d7 XOR %EAX,%EAX |
0x4010d9 CALL 401050 <printf@plt> |
0x4010de CMP $0x2,%EBX |
0x4010e1 JNE 40110a |
0x4010e3 MOV 0x8(%R13),%RDI |
0x4010e7 MOV $0x2,%EDX |
0x4010ec MOV $0x403044,%ESI |
0x4010f1 CALL 401030 <strncmp@plt> |
0x4010f6 CMP $0x1,%EAX |
0x4010f9 SBB %R12,%R12 |
0x4010fc AND $-0x14c08,%R12 |
0x401103 ADD $0x186a0,%R12 |
0x40110a MOV %R12,%RSI |
0x40110d MOV $0x403047,%EDI |
0x401112 XOR %EAX,%EAX |
0x401114 CALL 401050 <printf@plt> |
0x401119 XOR %ECX,%ECX |
0x40111b XOR %EDX,%EDX |
0x40111d LEA -0x50(%RBP),%RSI |
0x401121 MOV $0x4015d0,%EDI |
0x401126 MOVL $0,-0x50(%RBP) |
0x40112d CALL 401090 <GOMP_parallel@plt> |
0x401132 VMOVSS 0x1f4a(%RIP),%XMM9 |
0x40113a MOV 0x1f5f(%RIP),%R13 |
0x401141 MOV $0x190,%EAX |
0x401146 VXORPD %XMM0,%XMM0,%XMM0 |
0x40114a VXORPS %XMM5,%XMM5,%XMM5 |
0x40114e XCHG %AX,%AX |
(1) 0x401150 VCVTSI2SS %EAX,%XMM5,%XMM4 |
(1) 0x401154 VMOVSS 0x1f20(%RIP),%XMM7 |
(1) 0x40115c VXORPS %XMM1,%XMM1,%XMM1 |
(1) 0x401160 LEA -0x1(%RAX),%RDX |
(1) 0x401164 MOVL $0,0x24de12(%RIP) |
(1) 0x40116e MOV %EAX,%EBX |
(1) 0x401170 MOV $0x1,%R14D |
(1) 0x401176 VMOVAPS %XMM1,%XMM3 |
(1) 0x40117a MOVL $0,0x1ec37c(%RIP) |
(1) 0x401184 VMOVAPS %XMM1,%XMM2 |
(1) 0x401188 MOVL $0,0x18a8ee(%RIP) |
(1) 0x401192 VDIVSS %XMM4,%XMM7,%XMM10 |
(1) 0x401196 MOVL $0x40000000,0x128e60(%RIP) |
(1) 0x4011a0 VMULSS 0x1ed8(%RIP),%XMM10,%XMM11 |
(1) 0x4011a8 VADDSS %XMM10,%XMM10,%XMM8 |
(1) 0x4011ad AND $0x3,%EDX |
(1) 0x4011b0 JE 401293 |
(1) 0x4011b6 CMP $0x1,%RDX |
(1) 0x4011ba JE 401246 |
(1) 0x4011c0 CMP $0x2,%RDX |
(1) 0x4011c4 JE 401202 |
(1) 0x4011c6 VCVTSI2SS %R14D,%XMM5,%XMM6 |
(1) 0x4011cb VMOVAPS %XMM10,%XMM1 |
(1) 0x4011cf VMOVSS %XMM10,0x24ddad(%RIP) |
(1) 0x4011d7 VMOVAPS %XMM8,%XMM3 |
(1) 0x4011db VMOVSS %XMM8,0x1ec321(%RIP) |
(1) 0x4011e3 VMOVAPS %XMM11,%XMM2 |
(1) 0x4011e7 MOV $0x2,%R14D |
(1) 0x4011ed VMOVSS %XMM11,0x18a88f(%RIP) |
(1) 0x4011f5 VFMADD132SS %XMM9,%XMM10,%XMM6 |
(1) 0x4011fa VMOVSS %XMM6,0x128e02(%RIP) |
(1) 0x401202 VCVTSI2SS %R14D,%XMM5,%XMM12 |
(1) 0x401207 VADDSS %XMM10,%XMM1,%XMM1 |
(1) 0x40120c VADDSS %XMM8,%XMM3,%XMM3 |
(1) 0x401211 VADDSS %XMM11,%XMM2,%XMM2 |
(1) 0x401216 VMOVSS %XMM1,0x64ef80(,%R14,4) |
(1) 0x401220 VFMADD132SS %XMM9,%XMM1,%XMM12 |
(1) 0x401225 VMOVSS %XMM3,0x5ed500(,%R14,4) |
(1) 0x40122f VMOVSS %XMM2,0x58ba80(,%R14,4) |
(1) 0x401239 VMOVSS %XMM12,0x52a000(,%R14,4) |
(1) 0x401243 INC %R14 |
(1) 0x401246 VCVTSI2SS %R14D,%XMM5,%XMM13 |
(1) 0x40124b VADDSS %XMM10,%XMM1,%XMM1 |
(1) 0x401250 VADDSS %XMM8,%XMM3,%XMM3 |
(1) 0x401255 VADDSS %XMM11,%XMM2,%XMM2 |
(1) 0x40125a VMOVSS %XMM1,0x64ef80(,%R14,4) |
(1) 0x401264 VFMADD132SS %XMM9,%XMM1,%XMM13 |
(1) 0x401269 VMOVSS %XMM3,0x5ed500(,%R14,4) |
(1) 0x401273 VMOVSS %XMM2,0x58ba80(,%R14,4) |
(1) 0x40127d VMOVSS %XMM13,0x52a000(,%R14,4) |
(1) 0x401287 INC %R14 |
(1) 0x40128a CMP %RAX,%R14 |
(1) 0x40128d JE 4013a1 |
(0) 0x401293 VADDSS %XMM10,%XMM1,%XMM14 |
(0) 0x401298 VADDSS %XMM8,%XMM3,%XMM15 |
(0) 0x40129d LEA 0x2(%R14),%RSI |
(0) 0x4012a1 VADDSS %XMM11,%XMM2,%XMM4 |
(0) 0x4012a6 LEA 0x1(%R14),%RCX |
(0) 0x4012aa LEA 0x3(%R14),%RDI |
(0) 0x4012ae VADDSS %XMM10,%XMM14,%XMM12 |
(0) 0x4012b3 VMOVSS %XMM15,0x5ed500(,%R14,4) |
(0) 0x4012bd VADDSS %XMM8,%XMM15,%XMM3 |
(0) 0x4012c2 VCVTSI2SS %ESI,%XMM5,%XMM15 |
(0) 0x4012c6 VMOVSS %XMM4,0x58ba80(,%R14,4) |
(0) 0x4012d0 VADDSS %XMM11,%XMM4,%XMM2 |
(0) 0x4012d5 VCVTSI2SS %R14D,%XMM5,%XMM7 |
(0) 0x4012da VMOVSS %XMM14,0x64ef80(,%R14,4) |
(0) 0x4012e4 VADDSS %XMM10,%XMM12,%XMM1 |
(0) 0x4012e9 VMOVSS %XMM12,0x64ef80(,%RCX,4) |
(0) 0x4012f2 VADDSS %XMM8,%XMM3,%XMM13 |
(0) 0x4012f7 VCVTSI2SS %ECX,%XMM5,%XMM6 |
(0) 0x4012fb VMOVSS %XMM3,0x5ed500(,%RCX,4) |
(0) 0x401304 VCVTSI2SS %EDI,%XMM5,%XMM4 |
(0) 0x401308 VMOVSS %XMM2,0x58ba80(,%RCX,4) |
(0) 0x401311 VFMADD132SS %XMM9,%XMM1,%XMM15 |
(0) 0x401316 VMOVSS %XMM1,0x64ef80(,%RSI,4) |
(0) 0x40131f VADDSS %XMM10,%XMM1,%XMM1 |
(0) 0x401324 VADDSS %XMM8,%XMM13,%XMM3 |
(0) 0x401329 VFMADD132SS %XMM9,%XMM14,%XMM7 |
(0) 0x40132e VADDSS %XMM11,%XMM2,%XMM14 |
(0) 0x401333 VMOVSS %XMM13,0x5ed500(,%RSI,4) |
(0) 0x40133c VFMADD132SS %XMM9,%XMM12,%XMM6 |
(0) 0x401341 VFMADD132SS %XMM9,%XMM1,%XMM4 |
(0) 0x401346 VMOVSS %XMM1,0x64ef80(,%RDI,4) |
(0) 0x40134f VADDSS %XMM11,%XMM14,%XMM2 |
(0) 0x401354 VMOVSS %XMM14,0x58ba80(,%RSI,4) |
(0) 0x40135d VMOVSS %XMM3,0x5ed500(,%RDI,4) |
(0) 0x401366 VMOVSS %XMM7,0x52a000(,%R14,4) |
(0) 0x401370 ADD $0x4,%R14 |
(0) 0x401374 VMOVSS %XMM6,0x52a000(,%RCX,4) |
(0) 0x40137d VMOVSS %XMM2,0x58ba80(,%RDI,4) |
(0) 0x401386 VMOVSS %XMM15,0x52a000(,%RSI,4) |
(0) 0x40138f VMOVSS %XMM4,0x52a000(,%RDI,4) |
(0) 0x401398 CMP %RAX,%R14 |
(0) 0x40139b JNE 401293 |
(1) 0x4013a1 LEA (,%R14,4),%R15 |
(1) 0x4013a9 XOR %ESI,%ESI |
(1) 0x4013ab MOV $0x4c8580,%EDI |
(1) 0x4013b0 VMOVSD %XMM0,-0x70(%RBP) |
(1) 0x4013b5 MOV %R15,%RDX |
(1) 0x4013b8 CALL 401070 <memset@plt> |
(1) 0x4013bd MOV %R15,%RDX |
(1) 0x4013c0 XOR %ESI,%ESI |
(1) 0x4013c2 MOV $0x466b00,%EDI |
(1) 0x4013c7 CALL 401070 <memset@plt> |
(1) 0x4013cc MOV %R15,%RDX |
(1) 0x4013cf XOR %ESI,%ESI |
(1) 0x4013d1 MOV $0x405080,%EDI |
(1) 0x4013d6 CALL 401070 <memset@plt> |
(1) 0x4013db LEA -0x58(%RBP),%RSI |
(1) 0x4013df LEA -0x50(%RBP),%RDI |
(1) 0x4013e3 CALL 401060 <gettimeofday@plt> |
(1) 0x4013e8 VXORPS %XMM5,%XMM5,%XMM5 |
(1) 0x4013ec XOR %ECX,%ECX |
(1) 0x4013ee XOR %EDX,%EDX |
(1) 0x4013f0 VCVTSI2SDQ -0x48(%RBP),%XMM5,%XMM9 |
(1) 0x4013f6 LEA -0x50(%RBP),%RSI |
(1) 0x4013fa MOV $0x401d20,%EDI |
(1) 0x4013ff MOVL $0xbb8,-0x40(%RBP) |
(1) 0x401406 VCVTSI2SDQ -0x50(%RBP),%XMM5,%XMM0 |
(1) 0x40140c MOV %EBX,-0x44(%RBP) |
(1) 0x40140f MOVL $0x3e6b851f,-0x48(%RBP) |
(1) 0x401416 MOV %R13,-0x50(%RBP) |
(1) 0x40141a VMOVSD %XMM9,-0x68(%RBP) |
(1) 0x40141f VMOVQ %XMM0,%R15 |
(1) 0x401424 CALL 401090 <GOMP_parallel@plt> |
(1) 0x401429 LEA -0x58(%RBP),%RSI |
(1) 0x40142d LEA -0x50(%RBP),%RDI |
(1) 0x401431 CALL 401060 <gettimeofday@plt> |
(1) 0x401436 VXORPS %XMM5,%XMM5,%XMM5 |
(1) 0x40143a VMOVSD 0x1c66(%RIP),%XMM8 |
(1) 0x401442 VMOVSD 0x1c5e(%RIP),%XMM7 |
(1) 0x40144a VCVTSI2SDQ -0x50(%RBP),%XMM5,%XMM3 |
(1) 0x401450 VMOVQ %R15,%XMM11 |
(1) 0x401455 VMOVSD -0x70(%RBP),%XMM0 |
(1) 0x40145a LEA 0x14(%R14),%RAX |
(1) 0x40145e VCVTSI2SDQ -0x48(%RBP),%XMM5,%XMM10 |
(1) 0x401464 CMP %RAX,%R12 |
(1) 0x401467 VMOVSS 0x1c15(%RIP),%XMM9 |
(1) 0x40146f VFNMADD132SD -0x68(%RBP),%XMM3,%XMM7 |
(1) 0x401475 VFMSUB132SD %XMM10,%XMM11,%XMM8 |
(1) 0x40147a VADDSD %XMM8,%XMM7,%XMM12 |
(1) 0x40147f VFMADD231SD 0x1c28(%RIP),%XMM12,%XMM0 |
(1) 0x401488 JG 401150 |
0x40148e VMULSD 0x1c12(%RIP),%XMM0,%XMM0 |
0x401496 MOV $0x403058,%EDI |
0x40149b MOV $0x1,%EAX |
0x4014a0 CALL 401050 <printf@plt> |
0x4014a5 ADD $0x48,%RSP |
0x4014a9 XOR %EAX,%EAX |
0x4014ab POP %RBX |
0x4014ac POP %R12 |
0x4014ae POP %R13 |
0x4014b0 POP %R14 |
0x4014b2 POP %R15 |
0x4014b4 POP %RBP |
0x4014b5 RET |
0x4014b6 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | __libc_start_main | libc-2.28.so |
Path / |
Source file and lines | main.c:50-192 |
Module | exec |
nb instructions | 59 |
nb uops | 65 |
loop length | 226 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 10.83 cycles |
front end | 10.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.10 | 4.00 | 3.67 | 3.67 | 6.50 | 4.00 | 3.90 | 6.50 | 6.50 | 6.50 | 4.00 | 3.67 |
cycles | 4.10 | 4.00 | 3.67 | 3.67 | 6.50 | 4.00 | 3.90 | 6.50 | 6.50 | 6.50 | 4.00 | 3.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.96 |
Stall cycles | 0.00 |
Front-end | 10.83 |
Dispatch | 6.50 |
Overall L1 | 10.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 8% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
all | 7% |
load | 12% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 9% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 10% |
store | 6% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0xbb8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV $0x186a0,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x40301f,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV $0x1,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x403030,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMP $0x2,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 40110a <main+0x6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x8(%R13),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x403044,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 401030 <strncmp@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMP $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SBB %R12,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x14c08,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x186a0,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x403047,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4015d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 401090 <GOMP_parallel@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVSS 0x1f4a(%RIP),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x1f5f(%RIP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x190,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPS %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULSD 0x1c12(%RIP),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV $0x403058,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | main.c:50-192 |
Module | exec |
nb instructions | 59 |
nb uops | 65 |
loop length | 226 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 10.83 cycles |
front end | 10.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.10 | 4.00 | 3.67 | 3.67 | 6.50 | 4.00 | 3.90 | 6.50 | 6.50 | 6.50 | 4.00 | 3.67 |
cycles | 4.10 | 4.00 | 3.67 | 3.67 | 6.50 | 4.00 | 3.90 | 6.50 | 6.50 | 6.50 | 4.00 | 3.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.96 |
Stall cycles | 0.00 |
Front-end | 10.83 |
Dispatch | 6.50 |
Overall L1 | 10.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 8% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
all | 7% |
load | 12% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 9% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 10% |
store | 6% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0xbb8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV $0x186a0,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x40301f,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV $0x1,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x403030,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMP $0x2,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 40110a <main+0x6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x8(%R13),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x403044,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 401030 <strncmp@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMP $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SBB %R12,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x14c08,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x186a0,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x403047,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4015d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 401090 <GOMP_parallel@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVSS 0x1f4a(%RIP),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x1f5f(%RIP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x190,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPS %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULSD 0x1c12(%RIP),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV $0x403058,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼main– | 0.07 | 0.01 |
▼Loop 1 - main.c:97-169 - exec– | 0 | 0 |
○Loop 0 - main.c:111-116 - exec | 0.06 | 1.11 |