Function: main | Module: exec | Source: main.c:50-192 [...] | Coverage: 0.04% |
---|
Function: main | Module: exec | Source: main.c:50-192 [...] | Coverage: 0.04% |
---|
/scratch_na/users/xoserete/qaas_runs/171-317-3836/intel/HACCmk/build/HACCmk/src/main.c: 50 - 192 |
-------------------------------------------------------------------------------- |
50: { |
[...] |
57: double t3, elapsed = 0.0, validation, final, t1, t2; |
[...] |
63: long NN = N; |
[...] |
73: printf( "count is set %d\n", count ); |
74: printf( "Total MPI ranks %d\n", nprocs ); |
75: } |
76: |
77: if (argc == 2 && strncmp(argv[1], "-s", 2) == 0) |
78: NN = 15000; |
79: |
80: printf( "N is set %ld\n", NN ); |
81: |
82: #pragma omp parallel |
[...] |
97: for ( n = 400; n < NN; n = n + 20 ) |
[...] |
103: dx1 = 1.0f/(float)n; |
104: dy1 = 2.0f/(float)n; |
105: dz1 = 3.0f/(float)n; |
106: xx[0] = 0.f; |
107: yy[0] = 0.f; |
108: zz[0] = 0.f; |
109: mass[0] = 2.f; |
110: |
111: for ( i = 1; i < n; i++ ) |
112: { |
113: xx[i] = xx[i-1] + dx1; |
114: yy[i] = yy[i-1] + dy1; |
115: zz[i] = zz[i-1] + dz1; |
116: mass[i] = (float)i * 0.01f + xx[i]; |
117: } |
118: |
119: for ( i = 0; i < n; i++ ) |
120: { |
121: vx1[i] = 0.f; |
122: vy1[i] = 0.f; |
123: vz1[i] = 0.f; |
[...] |
136: t1 = mysecond(); |
137: #endif |
138: |
139: #pragma omp parallel for private( dx1, dy1, dz1 ) |
[...] |
152: t2 = mysecond(); |
[...] |
166: t3 = (t2 - t1) * 1e6; |
167: #endif |
168: |
169: elapsed = elapsed + t3; |
[...] |
185: printf( "\nKernel elapsed time, s: %18.8lf\n", elapsed*1e-6 ); |
[...] |
192: } |
0x4010a0 PUSH %RBP |
0x4010a1 XOR %EAX,%EAX |
0x4010a3 MOV %RSP,%RBP |
0x4010a6 PUSH %R15 |
0x4010a8 PUSH %R14 |
0x4010aa PUSH %R13 |
0x4010ac MOV %RSI,%R13 |
0x4010af MOV $0xbb8,%ESI |
0x4010b4 PUSH %R12 |
0x4010b6 MOV $0x186a0,%R12D |
0x4010bc PUSH %RBX |
0x4010bd MOV %EDI,%EBX |
0x4010bf MOV $0x40301f,%EDI |
0x4010c4 SUB $0x38,%RSP |
0x4010c8 CALL 401050 <printf@plt> |
0x4010cd MOV $0x1,%ESI |
0x4010d2 MOV $0x403030,%EDI |
0x4010d7 XOR %EAX,%EAX |
0x4010d9 CALL 401050 <printf@plt> |
0x4010de CMP $0x2,%EBX |
0x4010e1 JNE 40110a |
0x4010e3 MOV 0x8(%R13),%RDI |
0x4010e7 MOV $0x2,%EDX |
0x4010ec MOV $0x403044,%ESI |
0x4010f1 CALL 401030 <strncmp@plt> |
0x4010f6 CMP $0x1,%EAX |
0x4010f9 SBB %R12,%R12 |
0x4010fc AND $-0x14c08,%R12 |
0x401103 ADD $0x186a0,%R12 |
0x40110a MOV %R12,%RSI |
0x40110d MOV $0x403047,%EDI |
0x401112 XOR %EAX,%EAX |
0x401114 CALL 401050 <printf@plt> |
0x401119 XOR %ECX,%ECX |
0x40111b XOR %EDX,%EDX |
0x40111d LEA -0x50(%RBP),%RSI |
0x401121 MOV $0x401910,%EDI |
0x401126 MOVL $0,-0x50(%RBP) |
0x40112d CALL 401090 <GOMP_parallel@plt> |
0x401132 VMOVSS 0x1f4a(%RIP),%XMM8 |
0x40113a MOV 0x1f47(%RIP),%R13 |
0x401141 MOV $0x190,%EAX |
0x401146 VXORPD %XMM9,%XMM9,%XMM9 |
0x40114b VXORPS %XMM7,%XMM7,%XMM7 |
0x40114f NOP |
(1) 0x401150 VCVTSI2SS %EAX,%XMM7,%XMM3 |
(1) 0x401154 VMOVSS 0x1f20(%RIP),%XMM5 |
(1) 0x40115c VXORPS %XMM0,%XMM0,%XMM0 |
(1) 0x401160 LEA -0x1(%RAX),%RDX |
(1) 0x401164 MOVL $0,0x18a912(%RIP) |
(1) 0x40116e MOV %EAX,%EBX |
(1) 0x401170 MOV $0x1,%R14D |
(1) 0x401176 VMOVAPS %XMM0,%XMM2 |
(1) 0x40117a MOVL $0,0x1ec37c(%RIP) |
(1) 0x401184 VMOVAPS %XMM0,%XMM1 |
(1) 0x401188 MOVL $0,0x24ddee(%RIP) |
(1) 0x401192 VDIVSS %XMM3,%XMM5,%XMM10 |
(1) 0x401196 MOVL $0x40000000,0x128e60(%RIP) |
(1) 0x4011a0 VMULSS 0x1ed8(%RIP),%XMM10,%XMM11 |
(1) 0x4011a8 VADDSS %XMM10,%XMM10,%XMM6 |
(1) 0x4011ad AND $0x3,%EDX |
(1) 0x4011b0 JE 401291 |
(1) 0x4011b6 CMP $0x1,%RDX |
(1) 0x4011ba JE 401245 |
(1) 0x4011c0 CMP $0x2,%RDX |
(1) 0x4011c4 JE 401202 |
(1) 0x4011c6 VCVTSI2SS %R14D,%XMM7,%XMM4 |
(1) 0x4011cb VMOVAPS %XMM10,%XMM0 |
(1) 0x4011cf VMOVSS %XMM10,0x18a8ad(%RIP) |
(1) 0x4011d7 VMOVAPS %XMM6,%XMM2 |
(1) 0x4011db VMOVSS %XMM6,0x1ec321(%RIP) |
(1) 0x4011e3 VMOVAPS %XMM11,%XMM1 |
(1) 0x4011e7 MOV $0x2,%R14D |
(1) 0x4011ed VMOVSS %XMM11,0x24dd8f(%RIP) |
(1) 0x4011f5 VFMADD132SS %XMM8,%XMM10,%XMM4 |
(1) 0x4011fa VMOVSS %XMM4,0x128e02(%RIP) |
(1) 0x401202 VCVTSI2SS %R14D,%XMM7,%XMM12 |
(1) 0x401207 VADDSS %XMM10,%XMM0,%XMM0 |
(1) 0x40120c VADDSS %XMM6,%XMM2,%XMM2 |
(1) 0x401210 VADDSS %XMM11,%XMM1,%XMM1 |
(1) 0x401215 VMOVSS %XMM0,0x58ba80(,%R14,4) |
(1) 0x40121f VFMADD132SS %XMM8,%XMM0,%XMM12 |
(1) 0x401224 VMOVSS %XMM2,0x5ed500(,%R14,4) |
(1) 0x40122e VMOVSS %XMM1,0x64ef80(,%R14,4) |
(1) 0x401238 VMOVSS %XMM12,0x52a000(,%R14,4) |
(1) 0x401242 INC %R14 |
(1) 0x401245 VCVTSI2SS %R14D,%XMM7,%XMM13 |
(1) 0x40124a VADDSS %XMM10,%XMM0,%XMM0 |
(1) 0x40124f VADDSS %XMM6,%XMM2,%XMM2 |
(1) 0x401253 VADDSS %XMM11,%XMM1,%XMM1 |
(1) 0x401258 VMOVSS %XMM0,0x58ba80(,%R14,4) |
(1) 0x401262 VFMADD132SS %XMM8,%XMM0,%XMM13 |
(1) 0x401267 VMOVSS %XMM2,0x5ed500(,%R14,4) |
(1) 0x401271 VMOVSS %XMM1,0x64ef80(,%R14,4) |
(1) 0x40127b VMOVSS %XMM13,0x52a000(,%R14,4) |
(1) 0x401285 INC %R14 |
(1) 0x401288 CMP %RAX,%R14 |
(1) 0x40128b JE 40139b |
(0) 0x401291 VADDSS %XMM10,%XMM0,%XMM14 |
(0) 0x401296 VADDSS %XMM6,%XMM2,%XMM15 |
(0) 0x40129a LEA 0x2(%R14),%RSI |
(0) 0x40129e VADDSS %XMM11,%XMM1,%XMM3 |
(0) 0x4012a3 LEA 0x1(%R14),%RCX |
(0) 0x4012a7 LEA 0x3(%R14),%RDI |
(0) 0x4012ab VADDSS %XMM10,%XMM14,%XMM12 |
(0) 0x4012b0 VMOVSS %XMM15,0x5ed500(,%R14,4) |
(0) 0x4012ba VADDSS %XMM6,%XMM15,%XMM2 |
(0) 0x4012be VCVTSI2SS %ESI,%XMM7,%XMM15 |
(0) 0x4012c2 VMOVSS %XMM3,0x64ef80(,%R14,4) |
(0) 0x4012cc VADDSS %XMM11,%XMM3,%XMM1 |
(0) 0x4012d1 VCVTSI2SS %R14D,%XMM7,%XMM5 |
(0) 0x4012d6 VMOVSS %XMM14,0x58ba80(,%R14,4) |
(0) 0x4012e0 VADDSS %XMM10,%XMM12,%XMM0 |
(0) 0x4012e5 VMOVSS %XMM12,0x58ba80(,%RCX,4) |
(0) 0x4012ee VADDSS %XMM6,%XMM2,%XMM13 |
(0) 0x4012f2 VCVTSI2SS %ECX,%XMM7,%XMM4 |
(0) 0x4012f6 VMOVSS %XMM2,0x5ed500(,%RCX,4) |
(0) 0x4012ff VCVTSI2SS %EDI,%XMM7,%XMM3 |
(0) 0x401303 VMOVSS %XMM1,0x64ef80(,%RCX,4) |
(0) 0x40130c VFMADD132SS %XMM8,%XMM0,%XMM15 |
(0) 0x401311 VMOVSS %XMM0,0x58ba80(,%RSI,4) |
(0) 0x40131a VADDSS %XMM10,%XMM0,%XMM0 |
(0) 0x40131f VADDSS %XMM6,%XMM13,%XMM2 |
(0) 0x401323 VFMADD132SS %XMM8,%XMM14,%XMM5 |
(0) 0x401328 VADDSS %XMM11,%XMM1,%XMM14 |
(0) 0x40132d VMOVSS %XMM13,0x5ed500(,%RSI,4) |
(0) 0x401336 VFMADD132SS %XMM8,%XMM12,%XMM4 |
(0) 0x40133b VFMADD132SS %XMM8,%XMM0,%XMM3 |
(0) 0x401340 VMOVSS %XMM0,0x58ba80(,%RDI,4) |
(0) 0x401349 VADDSS %XMM11,%XMM14,%XMM1 |
(0) 0x40134e VMOVSS %XMM14,0x64ef80(,%RSI,4) |
(0) 0x401357 VMOVSS %XMM2,0x5ed500(,%RDI,4) |
(0) 0x401360 VMOVSS %XMM5,0x52a000(,%R14,4) |
(0) 0x40136a ADD $0x4,%R14 |
(0) 0x40136e VMOVSS %XMM4,0x52a000(,%RCX,4) |
(0) 0x401377 VMOVSS %XMM1,0x64ef80(,%RDI,4) |
(0) 0x401380 VMOVSS %XMM15,0x52a000(,%RSI,4) |
(0) 0x401389 VMOVSS %XMM3,0x52a000(,%RDI,4) |
(0) 0x401392 CMP %RAX,%R14 |
(0) 0x401395 JNE 401291 |
(1) 0x40139b LEA (,%R14,4),%R15 |
(1) 0x4013a3 XOR %ESI,%ESI |
(1) 0x4013a5 MOV $0x4c8580,%EDI |
(1) 0x4013aa VMOVSD %XMM9,-0x58(%RBP) |
(1) 0x4013af MOV %R15,%RDX |
(1) 0x4013b2 CALL 401070 <memset@plt> |
(1) 0x4013b7 MOV %R15,%RDX |
(1) 0x4013ba XOR %ESI,%ESI |
(1) 0x4013bc MOV $0x466b00,%EDI |
(1) 0x4013c1 CALL 401070 <memset@plt> |
(1) 0x4013c6 MOV %R15,%RDX |
(1) 0x4013c9 XOR %ESI,%ESI |
(1) 0x4013cb MOV $0x405080,%EDI |
(1) 0x4013d0 CALL 401070 <memset@plt> |
(1) 0x4013d5 XOR %EAX,%EAX |
(1) 0x4013d7 CALL 401950 <mysecond> |
(1) 0x4013dc XOR %ECX,%ECX |
(1) 0x4013de XOR %EDX,%EDX |
(1) 0x4013e0 LEA -0x50(%RBP),%RSI |
(1) 0x4013e4 MOV $0x401580,%EDI |
(1) 0x4013e9 VMOVQ %XMM0,%R15 |
(1) 0x4013ee MOVL $0xbb8,-0x40(%RBP) |
(1) 0x4013f5 MOV %EBX,-0x44(%RBP) |
(1) 0x4013f8 MOVL $0x3e6b851f,-0x48(%RBP) |
(1) 0x4013ff MOV %R13,-0x50(%RBP) |
(1) 0x401403 CALL 401090 <GOMP_parallel@plt> |
(1) 0x401408 XOR %EAX,%EAX |
(1) 0x40140a CALL 401950 <mysecond> |
(1) 0x40140f VMOVQ %R15,%XMM7 |
(1) 0x401414 VMOVSD -0x58(%RBP),%XMM9 |
(1) 0x401419 LEA 0x14(%R14),%RAX |
(1) 0x40141d VSUBSD %XMM7,%XMM0,%XMM8 |
(1) 0x401421 CMP %RAX,%R12 |
(1) 0x401424 VXORPS %XMM7,%XMM7,%XMM7 |
(1) 0x401428 VFMADD231SD 0x1c5f(%RIP),%XMM8,%XMM9 |
(1) 0x401431 VMOVSS 0x1c4b(%RIP),%XMM8 |
(1) 0x401439 JG 401150 |
0x40143f VMULSD 0x1c51(%RIP),%XMM9,%XMM0 |
0x401447 MOV $0x403058,%EDI |
0x40144c MOV $0x1,%EAX |
0x401451 CALL 401050 <printf@plt> |
0x401456 ADD $0x38,%RSP |
0x40145a XOR %EAX,%EAX |
0x40145c POP %RBX |
0x40145d POP %R12 |
0x40145f POP %R13 |
0x401461 POP %R14 |
0x401463 POP %R15 |
0x401465 POP %RBP |
0x401466 RET |
0x401467 NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | main.c:50-192 |
Module | exec |
nb instructions | 59 |
nb uops | 65 |
loop length | 225 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 10.83 cycles |
front end | 10.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.10 | 4.00 | 3.67 | 3.67 | 6.50 | 4.00 | 3.90 | 6.50 | 6.50 | 6.50 | 4.00 | 3.67 |
cycles | 4.10 | 4.00 | 3.67 | 3.67 | 6.50 | 4.00 | 3.90 | 6.50 | 6.50 | 6.50 | 4.00 | 3.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.96 |
Stall cycles | 0.00 |
Front-end | 10.83 |
Dispatch | 6.50 |
Overall L1 | 10.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 8% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
all | 7% |
load | 12% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 9% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 10% |
store | 6% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0xbb8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV $0x186a0,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x40301f,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV $0x1,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x403030,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMP $0x2,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 40110a <main+0x6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x8(%R13),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x403044,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 401030 <strncmp@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMP $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SBB %R12,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x14c08,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x186a0,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x403047,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x401910,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 401090 <GOMP_parallel@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVSS 0x1f4a(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x1f47(%RIP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x190,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPS %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULSD 0x1c51(%RIP),%XMM9,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV $0x403058,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | main.c:50-192 |
Module | exec |
nb instructions | 59 |
nb uops | 65 |
loop length | 225 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 10.83 cycles |
front end | 10.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.10 | 4.00 | 3.67 | 3.67 | 6.50 | 4.00 | 3.90 | 6.50 | 6.50 | 6.50 | 4.00 | 3.67 |
cycles | 4.10 | 4.00 | 3.67 | 3.67 | 6.50 | 4.00 | 3.90 | 6.50 | 6.50 | 6.50 | 4.00 | 3.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 9.96 |
Stall cycles | 0.00 |
Front-end | 10.83 |
Dispatch | 6.50 |
Overall L1 | 10.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 8% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
all | 7% |
load | 12% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 9% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 10% |
store | 6% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0xbb8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV $0x186a0,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x40301f,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV $0x1,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x403030,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMP $0x2,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 40110a <main+0x6a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x8(%R13),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x403044,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 401030 <strncmp@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CMP $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SBB %R12,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x14c08,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x186a0,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0x403047,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x401910,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 401090 <GOMP_parallel@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVSS 0x1f4a(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x1f47(%RIP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x190,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPS %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULSD 0x1c51(%RIP),%XMM9,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV $0x403058,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 401050 <printf@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼main– | 0.04 | 0.01 |
▼Loop 1 - main.c:97-169 - exec– | 0 | 0 |
○Loop 0 - main.c:111-116 - exec | 0.04 | 0.95 |