Function: main | Module: exec | Source: main.c:50-192 [...] | Coverage: 0.25% |
---|
Function: main | Module: exec | Source: main.c:50-192 [...] | Coverage: 0.25% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/171-094-7986/intel/HACCmk/build/HACCmk/src/mysecond.c: 22 - 23 |
-------------------------------------------------------------------------------- |
22: i = gettimeofday(&tp,&tzp); |
23: return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); |
/beegfs/hackathon/users/eoseret/qaas_runs/171-094-7986/intel/HACCmk/build/HACCmk/src/main.c: 50 - 192 |
-------------------------------------------------------------------------------- |
50: { |
[...] |
57: double t3, elapsed = 0.0, validation, final, t1, t2; |
[...] |
63: long NN = N; |
[...] |
73: printf( "count is set %d\n", count ); |
74: printf( "Total MPI ranks %d\n", nprocs ); |
75: } |
76: |
77: if (argc == 2 && strncmp(argv[1], "-s", 2) == 0) |
78: NN = 15000; |
79: |
80: printf( "N is set %ld\n", NN ); |
81: |
82: #pragma omp parallel |
[...] |
97: for ( n = 400; n < NN; n = n + 20 ) |
[...] |
103: dx1 = 1.0f/(float)n; |
104: dy1 = 2.0f/(float)n; |
105: dz1 = 3.0f/(float)n; |
106: xx[0] = 0.f; |
107: yy[0] = 0.f; |
108: zz[0] = 0.f; |
109: mass[0] = 2.f; |
110: |
111: for ( i = 1; i < n; i++ ) |
112: { |
113: xx[i] = xx[i-1] + dx1; |
114: yy[i] = yy[i-1] + dy1; |
115: zz[i] = zz[i-1] + dz1; |
116: mass[i] = (float)i * 0.01f + xx[i]; |
117: } |
118: |
119: for ( i = 0; i < n; i++ ) |
120: { |
121: vx1[i] = 0.f; |
122: vy1[i] = 0.f; |
123: vz1[i] = 0.f; |
[...] |
139: #pragma omp parallel for private( dx1, dy1, dz1 ) |
[...] |
166: t3 = (t2 - t1) * 1e6; |
167: #endif |
168: |
169: elapsed = elapsed + t3; |
[...] |
185: printf( "\nKernel elapsed time, s: %18.8lf\n", elapsed*1e-6 ); |
[...] |
192: } |
0x4010a0 PUSH %RBP |
0x4010a1 XOR %EAX,%EAX |
0x4010a3 MOV %RSP,%RBP |
0x4010a6 PUSH %R15 |
0x4010a8 PUSH %R14 |
0x4010aa PUSH %R13 |
0x4010ac PUSH %R12 |
0x4010ae PUSH %RBX |
0x4010af MOV %RSI,%R12 |
0x4010b2 MOV %EDI,%EBX |
0x4010b4 SUB $0x58,%RSP |
0x4010b8 MOV $0xbb8,%ESI |
0x4010bd MOV $0x40301f,%EDI |
0x4010c2 MOV $0x186a0,%R13D |
0x4010c8 CALL 401030 <printf@plt> |
0x4010cd MOV $0x1,%ESI |
0x4010d2 MOV $0x403030,%EDI |
0x4010d7 XOR %EAX,%EAX |
0x4010d9 CALL 401030 <printf@plt> |
0x4010de CMP $0x2,%EBX |
0x4010e1 JNE 40110b |
0x4010e3 MOV 0x8(%R12),%RDI |
0x4010e8 MOV $0x2,%EDX |
0x4010ed MOV $0x403044,%ESI |
0x4010f2 CALL 401070 <strncmp@plt> |
0x4010f7 CMP $0x1,%EAX |
0x4010fa SBB %R13,%R13 |
0x4010fd AND $-0x14c08,%R13 |
0x401104 ADD $0x186a0,%R13 |
0x40110b MOV %R13,%RSI |
0x40110e MOV $0x403047,%EDI |
0x401113 XOR %EAX,%EAX |
0x401115 CALL 401030 <printf@plt> |
0x40111a XOR %EDX,%EDX |
0x40111c XOR %ECX,%ECX |
0x40111e LEA -0x50(%RBP),%RSI |
0x401122 MOV $0x4015a0,%EDI |
0x401127 MOVL $0,-0x50(%RBP) |
0x40112e CALL 401090 <GOMP_parallel@plt> |
0x401133 VMOVSS 0x1f49(%RIP),%XMM8 |
0x40113b MOV 0x1f5e(%RIP),%R14 |
0x401142 MOV $0x190,%EDX |
0x401147 VXORPD %XMM9,%XMM9,%XMM9 |
0x40114c VXORPS %XMM4,%XMM4,%XMM4 |
(1) 0x401150 VMOVSS 0x1f24(%RIP),%XMM6 |
(1) 0x401158 VCVTSI2SS %EDX,%XMM4,%XMM3 |
(1) 0x40115c LEA -0x1(%RDX),%RAX |
(1) 0x401160 VXORPS %XMM0,%XMM0,%XMM0 |
(1) 0x401164 AND $0x3,%EAX |
(1) 0x401167 MOV %EDX,%R12D |
(1) 0x40116a MOV $0x1,%EBX |
(1) 0x40116f VMOVAPS %XMM0,%XMM2 |
(1) 0x401173 VMOVAPS %XMM0,%XMM1 |
(1) 0x401177 MOVL $0,0x24ddff(%RIP) |
(1) 0x401181 MOVL $0,0x1ec375(%RIP) |
(1) 0x40118b MOVL $0,0x18a8eb(%RIP) |
(1) 0x401195 MOVL $0x40000000,0x128e61(%RIP) |
(1) 0x40119f VDIVSS %XMM3,%XMM6,%XMM10 |
(1) 0x4011a3 VMULSS 0x1ed5(%RIP),%XMM10,%XMM11 |
(1) 0x4011ab VADDSS %XMM10,%XMM10,%XMM7 |
(1) 0x4011b0 JE 401281 |
(1) 0x4011b6 CMP $0x1,%RAX |
(1) 0x4011ba JE 40123a |
(1) 0x4011bc CMP $0x2,%RAX |
(1) 0x4011c0 JE 4011fc |
(1) 0x4011c2 VCVTSI2SS %EBX,%XMM4,%XMM5 |
(1) 0x4011c6 VFMADD132SS %XMM8,%XMM10,%XMM5 |
(1) 0x4011cb VMOVAPS %XMM10,%XMM0 |
(1) 0x4011cf VMOVSS %XMM10,0x24ddad(%RIP) |
(1) 0x4011d7 VMOVAPS %XMM7,%XMM2 |
(1) 0x4011db VMOVSS %XMM7,0x1ec321(%RIP) |
(1) 0x4011e3 VMOVAPS %XMM11,%XMM1 |
(1) 0x4011e7 MOV $0x2,%EBX |
(1) 0x4011ec VMOVSS %XMM11,0x18a890(%RIP) |
(1) 0x4011f4 VMOVSS %XMM5,0x128e08(%RIP) |
(1) 0x4011fc VADDSS %XMM10,%XMM0,%XMM0 |
(1) 0x401201 VCVTSI2SS %EBX,%XMM4,%XMM12 |
(1) 0x401205 VADDSS %XMM7,%XMM2,%XMM2 |
(1) 0x401209 VADDSS %XMM11,%XMM1,%XMM1 |
(1) 0x40120e VFMADD132SS %XMM8,%XMM0,%XMM12 |
(1) 0x401213 VMOVSS %XMM0,0x64ef80(,%RBX,4) |
(1) 0x40121c VMOVSS %XMM2,0x5ed500(,%RBX,4) |
(1) 0x401225 VMOVSS %XMM1,0x58ba80(,%RBX,4) |
(1) 0x40122e VMOVSS %XMM12,0x52a000(,%RBX,4) |
(1) 0x401237 INC %RBX |
(1) 0x40123a VADDSS %XMM10,%XMM0,%XMM0 |
(1) 0x40123f VCVTSI2SS %EBX,%XMM4,%XMM13 |
(1) 0x401243 VADDSS %XMM7,%XMM2,%XMM2 |
(1) 0x401247 VADDSS %XMM11,%XMM1,%XMM1 |
(1) 0x40124c VFMADD132SS %XMM8,%XMM0,%XMM13 |
(1) 0x401251 VMOVSS %XMM0,0x64ef80(,%RBX,4) |
(1) 0x40125a VMOVSS %XMM2,0x5ed500(,%RBX,4) |
(1) 0x401263 VMOVSS %XMM1,0x58ba80(,%RBX,4) |
(1) 0x40126c VMOVSS %XMM13,0x52a000(,%RBX,4) |
(1) 0x401275 INC %RBX |
(1) 0x401278 CMP %RDX,%RBX |
(1) 0x40127b JE 401386 |
(0) 0x401281 VADDSS %XMM10,%XMM0,%XMM14 |
(0) 0x401286 VADDSS %XMM7,%XMM2,%XMM15 |
(0) 0x40128a LEA 0x1(%RBX),%RCX |
(0) 0x40128e LEA 0x2(%RBX),%RSI |
(0) 0x401292 VADDSS %XMM11,%XMM1,%XMM3 |
(0) 0x401297 VCVTSI2SS %EBX,%XMM4,%XMM6 |
(0) 0x40129b LEA 0x3(%RBX),%RDI |
(0) 0x40129f VCVTSI2SS %ECX,%XMM4,%XMM5 |
(0) 0x4012a3 VADDSS %XMM10,%XMM14,%XMM12 |
(0) 0x4012a8 VADDSS %XMM7,%XMM15,%XMM2 |
(0) 0x4012ac VMOVSS %XMM14,0x64ef80(,%RBX,4) |
(0) 0x4012b5 VFMADD132SS %XMM8,%XMM14,%XMM6 |
(0) 0x4012ba VADDSS %XMM11,%XMM3,%XMM1 |
(0) 0x4012bf VMOVSS %XMM15,0x5ed500(,%RBX,4) |
(0) 0x4012c8 VCVTSI2SS %ESI,%XMM4,%XMM15 |
(0) 0x4012cc VMOVSS %XMM3,0x58ba80(,%RBX,4) |
(0) 0x4012d5 VCVTSI2SS %EDI,%XMM4,%XMM3 |
(0) 0x4012d9 VADDSS %XMM10,%XMM12,%XMM0 |
(0) 0x4012de VMOVSS %XMM12,0x64ef80(,%RCX,4) |
(0) 0x4012e7 VADDSS %XMM7,%XMM2,%XMM13 |
(0) 0x4012eb VFMADD132SS %XMM8,%XMM12,%XMM5 |
(0) 0x4012f0 VADDSS %XMM11,%XMM1,%XMM14 |
(0) 0x4012f5 VMOVSS %XMM2,0x5ed500(,%RCX,4) |
(0) 0x4012fe VMOVSS %XMM1,0x58ba80(,%RCX,4) |
(0) 0x401307 VFMADD132SS %XMM8,%XMM0,%XMM15 |
(0) 0x40130c VMOVSS %XMM0,0x64ef80(,%RSI,4) |
(0) 0x401315 VADDSS %XMM10,%XMM0,%XMM0 |
(0) 0x40131a VADDSS %XMM7,%XMM13,%XMM2 |
(0) 0x40131e VADDSS %XMM11,%XMM14,%XMM1 |
(0) 0x401323 VMOVSS %XMM6,0x52a000(,%RBX,4) |
(0) 0x40132c ADD $0x4,%RBX |
(0) 0x401330 VMOVSS %XMM5,0x52a000(,%RCX,4) |
(0) 0x401339 VFMADD132SS %XMM8,%XMM0,%XMM3 |
(0) 0x40133e VMOVSS %XMM13,0x5ed500(,%RSI,4) |
(0) 0x401347 VMOVSS %XMM14,0x58ba80(,%RSI,4) |
(0) 0x401350 VMOVSS %XMM15,0x52a000(,%RSI,4) |
(0) 0x401359 VMOVSS %XMM0,0x64ef80(,%RDI,4) |
(0) 0x401362 VMOVSS %XMM2,0x5ed500(,%RDI,4) |
(0) 0x40136b VMOVSS %XMM1,0x58ba80(,%RDI,4) |
(0) 0x401374 VMOVSS %XMM3,0x52a000(,%RDI,4) |
(0) 0x40137d CMP %RDX,%RBX |
(0) 0x401380 JNE 401281 |
(1) 0x401386 LEA (,%RBX,4),%R15 |
(1) 0x40138e XOR %ESI,%ESI |
(1) 0x401390 MOV $0x4c8580,%EDI |
(1) 0x401395 VMOVSD %XMM9,-0x78(%RBP) |
(1) 0x40139a MOV %R15,%RDX |
(1) 0x40139d CALL 401040 <memset@plt> |
(1) 0x4013a2 XOR %ESI,%ESI |
(1) 0x4013a4 MOV %R15,%RDX |
(1) 0x4013a7 MOV $0x466b00,%EDI |
(1) 0x4013ac CALL 401040 <memset@plt> |
(1) 0x4013b1 MOV %R15,%RDX |
(1) 0x4013b4 XOR %ESI,%ESI |
(1) 0x4013b6 MOV $0x405080,%EDI |
(1) 0x4013bb CALL 401040 <memset@plt> |
(1) 0x4013c0 LEA -0x58(%RBP),%RSI |
(1) 0x4013c4 LEA -0x50(%RBP),%RDI |
(1) 0x4013c8 CALL 401060 <gettimeofday@plt> |
(1) 0x4013cd XOR %EDX,%EDX |
(1) 0x4013cf XOR %ECX,%ECX |
(1) 0x4013d1 LEA -0x50(%RBP),%RSI |
(1) 0x4013d5 MOV $0x401d60,%EDI |
(1) 0x4013da VXORPS %XMM4,%XMM4,%XMM4 |
(1) 0x4013de MOVL $0xbb8,-0x40(%RBP) |
(1) 0x4013e5 VCVTSI2SDQ -0x48(%RBP),%XMM4,%XMM9 |
(1) 0x4013eb VCVTSI2SDQ -0x50(%RBP),%XMM4,%XMM8 |
(1) 0x4013f1 VMOVSD %XMM9,-0x70(%RBP) |
(1) 0x4013f6 VMOVSD %XMM8,-0x68(%RBP) |
(1) 0x4013fb MOV %R12D,-0x44(%RBP) |
(1) 0x4013ff MOVL $0x3e6b851f,-0x48(%RBP) |
(1) 0x401406 MOV %R14,-0x50(%RBP) |
(1) 0x40140a CALL 401090 <GOMP_parallel@plt> |
(1) 0x40140f LEA -0x58(%RBP),%RSI |
(1) 0x401413 LEA -0x50(%RBP),%RDI |
(1) 0x401417 CALL 401060 <gettimeofday@plt> |
(1) 0x40141c LEA 0x14(%RBX),%RDX |
(1) 0x401420 VXORPS %XMM4,%XMM4,%XMM4 |
(1) 0x401424 VMOVSD 0x1c7c(%RIP),%XMM7 |
(1) 0x40142c CMP %RDX,%R13 |
(1) 0x40142f VCVTSI2SDQ -0x50(%RBP),%XMM4,%XMM10 |
(1) 0x401435 VCVTSI2SDQ -0x48(%RBP),%XMM4,%XMM11 |
(1) 0x40143b VFNMADD231SD -0x70(%RBP),%XMM7,%XMM10 |
(1) 0x401441 VFMSUB213SD -0x68(%RBP),%XMM7,%XMM11 |
(1) 0x401447 VMOVSD -0x78(%RBP),%XMM9 |
(1) 0x40144c VADDSD %XMM11,%XMM10,%XMM6 |
(1) 0x401451 VMOVSS 0x1c2b(%RIP),%XMM8 |
(1) 0x401459 VFMADD231SD 0x1c4e(%RIP),%XMM6,%XMM9 |
(1) 0x401462 JG 401150 |
0x401468 MOV $0x403058,%EDI |
0x40146d MOV $0x1,%EAX |
0x401472 VMULSD %XMM7,%XMM9,%XMM0 |
0x401476 CALL 401030 <printf@plt> |
0x40147b ADD $0x58,%RSP |
0x40147f XOR %EAX,%EAX |
0x401481 POP %RBX |
0x401482 POP %R12 |
0x401484 POP %R13 |
0x401486 POP %R14 |
0x401488 POP %R15 |
0x40148a POP %RBP |
0x40148b RET |
0x40148c NOPL (%RAX) |
Path / |
Source file and lines | main.c:50-192 |
Module | exec |
nb instructions | 58 |
nb uops | 63 |
loop length | 216 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.00 | 6.00 | 6.00 | 6.00 | 4.00 | 3.33 | 3.33 | 3.33 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.00 | 6.00 | 6.00 | 6.00 | 4.00 | 3.33 | 3.33 | 3.33 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 10.50 |
Dispatch | 6.00 |
Overall L1 | 10.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 7% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
all | 7% |
load | 12% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 10% |
store | 6% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EDI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x40301f,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x186a0,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 401030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x403030,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 40110b <main+0x6b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x403044,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 401070 <strncmp@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SBB %R13,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x14c08,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x186a0,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x403047,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x50(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x4015a0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 401090 <GOMP_parallel@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSS 0x1f49(%RIP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x1f5e(%RIP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV $0x190,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPS %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x403058,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMULSD %XMM7,%XMM9,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
CALL 401030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | main.c:50-192 |
Module | exec |
nb instructions | 58 |
nb uops | 63 |
loop length | 216 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.00 | 6.00 | 6.00 | 6.00 | 4.00 | 3.33 | 3.33 | 3.33 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.00 | 6.00 | 6.00 | 6.00 | 4.00 | 3.33 | 3.33 | 3.33 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 10.50 |
Dispatch | 6.00 |
Overall L1 | 10.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 7% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
all | 7% |
load | 12% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 10% |
store | 6% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EDI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0xbb8,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x40301f,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x186a0,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 401030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x1,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x403030,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JNE 40110b <main+0x6b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x403044,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 401070 <strncmp@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SBB %R13,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x14c08,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x186a0,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x403047,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 401030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x50(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x4015a0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 401090 <GOMP_parallel@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSS 0x1f49(%RIP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x1f5e(%RIP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV $0x190,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPS %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x403058,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMULSD %XMM7,%XMM9,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
CALL 401030 <printf@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x58,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼main– | 0.25 | 0.02 |
▼Loop 1 - main.c:57-169 - exec– | 0 | 0 |
○Loop 0 - main.c:111-116 - exec | 0.25 | 4.73 |