Function: _Z14generate_chunkiR16global_variables._omp_fn.1 | Module: exec | Source: generate_chunk.cpp:85-123 [...] | Coverage: 0.01% |
---|
Function: _Z14generate_chunkiR16global_variables._omp_fn.1 | Module: exec | Source: generate_chunk.cpp:85-123 [...] | Coverage: 0.01% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/generate_chunk.cpp: 85 - 123 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for simd collapse(2) |
86: for (int j = (0); j < (yrange); j++) { |
87: for (int i = (0); i < (xrange); i++) { |
88: double x_cent = state_xmin[state]; |
89: double y_cent = state_ymin[state]; |
90: if (state_geometry[state] == g_rect) { |
91: if (field.vertexx[i + 1] >= state_xmin[state] && field.vertexx[i] < state_xmax[state]) { |
92: if (field.vertexy[j + 1] >= state_ymin[state] && field.vertexy[j] < state_ymax[state]) { |
93: field.energy0(i, j) = state_energy[state]; |
94: field.density0(i, j) = state_density[state]; |
95: for (int kt = j; kt <= j + 1; ++kt) { |
96: for (int jt = i; jt <= i + 1; ++jt) { |
97: field.xvel0(jt, kt) = state_xvel[state]; |
98: field.yvel0(jt, kt) = state_yvel[state]; |
99: } |
100: } |
101: } |
102: } |
103: } else if (state_geometry[state] == g_circ) { |
104: double radius = |
105: std::sqrt((field.cellx[i] - x_cent) * (field.cellx[i] - x_cent) + (field.celly[j] - y_cent) * (field.celly[j] - y_cent)); |
106: if (radius <= state_radius[state]) { |
107: field.energy0(i, j) = state_energy[state]; |
108: field.density0(i, j) = state_density[state]; |
109: for (int kt = j; kt <= j + 1; ++kt) { |
110: for (int jt = i; jt <= i + 1; ++jt) { |
111: field.xvel0(jt, kt) = state_xvel[state]; |
112: field.yvel0(jt, kt) = state_yvel[state]; |
113: } |
114: } |
115: } |
116: } else if (state_geometry[state] == g_point) { |
117: if (field.vertexx[i] == x_cent && field.vertexy[j] == y_cent) { |
118: field.energy0(i, j) = state_energy[state]; |
119: field.density0(i, j) = state_density[state]; |
120: for (int kt = j; kt <= j + 1; ++kt) { |
121: for (int jt = i; jt <= i + 1; ++jt) { |
122: field.xvel0(jt, kt) = state_xvel[state]; |
123: field.yvel0(jt, kt) = state_yvel[state]; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x438f60 PUSH %RBP |
0x438f61 MOV %RSP,%RBP |
0x438f64 PUSH %R15 |
0x438f66 PUSH %R14 |
0x438f68 PUSH %R13 |
0x438f6a PUSH %R12 |
0x438f6c PUSH %RBX |
0x438f6d SUB $0x48,%RSP |
0x438f71 MOV 0x5c(%RDI),%R15D |
0x438f75 MOV 0x58(%RDI),%R12D |
0x438f79 MOV %R15D,-0x44(%RBP) |
0x438f7d TEST %R15D,%R15D |
0x438f80 JLE 439160 |
0x438f86 TEST %R12D,%R12D |
0x438f89 JLE 439160 |
0x438f8f MOV %RDI,%RBX |
0x438f92 CALL 404650 <omp_get_num_threads@plt> |
0x438f97 MOV %EAX,%R14D |
0x438f9a CALL 404540 <omp_get_thread_num@plt> |
0x438f9f XOR %EDX,%EDX |
0x438fa1 MOV %EAX,%ESI |
0x438fa3 MOV %R15D,%EAX |
0x438fa6 IMUL %R12D,%EAX |
0x438faa DIV %R14D |
0x438fad MOV %EAX,%R10D |
0x438fb0 CMP %EDX,%ESI |
0x438fb2 JB 4396fb |
0x438fb8 IMUL %R10D,%ESI |
0x438fbc ADD %EDX,%ESI |
0x438fbe LEA (%R10,%RSI,1),%EDI |
0x438fc2 MOV %EDI,-0x48(%RBP) |
0x438fc5 CMP %EDI,%ESI |
0x438fc7 JAE 439160 |
0x438fcd MOV %ESI,%EAX |
0x438fcf XOR %EDX,%EDX |
0x438fd1 MOVSXD 0x60(%RBX),%R8 |
0x438fd5 VMOVQ 0x28(%RBX),%XMM2 |
0x438fda DIV %R12D |
0x438fdd VMOVQ 0x20(%RBX),%XMM12 |
0x438fe2 VMOVQ 0x40(%RBX),%XMM6 |
0x438fe7 MOV 0x48(%RBX),%R15 |
0x438feb VMOVQ 0x30(%RBX),%XMM3 |
0x438ff0 VMOVQ 0x10(%RBX),%XMM14 |
0x438ff5 MOV 0x50(%RBX),%R11 |
0x438ff9 MOV %R12D,%R9D |
0x438ffc VMOVQ 0x18(%RBX),%XMM13 |
0x439001 MOV %R12D,-0x50(%RBP) |
0x439005 LEA (,%R8,8),%R14 |
0x43900d SAL $0x2,%R8 |
0x439011 VMOVDQA64 %XMM2,%XMM19 |
0x439017 VMOVDQA64 %XMM6,%XMM20 |
0x43901d MOV %R8,-0x58(%RBP) |
0x439021 VMOVDQA %XMM12,%XMM2 |
0x439025 MOV %EAX,-0x38(%RBP) |
0x439028 MOVSXD -0x38(%RBP),%RDI |
0x43902c SUB %EDX,%R9D |
0x43902f MOV %ESI,%EAX |
0x439031 MOV %R9D,%R13D |
0x439034 MOV %RDI,%R8 |
0x439037 NOPW (%RAX,%RAX,1) |
(234) 0x439040 CMP %R13D,%R10D |
(234) 0x439043 CMOVBE %R10D,%R13D |
(234) 0x439047 INC %R8D |
(234) 0x43904a LEA (%RAX,%R13,1),%R9D |
(234) 0x43904e MOV %R13D,%R10D |
(234) 0x439051 CMP %R9D,%EAX |
(234) 0x439054 JAE 439140 |
(234) 0x43905a VMOVQ %XMM2,%RCX |
(234) 0x43905f MOV 0x8(%R15),%RAX |
(234) 0x439063 VMOVQ %XMM3,%R12 |
(234) 0x439068 MOVSXD %EDX,%RDX |
(234) 0x43906b MOV 0x8(%RCX),%R13 |
(234) 0x43906f MOV -0x58(%RBP),%RCX |
(234) 0x439073 MOV 0x8(%R12),%RSI |
(234) 0x439078 DEC %R10D |
(234) 0x43907b LEA (,%RDI,8),%R12 |
(234) 0x439083 MOV (%RAX,%RCX,1),%ECX |
(234) 0x439086 ADD %R14,%R13 |
(234) 0x439089 ADD %R14,%RSI |
(234) 0x43908c LEA 0x1(%RDX),%RAX |
(234) 0x439090 VMOVQ %R13,%XMM7 |
(234) 0x439095 LEA 0x1(%RDX,%R10,1),%R13 |
(234) 0x43909a VMOVQ %RSI,%XMM10 |
(234) 0x43909f VMOVSD (%RSI),%XMM5 |
(234) 0x4390a3 VMOVQ %XMM7,%R10 |
(234) 0x4390a8 VMOVSD (%R10),%XMM4 |
(234) 0x4390ad CMP $0x1,%ECX |
(234) 0x4390b0 JNE 439170 |
(234) 0x4390b6 MOV 0x258(%R11),%R10 |
(234) 0x4390bd VMOVQ %R15,%XMM6 |
(234) 0x4390c2 VMOVQ %XMM19,%RSI |
(234) 0x4390c8 MOV %R9D,-0x38(%RBP) |
(234) 0x4390cc JMP 4390d3 |
0x4390ce XCHG %AX,%AX |
(236) 0x4390d0 INC %RAX |
(236) 0x4390d3 LEA (,%RDX,8),%RCX |
(236) 0x4390db VCOMISD 0x8(%R10,%RDX,8),%XMM4 |
(236) 0x4390e2 JA 439130 |
(236) 0x4390e4 MOV 0x8(%RSI),%R9 |
(236) 0x4390e8 VMOVSD (%R9,%R14,1),%XMM11 |
(236) 0x4390ee VCOMISD (%R10,%RCX,1),%XMM11 |
(236) 0x4390f4 JBE 439130 |
(236) 0x4390f6 MOV 0x278(%R11),%R15 |
(236) 0x4390fd VCOMISD 0x8(%R15,%R12,1),%XMM5 |
(236) 0x439104 JA 439130 |
(236) 0x439106 MOV 0x38(%RBX),%RCX |
(236) 0x43910a VMOVQ 0x8(%RCX),%XMM9 |
(236) 0x43910f VMOVQ %XMM9,%R9 |
(236) 0x439114 VMOVSD (%R9,%R14,1),%XMM15 |
(236) 0x43911a VCOMISD (%R15,%R12,1),%XMM15 |
(236) 0x439120 JA 439530 |
(236) 0x439126 NOPW %CS:(%RAX,%RAX,1) |
(236) 0x439130 MOV %RAX,%RDX |
(236) 0x439133 CMP %R13,%RAX |
(236) 0x439136 JNE 4390d0 |
(234) 0x439138 MOV -0x38(%RBP),%EAX |
(234) 0x43913b VMOVQ %XMM6,%R15 |
(234) 0x439140 INC %RDI |
(234) 0x439143 CMP %R8D,-0x44(%RBP) |
(234) 0x439147 JLE 439160 |
(234) 0x439149 MOV -0x48(%RBP),%R10D |
(234) 0x43914d MOV -0x50(%RBP),%R13D |
(234) 0x439151 XOR %EDX,%EDX |
(234) 0x439153 SUB %EAX,%R10D |
(234) 0x439156 JMP 439040 |
0x43915b NOPL (%RAX,%RAX,1) |
0x439160 ADD $0x48,%RSP |
0x439164 POP %RBX |
0x439165 POP %R12 |
0x439167 POP %R13 |
0x439169 POP %R14 |
0x43916b POP %R15 |
0x43916d POP %RBP |
0x43916e RET |
0x43916f NOP |
(234) 0x439170 MOVSXD %R8D,%R10 |
(234) 0x439173 MOV %R9D,-0x5c(%RBP) |
(234) 0x439177 VMOVQ %R15,%XMM9 |
(234) 0x43917c VMOVDQA64 %XMM20,%XMM11 |
(234) 0x439182 VMOVQ %R10,%XMM15 |
(234) 0x439187 MOV %R12,%R9 |
(234) 0x43918a MOV %R8D,-0x4c(%RBP) |
(234) 0x43918e MOV %RBX,-0x38(%RBP) |
(234) 0x439192 JMP 4391b8 |
0x439194 NOPW %CS:(%RAX,%RAX,1) |
0x43919f NOP |
(235) 0x4391a0 CMP $0x3,%ECX |
(235) 0x4391a3 JE 439380 |
(235) 0x4391a9 MOV %RAX,%RDX |
(235) 0x4391ac CMP %R13,%RAX |
(235) 0x4391af JE 439510 |
(235) 0x4391b5 INC %RAX |
(235) 0x4391b8 CMP $0x2,%ECX |
(235) 0x4391bb JNE 4391a0 |
(235) 0x4391bd MOV 0x218(%R11),%R12 |
(235) 0x4391c4 MOV 0x238(%R11),%R8 |
(235) 0x4391cb VMOVQ %XMM11,%R10 |
(235) 0x4391d0 MOV 0x8(%R10),%RSI |
(235) 0x4391d4 VMOVSD (%R12,%RDX,8),%XMM8 |
(235) 0x4391da VMOVSD (%R8,%R9,1),%XMM0 |
(235) 0x4391e0 VSUBSD %XMM4,%XMM8,%XMM12 |
(235) 0x4391e4 VSUBSD %XMM5,%XMM0,%XMM1 |
(235) 0x4391e8 VMULSD %XMM1,%XMM1,%XMM6 |
(235) 0x4391ec VFMADD132SD %XMM12,%XMM6,%XMM12 |
(235) 0x4391f1 VSQRTSD %XMM12,%XMM12,%XMM12 |
(235) 0x4391f6 VCOMISD (%RSI,%R14,1),%XMM12 |
(235) 0x4391fc JA 4391a9 |
(235) 0x4391fe MOV -0x38(%RBP),%RBX |
(235) 0x439202 MOV 0x30(%R11),%R8 |
(235) 0x439206 MOV 0x40(%R11),%R10 |
(235) 0x43920a VMOVQ 0xb8(%R11),%XMM8 |
(235) 0x439213 VMOVQ 0xe8(%R11),%XMM12 |
(235) 0x43921c MOV 0x8(%RBX),%R15 |
(235) 0x439220 MOV (%RBX),%RSI |
(235) 0x439223 IMUL %RDI,%R8 |
(235) 0x439227 ADD %RDX,%R8 |
(235) 0x43922a MOV 0x8(%R15),%R12 |
(235) 0x43922e MOV (%R11),%R15 |
(235) 0x439231 MOV 0x8(%RSI),%RBX |
(235) 0x439235 VMOVSD (%R12,%R14,1),%XMM4 |
(235) 0x43923b MOV 0x10(%R11),%R12 |
(235) 0x43923f IMUL %RDI,%R15 |
(235) 0x439243 VMOVSD %XMM4,(%R10,%R8,8) |
(235) 0x439249 VMOVSD (%RBX,%R14,1),%XMM5 |
(235) 0x43924f MOV 0xd8(%R11),%RBX |
(235) 0x439256 VMOVQ %XMM14,%R8 |
(235) 0x43925b VMOVQ %XMM13,%R10 |
(235) 0x439260 MOV 0x8(%R8),%R8 |
(235) 0x439264 ADD %RDX,%R15 |
(235) 0x439267 VMOVSD %XMM5,(%R12,%R15,8) |
(235) 0x43926d MOV 0xa8(%R11),%R12 |
(235) 0x439274 MOV 0x8(%R10),%R15 |
(235) 0x439278 MOV %RBX,%RSI |
(235) 0x43927b IMUL %RDI,%RSI |
(235) 0x43927f ADD %R14,%R8 |
(235) 0x439282 VMOVSD (%R8),%XMM0 |
(235) 0x439287 MOV %R12,%R10 |
(235) 0x43928a MOV %RSI,-0x40(%RBP) |
(235) 0x43928e VMOVQ %XMM8,%RSI |
(235) 0x439293 ADD %R14,%R15 |
(235) 0x439296 IMUL %RDI,%R10 |
(235) 0x43929a MOV %R10,-0x68(%RBP) |
(235) 0x43929e ADD %RDX,%R10 |
(235) 0x4392a1 VMOVSD %XMM0,(%RSI,%R10,8) |
(235) 0x4392a7 MOV -0x40(%RBP),%RSI |
(235) 0x4392ab VMOVSD (%R15),%XMM6 |
(235) 0x4392b0 LEA (%RSI,%RDX,1),%R10 |
(235) 0x4392b4 VMOVQ %R10,%XMM4 |
(235) 0x4392b9 VMOVQ %XMM12,%R10 |
(235) 0x4392be VMOVQ %XMM4,%RSI |
(235) 0x4392c3 VMOVSD %XMM6,(%R10,%RSI,8) |
(235) 0x4392c9 MOV -0x68(%RBP),%R10 |
(235) 0x4392cd VMOVQ %XMM8,%RSI |
(235) 0x4392d2 VMOVSD (%R8),%XMM5 |
(235) 0x4392d7 LEA (%R10,%RAX,1),%R10 |
(235) 0x4392db VMOVSD %XMM5,(%RSI,%R10,8) |
(235) 0x4392e1 MOV -0x40(%RBP),%RSI |
(235) 0x4392e5 VMOVQ %XMM12,%R10 |
(235) 0x4392ea VMOVSD (%R15),%XMM0 |
(235) 0x4392ef ADD %RAX,%RSI |
(235) 0x4392f2 VMOVSD %XMM0,(%R10,%RSI,8) |
(235) 0x4392f8 VMOVQ %XMM15,%RSI |
(235) 0x4392fd VMOVSD (%R8),%XMM6 |
(235) 0x439302 IMUL %RSI,%R12 |
(235) 0x439306 IMUL %RSI,%RBX |
(235) 0x43930a VMOVQ %XMM8,%RSI |
(235) 0x43930f MOV %RBX,%R10 |
(235) 0x439312 LEA (%R12,%RDX,1),%RBX |
(235) 0x439316 VMOVSD %XMM6,(%RSI,%RBX,8) |
(235) 0x43931b ADD %R10,%RDX |
(235) 0x43931e VMOVQ %XMM12,%RSI |
(235) 0x439323 VMOVSD (%R15),%XMM4 |
(235) 0x439328 ADD %RAX,%R10 |
(235) 0x43932b VMOVSD %XMM4,(%RSI,%RDX,8) |
(235) 0x439330 LEA (%R12,%RAX,1),%RDX |
(235) 0x439334 VMOVSD (%R8),%XMM5 |
(235) 0x439339 VMOVQ %XMM8,%R8 |
(235) 0x43933e VMOVSD %XMM5,(%R8,%RDX,8) |
(235) 0x439344 MOV %RAX,%RDX |
(235) 0x439347 VMOVSD (%R15),%XMM8 |
(235) 0x43934c VMOVSD %XMM8,(%RSI,%R10,8) |
(235) 0x439352 CMP %R13,%RAX |
(235) 0x439355 JE 439510 |
(235) 0x43935b VMOVQ %XMM7,%R15 |
(235) 0x439360 VMOVQ %XMM10,%R10 |
(235) 0x439365 VMOVSD (%R15),%XMM4 |
(235) 0x43936a VMOVSD (%R10),%XMM5 |
(235) 0x43936f JMP 4391b5 |
0x439374 NOPW %CS:(%RAX,%RAX,1) |
0x43937f NOP |
(235) 0x439380 MOV 0x258(%R11),%RBX |
(235) 0x439387 VCOMISD (%RBX,%RDX,8),%XMM4 |
(235) 0x43938c JNE 4391a9 |
(235) 0x439392 MOV 0x278(%R11),%R15 |
(235) 0x439399 VCOMISD (%R15,%R9,1),%XMM5 |
(235) 0x43939f JNE 4391a9 |
(235) 0x4393a5 MOV -0x38(%RBP),%R8 |
(235) 0x4393a9 MOV 0x30(%R11),%R10 |
(235) 0x4393ad MOV 0x40(%R11),%RBX |
(235) 0x4393b1 VMOVQ 0xd8(%R11),%XMM4 |
(235) 0x4393ba VMOVQ 0xb8(%R11),%XMM6 |
(235) 0x4393c3 VMOVQ 0xe8(%R11),%XMM8 |
(235) 0x4393cc MOV 0x8(%R8),%R12 |
(235) 0x4393d0 MOV (%R8),%R15 |
(235) 0x4393d3 IMUL %RDI,%R10 |
(235) 0x4393d7 ADD %RDX,%R10 |
(235) 0x4393da MOV 0x8(%R12),%RSI |
(235) 0x4393df MOV 0x8(%R15),%R8 |
(235) 0x4393e3 MOV 0x10(%R11),%R12 |
(235) 0x4393e7 VMOVSD (%RSI,%R14,1),%XMM0 |
(235) 0x4393ed MOV (%R11),%RSI |
(235) 0x4393f0 VMOVSD %XMM0,(%RBX,%R10,8) |
(235) 0x4393f6 VMOVSD (%R8,%R14,1),%XMM1 |
(235) 0x4393fc VMOVQ %XMM14,%R10 |
(235) 0x439401 VMOVQ %XMM13,%RBX |
(235) 0x439406 MOV 0x8(%R10),%R15 |
(235) 0x43940a MOV 0x8(%RBX),%R8 |
(235) 0x43940e IMUL %RDI,%RSI |
(235) 0x439412 ADD %R14,%R15 |
(235) 0x439415 ADD %R14,%R8 |
(235) 0x439418 ADD %RDX,%RSI |
(235) 0x43941b VMOVSD %XMM1,(%R12,%RSI,8) |
(235) 0x439421 MOV 0xa8(%R11),%R12 |
(235) 0x439428 VMOVQ %XMM4,%RSI |
(235) 0x43942d VMOVSD (%R15),%XMM17 |
(235) 0x439433 IMUL %RDI,%RSI |
(235) 0x439437 MOV %RSI,-0x40(%RBP) |
(235) 0x43943b VMOVQ %XMM6,%RSI |
(235) 0x439440 MOV %R12,%RBX |
(235) 0x439443 IMUL %RDI,%RBX |
(235) 0x439447 LEA (%RBX,%RDX,1),%R10 |
(235) 0x43944b ADD %RAX,%RBX |
(235) 0x43944e VMOVSD %XMM17,(%RSI,%R10,8) |
(235) 0x439455 MOV -0x40(%RBP),%RSI |
(235) 0x439459 VMOVSD (%R8),%XMM16 |
(235) 0x43945f LEA (%RSI,%RDX,1),%R10 |
(235) 0x439463 VMOVQ %XMM8,%RSI |
(235) 0x439468 VMOVSD %XMM16,(%RSI,%R10,8) |
(235) 0x43946f VMOVQ %XMM6,%RSI |
(235) 0x439474 VMOVSD (%R15),%XMM18 |
(235) 0x43947a MOV -0x40(%RBP),%R10 |
(235) 0x43947e VMOVSD %XMM18,(%RSI,%RBX,8) |
(235) 0x439485 MOVSXD -0x4c(%RBP),%RSI |
(235) 0x439489 VMOVQ %XMM8,%RBX |
(235) 0x43948e VMOVSD (%R8),%XMM21 |
(235) 0x439494 ADD %RAX,%R10 |
(235) 0x439497 IMUL %RSI,%R12 |
(235) 0x43949b VMOVSD %XMM21,(%RBX,%R10,8) |
(235) 0x4394a2 VMOVQ %XMM4,%R10 |
(235) 0x4394a7 VMOVSD (%R15),%XMM0 |
(235) 0x4394ac IMUL %RSI,%R10 |
(235) 0x4394b0 LEA (%R12,%RDX,1),%RBX |
(235) 0x4394b4 ADD %RAX,%R12 |
(235) 0x4394b7 VMOVQ %RBX,%XMM1 |
(235) 0x4394bc VMOVQ %XMM6,%RBX |
(235) 0x4394c1 ADD %R10,%RDX |
(235) 0x4394c4 ADD %RAX,%R10 |
(235) 0x4394c7 VMOVQ %XMM1,%RSI |
(235) 0x4394cc VMOVSD %XMM0,(%RBX,%RSI,8) |
(235) 0x4394d1 VMOVQ %XMM8,%RBX |
(235) 0x4394d6 VMOVSD (%R8),%XMM4 |
(235) 0x4394db VMOVSD %XMM4,(%RBX,%RDX,8) |
(235) 0x4394e0 VMOVQ %XMM6,%RDX |
(235) 0x4394e5 VMOVSD (%R15),%XMM5 |
(235) 0x4394ea VMOVSD %XMM5,(%RDX,%R12,8) |
(235) 0x4394f0 MOV %RAX,%RDX |
(235) 0x4394f3 VMOVSD (%R8),%XMM6 |
(235) 0x4394f8 VMOVSD %XMM6,(%RBX,%R10,8) |
(235) 0x4394fe CMP %R13,%RAX |
(235) 0x439501 JNE 43935b |
(234) 0x439507 NOPW (%RAX,%RAX,1) |
(234) 0x439510 MOV -0x4c(%RBP),%R8D |
(234) 0x439514 MOV -0x5c(%RBP),%EAX |
(234) 0x439517 MOV -0x38(%RBP),%RBX |
(234) 0x43951b VMOVQ %XMM9,%R15 |
(234) 0x439520 JMP 439140 |
0x439525 NOPW %CS:(%RAX,%RAX,1) |
(236) 0x439530 MOV 0x8(%RBX),%R15 |
(236) 0x439534 MOV 0x40(%R11),%R9 |
(236) 0x439538 VMOVQ 0xa8(%R11),%XMM11 |
(236) 0x439541 VMOVQ 0xd8(%R11),%XMM5 |
(236) 0x43954a VMOVQ 0xb8(%R11),%XMM4 |
(236) 0x439553 VMOVQ 0xe8(%R11),%XMM15 |
(236) 0x43955c MOV 0x8(%R15),%RCX |
(236) 0x439560 MOV 0x30(%R11),%R15 |
(236) 0x439564 VMOVSD (%RCX,%R14,1),%XMM12 |
(236) 0x43956a MOV (%RBX),%RCX |
(236) 0x43956d IMUL %RDI,%R15 |
(236) 0x439571 ADD %RDX,%R15 |
(236) 0x439574 VMOVSD %XMM12,(%R9,%R15,8) |
(236) 0x43957a MOV 0x10(%R11),%R9 |
(236) 0x43957e MOV 0x8(%RCX),%R15 |
(236) 0x439582 MOV (%R11),%RCX |
(236) 0x439585 IMUL %RDI,%RCX |
(236) 0x439589 VMOVSD (%R15,%R14,1),%XMM0 |
(236) 0x43958f VMOVQ %XMM14,%R15 |
(236) 0x439594 ADD %RDX,%RCX |
(236) 0x439597 VMOVSD %XMM0,(%R9,%RCX,8) |
(236) 0x43959d MOV 0x8(%R15),%RCX |
(236) 0x4395a1 VMOVQ %XMM13,%R9 |
(236) 0x4395a6 MOV 0x8(%R9),%R15 |
(236) 0x4395aa VMOVQ %XMM11,%R9 |
(236) 0x4395af IMUL %RDI,%R9 |
(236) 0x4395b3 ADD %R14,%RCX |
(236) 0x4395b6 VMOVQ %RCX,%XMM8 |
(236) 0x4395bb VMOVQ %XMM5,%RCX |
(236) 0x4395c0 ADD %R14,%R15 |
(236) 0x4395c3 IMUL %RDI,%RCX |
(236) 0x4395c7 VMOVQ %R15,%XMM1 |
(236) 0x4395cc VMOVQ %XMM8,%R15 |
(236) 0x4395d1 VMOVSD (%R15),%XMM22 |
(236) 0x4395d7 VMOVQ %XMM4,%R15 |
(236) 0x4395dc MOV %RCX,-0x40(%RBP) |
(236) 0x4395e0 LEA (%R9,%RDX,1),%RCX |
(236) 0x4395e4 LEA (%R9,%RAX,1),%R9 |
(236) 0x4395e8 VMOVSD %XMM22,(%R15,%RCX,8) |
(236) 0x4395ef MOV -0x40(%RBP),%RCX |
(236) 0x4395f3 VMOVQ %XMM1,%R15 |
(236) 0x4395f8 VMOVSD (%R15),%XMM23 |
(236) 0x4395fe ADD %RDX,%RCX |
(236) 0x439601 VMOVQ %RCX,%XMM12 |
(236) 0x439606 VMOVQ %XMM15,%RCX |
(236) 0x43960b VMOVQ %XMM12,%R15 |
(236) 0x439610 VMOVSD %XMM23,(%RCX,%R15,8) |
(236) 0x439617 VMOVQ %XMM8,%RCX |
(236) 0x43961c VMOVQ %XMM4,%R15 |
(236) 0x439621 VMOVSD (%RCX),%XMM24 |
(236) 0x439627 VMOVSD %XMM24,(%R15,%R9,8) |
(236) 0x43962e MOV -0x40(%RBP),%R9 |
(236) 0x439632 VMOVQ %XMM1,%RCX |
(236) 0x439637 VMOVQ %XMM15,%R15 |
(236) 0x43963c VMOVSD (%RCX),%XMM25 |
(236) 0x439642 MOVSXD %R8D,%RCX |
(236) 0x439645 ADD %RAX,%R9 |
(236) 0x439648 VMOVSD %XMM25,(%R15,%R9,8) |
(236) 0x43964f VMOVQ %XMM11,%R9 |
(236) 0x439654 VMOVQ %XMM5,%R15 |
(236) 0x439659 IMUL %RCX,%R9 |
(236) 0x43965d IMUL %R15,%RCX |
(236) 0x439661 LEA (%R9,%RDX,1),%R15 |
(236) 0x439665 LEA (%R9,%RAX,1),%R9 |
(236) 0x439669 VMOVQ %R15,%XMM0 |
(236) 0x43966e MOV %RCX,-0x40(%RBP) |
(236) 0x439672 VMOVQ %XMM8,%RCX |
(236) 0x439677 VMOVQ %XMM4,%R15 |
(236) 0x43967c VMOVSD (%RCX),%XMM26 |
(236) 0x439682 VMOVQ %XMM0,%RCX |
(236) 0x439687 VMOVSD %XMM26,(%R15,%RCX,8) |
(236) 0x43968e VMOVQ %XMM1,%R15 |
(236) 0x439693 VMOVQ %XMM15,%RCX |
(236) 0x439698 VMOVSD (%R15),%XMM27 |
(236) 0x43969e MOV -0x40(%RBP),%R15 |
(236) 0x4396a2 ADD %R15,%RDX |
(236) 0x4396a5 ADD %RAX,%R15 |
(236) 0x4396a8 VMOVSD %XMM27,(%RCX,%RDX,8) |
(236) 0x4396af VMOVQ %XMM8,%RDX |
(236) 0x4396b4 VMOVQ %XMM4,%RCX |
(236) 0x4396b9 VMOVSD (%RDX),%XMM8 |
(236) 0x4396bd VMOVQ %XMM1,%RDX |
(236) 0x4396c2 VMOVSD %XMM8,(%RCX,%R9,8) |
(236) 0x4396c8 VMOVQ %XMM15,%R9 |
(236) 0x4396cd VMOVSD (%RDX),%XMM4 |
(236) 0x4396d1 MOV %RAX,%RDX |
(236) 0x4396d4 VMOVSD %XMM4,(%R9,%R15,8) |
(236) 0x4396da CMP %R13,%RAX |
(236) 0x4396dd JE 439138 |
(236) 0x4396e3 VMOVQ %XMM7,%RCX |
(236) 0x4396e8 VMOVQ %XMM10,%R9 |
(236) 0x4396ed VMOVSD (%RCX),%XMM4 |
(236) 0x4396f1 VMOVSD (%R9),%XMM5 |
(236) 0x4396f6 JMP 4390d0 |
0x4396fb INC %R10D |
0x4396fe XOR %EDX,%EDX |
0x439700 JMP 438fb8 |
0x439705 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | generate_chunk.cpp:85-123 |
Module | exec |
nb instructions | 80 |
nb uops | 74 |
loop length | 303 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 12.33 cycles |
front end | 12.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.00 | 5.50 | 5.25 | 5.25 | 6.00 | 6.33 | 6.33 | 6.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.00 | 5.50 | 5.25 | 5.25 | 6.00 | 6.33 | 6.33 | 6.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.33 |
Dispatch | 6.33 |
DIV/SQRT | 12.00 |
Overall L1 | 12.33 |
all | 9% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 23% |
all | 10% |
load | 11% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x5c(%RDI),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RDI),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R15D,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %R15D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 439160 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %R12D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 439160 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R14D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 4396fb <_Z14generate_chunkiR16global_variables._omp_fn.1+0x79b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R10D,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R10,%RSI,1),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 439160 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVSXD 0x60(%RBX),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ 0x28(%RBX),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%RBX),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x40(%RBX),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBX),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ 0x30(%RBX),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%RBX),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R12D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x18(%RBX),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R12D,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (,%R8,8),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x2,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 %XMM2,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA64 %XMM6,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA %XMM12,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD -0x38(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %EDX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 438fb8 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x58> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | generate_chunk.cpp:85-123 |
Module | exec |
nb instructions | 80 |
nb uops | 74 |
loop length | 303 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 5 |
micro-operation queue | 12.33 cycles |
front end | 12.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.00 | 5.50 | 5.25 | 5.25 | 6.00 | 6.33 | 6.33 | 6.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.00 | 5.50 | 5.25 | 5.25 | 6.00 | 6.33 | 6.33 | 6.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.33 |
Dispatch | 6.33 |
DIV/SQRT | 12.00 |
Overall L1 | 12.33 |
all | 9% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 23% |
all | 10% |
load | 11% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x5c(%RDI),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RDI),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R15D,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %R15D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 439160 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %R12D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 439160 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R14D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 4396fb <_Z14generate_chunkiR16global_variables._omp_fn.1+0x79b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %R10D,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R10,%RSI,1),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 439160 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVSXD 0x60(%RBX),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ 0x28(%RBX),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x20(%RBX),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x40(%RBX),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBX),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ 0x30(%RBX),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%RBX),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R12D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVQ 0x18(%RBX),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R12D,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (,%R8,8),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x2,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 %XMM2,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA64 %XMM6,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA %XMM12,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD -0x38(%RBP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %EDX,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 438fb8 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x58> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z14generate_chunkiR16global_variables._omp_fn.1– | 0.01 | 0.01 |
▼Loop 234 - generate_chunk.cpp:85-123 - exec– | 0 | 0 |
○Loop 236 - context.h:46-69 - exec | 0.01 | 0.01 |
○Loop 235 - generate_chunk.cpp:88-123 - exec | 0 | 0 |