Function: generate_chunk(int, global_variables&) [clone ._omp_fn.1] | Module: exec | Source: generate_chunk.cpp:85-123 [...] | Coverage: 0.01% |
---|
Function: generate_chunk(int, global_variables&) [clone ._omp_fn.1] | Module: exec | Source: generate_chunk.cpp:85-123 [...] | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/generate_chunk.cpp: 85 - 123 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for simd collapse(2) |
86: for (int j = (0); j < (yrange); j++) { |
87: for (int i = (0); i < (xrange); i++) { |
88: double x_cent = state_xmin[state]; |
89: double y_cent = state_ymin[state]; |
90: if (state_geometry[state] == g_rect) { |
91: if (field.vertexx[i + 1] >= state_xmin[state] && field.vertexx[i] < state_xmax[state]) { |
92: if (field.vertexy[j + 1] >= state_ymin[state] && field.vertexy[j] < state_ymax[state]) { |
93: field.energy0(i, j) = state_energy[state]; |
94: field.density0(i, j) = state_density[state]; |
95: for (int kt = j; kt <= j + 1; ++kt) { |
96: for (int jt = i; jt <= i + 1; ++jt) { |
97: field.xvel0(jt, kt) = state_xvel[state]; |
98: field.yvel0(jt, kt) = state_yvel[state]; |
99: } |
100: } |
101: } |
102: } |
103: } else if (state_geometry[state] == g_circ) { |
104: double radius = |
105: std::sqrt((field.cellx[i] - x_cent) * (field.cellx[i] - x_cent) + (field.celly[j] - y_cent) * (field.celly[j] - y_cent)); |
106: if (radius <= state_radius[state]) { |
107: field.energy0(i, j) = state_energy[state]; |
108: field.density0(i, j) = state_density[state]; |
109: for (int kt = j; kt <= j + 1; ++kt) { |
110: for (int jt = i; jt <= i + 1; ++jt) { |
111: field.xvel0(jt, kt) = state_xvel[state]; |
112: field.yvel0(jt, kt) = state_yvel[state]; |
113: } |
114: } |
115: } |
116: } else if (state_geometry[state] == g_point) { |
117: if (field.vertexx[i] == x_cent && field.vertexy[j] == y_cent) { |
118: field.energy0(i, j) = state_energy[state]; |
119: field.density0(i, j) = state_density[state]; |
120: for (int kt = j; kt <= j + 1; ++kt) { |
121: for (int jt = i; jt <= i + 1; ++jt) { |
122: field.xvel0(jt, kt) = state_xvel[state]; |
123: field.yvel0(jt, kt) = state_yvel[state]; |
0x437070 PUSH %RBP |
0x437071 MOV %RSP,%RBP |
0x437074 PUSH %R15 |
0x437076 PUSH %R14 |
0x437078 PUSH %R13 |
0x43707a PUSH %R12 |
0x43707c PUSH %RBX |
0x43707d SUB $0xa8,%RSP |
0x437084 MOV %RDI,-0x38(%RBP) |
0x437088 MOV 0x5c(%RDI),%R13D |
0x43708c MOV 0x58(%RDI),%R12D |
0x437090 TEST %R13D,%R13D |
0x437093 JLE 437260 |
0x437099 TEST %R12D,%R12D |
0x43709c JLE 437260 |
0x4370a2 CALL 404650 <omp_get_num_threads@plt> |
0x4370a7 MOV %EAX,%EBX |
0x4370a9 CALL 404540 <omp_get_thread_num@plt> |
0x4370ae XOR %EDX,%EDX |
0x4370b0 MOV -0x38(%RBP),%R9 |
0x4370b4 MOV %EAX,%ECX |
0x4370b6 MOV %R13D,%EAX |
0x4370b9 IMUL %R12D,%EAX |
0x4370bd DIV %EBX |
0x4370bf CMP %EDX,%ECX |
0x4370c1 MOV %EAX,%EDI |
0x4370c3 JB 4377a7 |
0x4370c9 IMUL %EDI,%ECX |
0x4370cc LEA (%RCX,%RDX,1),%ESI |
0x4370cf LEA (%RDI,%RSI,1),%R14D |
0x4370d3 CMP %R14D,%ESI |
0x4370d6 JAE 437260 |
0x4370dc MOV %ESI,%EAX |
0x4370de XOR %EDX,%EDX |
0x4370e0 MOVSXD 0x60(%R9),%R10 |
0x4370e4 MOV 0x20(%R9),%R8 |
0x4370e8 DIV %R12D |
0x4370eb MOV 0x30(%R9),%R15 |
0x4370ef MOV 0x48(%R9),%RBX |
0x4370f3 MOV %R12D,-0x7c(%RBP) |
0x4370f7 MOV %R8,-0x38(%RBP) |
0x4370fb MOV %R15,-0x50(%RBP) |
0x4370ff MOV 0x50(%R9),%R15 |
0x437103 MOV %R13D,-0x40(%RBP) |
0x437107 MOV %R9,-0x78(%RBP) |
0x43710b MOV %R14D,-0x80(%RBP) |
0x43710f LEA (,%R10,8),%R11 |
0x437117 LEA (,%R10,4),%R8 |
0x43711f MOV 0x18(%R9),%R10 |
0x437123 MOV %R12D,%ECX |
0x437126 MOV %R11,%R13 |
0x437129 MOV -0x38(%RBP),%R11 |
0x43712d MOV %R10,-0xc0(%RBP) |
0x437134 MOV 0x28(%R9),%R10 |
0x437138 MOV %R10,-0x60(%RBP) |
0x43713c MOV %EAX,-0x3c(%RBP) |
0x43713f MOV 0x10(%R9),%RAX |
0x437143 SUB %EDX,%ECX |
0x437145 MOV %RAX,-0xb8(%RBP) |
0x43714c MOV 0x40(%R9),%RAX |
0x437150 MOV -0x50(%RBP),%R9 |
0x437154 MOV %RAX,-0x48(%RBP) |
0x437158 MOVSXD -0x3c(%RBP),%RAX |
0x43715c MOV %RAX,%R14 |
0x43715f NOP |
(237) 0x437160 CMP %ECX,%EDI |
(237) 0x437162 CMOVBE %EDI,%ECX |
(237) 0x437165 INCL -0x3c(%RBP) |
(237) 0x437168 LEA (%RSI,%RCX,1),%R10D |
(237) 0x43716c CMP %R10D,%ESI |
(237) 0x43716f JAE 437243 |
(237) 0x437175 MOV 0x8(%RBX),%RSI |
(237) 0x437179 MOV 0x8(%R11),%RDI |
(237) 0x43717d LEA (,%R14,8),%RAX |
(237) 0x437185 MOVSXD %EDX,%RDX |
(237) 0x437188 MOV 0x8(%R9),%R12 |
(237) 0x43718c MOV %RAX,-0x38(%RBP) |
(237) 0x437190 DEC %ECX |
(237) 0x437192 LEA 0x1(%RDX),%RAX |
(237) 0x437196 MOV (%RSI,%R8,1),%ESI |
(237) 0x43719a ADD %R13,%RDI |
(237) 0x43719d ADD %RAX,%RCX |
(237) 0x4371a0 ADD %R13,%R12 |
(237) 0x4371a3 MOV %RDI,-0x50(%RBP) |
(237) 0x4371a7 VMOVSD (%RDI),%XMM15 |
(237) 0x4371ab MOV %R12,-0x58(%RBP) |
(237) 0x4371af VMOVSD (%R12),%XMM1 |
(237) 0x4371b5 CMP $0x1,%ESI |
(237) 0x4371b8 JNE 437278 |
(237) 0x4371be MOV %R9,-0x68(%RBP) |
(237) 0x4371c2 MOV 0x258(%R15),%RSI |
(237) 0x4371c9 MOV %R8,-0x70(%RBP) |
(237) 0x4371cd JMP 4371d3 |
0x4371cf NOP |
(239) 0x4371d0 INC %RAX |
(239) 0x4371d3 VCOMISD 0x8(%RSI,%RDX,8),%XMM15 |
(239) 0x4371d9 JA 437230 |
(239) 0x4371db MOV -0x60(%RBP),%R8 |
(239) 0x4371df MOV 0x8(%R8),%R12 |
(239) 0x4371e3 VMOVSD (%R12,%R13,1),%XMM9 |
(239) 0x4371e9 VCOMISD (%RSI,%RDX,8),%XMM9 |
(239) 0x4371ee JBE 437230 |
(239) 0x4371f0 MOV 0x278(%R15),%RDI |
(239) 0x4371f7 MOV -0x38(%RBP),%R9 |
(239) 0x4371fb VCOMISD 0x8(%RDI,%R9,1),%XMM1 |
(239) 0x437202 JA 437230 |
(239) 0x437204 MOV -0x78(%RBP),%R8 |
(239) 0x437208 MOV 0x38(%R8),%R12 |
(239) 0x43720c MOV -0x38(%RBP),%R8 |
(239) 0x437210 MOV 0x8(%R12),%R9 |
(239) 0x437215 VMOVSD (%R9,%R13,1),%XMM10 |
(239) 0x43721b VCOMISD (%RDI,%R8,1),%XMM10 |
(239) 0x437221 JA 4375e0 |
(239) 0x437227 NOPW (%RAX,%RAX,1) |
(239) 0x437230 MOV %RAX,%RDX |
(239) 0x437233 CMP %RCX,%RAX |
(239) 0x437236 JNE 4371d0 |
(237) 0x437238 MOV -0x68(%RBP),%R9 |
(237) 0x43723c MOV -0x70(%RBP),%R8 |
(237) 0x437240 MOV %R10D,%ESI |
(237) 0x437243 MOV -0x3c(%RBP),%EAX |
(237) 0x437246 INC %R14 |
(237) 0x437249 CMP %EAX,-0x40(%RBP) |
(237) 0x43724c JLE 437260 |
(237) 0x43724e MOV -0x80(%RBP),%EDI |
(237) 0x437251 MOV -0x7c(%RBP),%ECX |
(237) 0x437254 XOR %EDX,%EDX |
(237) 0x437256 SUB %ESI,%EDI |
(237) 0x437258 JMP 437160 |
0x43725d NOPL (%RAX) |
0x437260 ADD $0xa8,%RSP |
0x437267 POP %RBX |
0x437268 POP %R12 |
0x43726a POP %R13 |
0x43726c POP %R14 |
0x43726e POP %R15 |
0x437270 POP %RBP |
0x437271 RET |
0x437272 NOPW (%RAX,%RAX,1) |
(237) 0x437278 MOVSXD -0x3c(%RBP),%RDI |
(237) 0x43727c MOV %R8,-0xa8(%RBP) |
(237) 0x437283 MOV %R14,%R12 |
(237) 0x437286 MOV %RCX,%R8 |
(237) 0x437289 MOV %R10D,-0x88(%RBP) |
(237) 0x437290 MOV %RDI,-0xb0(%RBP) |
(237) 0x437297 MOV %R11,-0x90(%RBP) |
(237) 0x43729e MOV %R9,-0x98(%RBP) |
(237) 0x4372a5 MOV %RBX,-0xa0(%RBP) |
(237) 0x4372ac JMP 4372c8 |
0x4372ae XCHG %AX,%AX |
(238) 0x4372b0 CMP $0x3,%ESI |
(238) 0x4372b3 JE 437460 |
(238) 0x4372b9 MOV %RAX,%RDX |
(238) 0x4372bc CMP %R8,%RAX |
(238) 0x4372bf JE 4375b0 |
(238) 0x4372c5 INC %RAX |
(238) 0x4372c8 CMP $0x2,%ESI |
(238) 0x4372cb JNE 4372b0 |
(238) 0x4372cd MOV 0x238(%R15),%RDI |
(238) 0x4372d4 MOV -0x38(%RBP),%RBX |
(238) 0x4372d8 MOV 0x218(%R15),%R9 |
(238) 0x4372df MOV -0x48(%RBP),%RCX |
(238) 0x4372e3 VMOVSD (%RDI,%RBX,1),%XMM12 |
(238) 0x4372e8 VMOVSD (%R9,%RDX,8),%XMM10 |
(238) 0x4372ee MOV 0x8(%RCX),%R14 |
(238) 0x4372f2 VSUBSD %XMM1,%XMM12,%XMM13 |
(238) 0x4372f6 VSUBSD %XMM15,%XMM10,%XMM11 |
(238) 0x4372fb VMULSD %XMM13,%XMM13,%XMM14 |
(238) 0x437300 VFMADD132SD %XMM11,%XMM14,%XMM11 |
(238) 0x437305 VSQRTSD %XMM11,%XMM11,%XMM11 |
(238) 0x43730a VCOMISD (%R14,%R13,1),%XMM11 |
(238) 0x437310 JA 4372b9 |
(238) 0x437312 MOV -0x78(%RBP),%R11 |
(238) 0x437316 MOV 0x30(%R15),%RBX |
(238) 0x43731a MOV 0x40(%R15),%RDI |
(238) 0x43731e MOV 0x8(%R11),%R10 |
(238) 0x437322 IMUL %R12,%RBX |
(238) 0x437326 MOV (%R11),%RCX |
(238) 0x437329 MOV (%R15),%R11 |
(238) 0x43732c MOV 0x8(%R10),%R9 |
(238) 0x437330 MOV 0x8(%RCX),%R14 |
(238) 0x437334 ADD %RDX,%RBX |
(238) 0x437337 IMUL %R12,%R11 |
(238) 0x43733b MOV 0x10(%R15),%R10 |
(238) 0x43733f MOV 0xd8(%R15),%RCX |
(238) 0x437346 VMOVSD (%R9,%R13,1),%XMM15 |
(238) 0x43734c MOV -0xb8(%RBP),%R9 |
(238) 0x437353 VMOVSD %XMM15,(%RDI,%RBX,8) |
(238) 0x437358 MOV 0xa8(%R15),%RDI |
(238) 0x43735f MOV -0xc0(%RBP),%RBX |
(238) 0x437366 ADD %RDX,%R11 |
(238) 0x437369 VMOVSD (%R14,%R13,1),%XMM0 |
(238) 0x43736f MOV 0xe8(%R15),%R14 |
(238) 0x437376 VMOVSD %XMM0,(%R10,%R11,8) |
(238) 0x43737c MOV 0x8(%R9),%R11 |
(238) 0x437380 MOV 0x8(%RBX),%R9 |
(238) 0x437384 MOV %RDI,%RBX |
(238) 0x437387 IMUL %R12,%RBX |
(238) 0x43738b MOV 0xb8(%R15),%R10 |
(238) 0x437392 ADD %R13,%R11 |
(238) 0x437395 ADD %R13,%R9 |
(238) 0x437398 VMOVSD (%R11),%XMM1 |
(238) 0x43739d MOV %RBX,-0x70(%RBP) |
(238) 0x4373a1 MOV %RCX,%RBX |
(238) 0x4373a4 IMUL %R12,%RBX |
(238) 0x4373a8 MOV %RBX,-0x68(%RBP) |
(238) 0x4373ac MOV -0x70(%RBP),%RBX |
(238) 0x4373b0 ADD %RDX,%RBX |
(238) 0x4373b3 VMOVSD %XMM1,(%R10,%RBX,8) |
(238) 0x4373b9 MOV -0x68(%RBP),%RBX |
(238) 0x4373bd VMOVSD (%R9),%XMM2 |
(238) 0x4373c2 ADD %RDX,%RBX |
(238) 0x4373c5 VMOVSD %XMM2,(%R14,%RBX,8) |
(238) 0x4373cb MOV -0x70(%RBP),%RBX |
(238) 0x4373cf VMOVSD (%R11),%XMM3 |
(238) 0x4373d4 ADD %RAX,%RBX |
(238) 0x4373d7 VMOVSD %XMM3,(%R10,%RBX,8) |
(238) 0x4373dd VMOVSD (%R9),%XMM4 |
(238) 0x4373e2 MOV -0x68(%RBP),%RBX |
(238) 0x4373e6 ADD %RAX,%RBX |
(238) 0x4373e9 VMOVSD %XMM4,(%R14,%RBX,8) |
(238) 0x4373ef MOV -0xb0(%RBP),%RBX |
(238) 0x4373f6 VMOVSD (%R11),%XMM5 |
(238) 0x4373fb IMUL %RBX,%RDI |
(238) 0x4373ff IMUL %RBX,%RCX |
(238) 0x437403 LEA (%RDI,%RDX,1),%RBX |
(238) 0x437407 ADD %RAX,%RDI |
(238) 0x43740a VMOVSD %XMM5,(%R10,%RBX,8) |
(238) 0x437410 ADD %RCX,%RDX |
(238) 0x437413 ADD %RAX,%RCX |
(238) 0x437416 VMOVSD (%R9),%XMM6 |
(238) 0x43741b VMOVSD %XMM6,(%R14,%RDX,8) |
(238) 0x437421 MOV %RAX,%RDX |
(238) 0x437424 VMOVSD (%R11),%XMM7 |
(238) 0x437429 VMOVSD %XMM7,(%R10,%RDI,8) |
(238) 0x43742f VMOVSD (%R9),%XMM8 |
(238) 0x437434 VMOVSD %XMM8,(%R14,%RCX,8) |
(238) 0x43743a CMP %R8,%RAX |
(238) 0x43743d JE 4375b0 |
(238) 0x437443 MOV -0x50(%RBP),%R11 |
(238) 0x437447 MOV -0x58(%RBP),%R10 |
(238) 0x43744b VMOVSD (%R11),%XMM15 |
(238) 0x437450 VMOVSD (%R10),%XMM1 |
(238) 0x437455 JMP 4372c5 |
0x43745a NOPW (%RAX,%RAX,1) |
(238) 0x437460 MOV 0x258(%R15),%RBX |
(238) 0x437467 VCOMISD (%RBX,%RDX,8),%XMM15 |
(238) 0x43746c JNE 4372b9 |
(238) 0x437472 MOV 0x278(%R15),%RCX |
(238) 0x437479 MOV -0x38(%RBP),%R9 |
(238) 0x43747d VCOMISD (%RCX,%R9,1),%XMM1 |
(238) 0x437483 JNE 4372b9 |
(238) 0x437489 MOV -0x78(%RBP),%R14 |
(238) 0x43748d MOV 0x30(%R15),%RBX |
(238) 0x437491 MOV 0x40(%R15),%RDI |
(238) 0x437495 MOV 0x8(%R14),%R11 |
(238) 0x437499 IMUL %R12,%RBX |
(238) 0x43749d MOV (%R14),%RCX |
(238) 0x4374a0 MOV (%R15),%R14 |
(238) 0x4374a3 MOV 0x8(%R11),%R10 |
(238) 0x4374a7 MOV 0x8(%RCX),%R9 |
(238) 0x4374ab ADD %RDX,%RBX |
(238) 0x4374ae IMUL %R12,%R14 |
(238) 0x4374b2 MOV 0x10(%R15),%R11 |
(238) 0x4374b6 MOV 0xe8(%R15),%RCX |
(238) 0x4374bd VMOVSD (%R10,%R13,1),%XMM0 |
(238) 0x4374c3 MOV -0xb8(%RBP),%R10 |
(238) 0x4374ca VMOVSD %XMM0,(%RDI,%RBX,8) |
(238) 0x4374cf ADD %RDX,%R14 |
(238) 0x4374d2 MOV -0xc0(%RBP),%RDI |
(238) 0x4374d9 MOV 0xb8(%R15),%RBX |
(238) 0x4374e0 VMOVSD (%R9,%R13,1),%XMM1 |
(238) 0x4374e6 MOV 0x8(%R10),%R9 |
(238) 0x4374ea MOV 0x8(%RDI),%RDI |
(238) 0x4374ee VMOVSD %XMM1,(%R11,%R14,8) |
(238) 0x4374f4 MOV 0xd8(%R15),%R14 |
(238) 0x4374fb MOV 0xa8(%R15),%R11 |
(238) 0x437502 ADD %R13,%R9 |
(238) 0x437505 VMOVSD (%R9),%XMM2 |
(238) 0x43750a ADD %R13,%RDI |
(238) 0x43750d MOV %R14,%R10 |
(238) 0x437510 MOV %R11,-0x70(%RBP) |
(238) 0x437514 IMUL %R12,%R11 |
(238) 0x437518 IMUL %R12,%R10 |
(238) 0x43751c MOV %R10,-0x68(%RBP) |
(238) 0x437520 LEA (%R11,%RDX,1),%R10 |
(238) 0x437524 ADD %RAX,%R11 |
(238) 0x437527 VMOVSD %XMM2,(%RBX,%R10,8) |
(238) 0x43752d VMOVSD (%RDI),%XMM3 |
(238) 0x437531 MOV -0x68(%RBP),%R10 |
(238) 0x437535 ADD %RDX,%R10 |
(238) 0x437538 VMOVSD %XMM3,(%RCX,%R10,8) |
(238) 0x43753e VMOVSD (%R9),%XMM4 |
(238) 0x437543 VMOVSD %XMM4,(%RBX,%R11,8) |
(238) 0x437549 MOV -0x68(%RBP),%R11 |
(238) 0x43754d VMOVSD (%RDI),%XMM5 |
(238) 0x437551 ADD %RAX,%R11 |
(238) 0x437554 VMOVSD %XMM5,(%RCX,%R11,8) |
(238) 0x43755a MOVSXD -0x3c(%RBP),%R10 |
(238) 0x43755e MOV -0x70(%RBP),%R11 |
(238) 0x437562 VMOVSD (%R9),%XMM6 |
(238) 0x437567 IMUL %R10,%R11 |
(238) 0x43756b IMUL %R10,%R14 |
(238) 0x43756f LEA (%R11,%RDX,1),%R10 |
(238) 0x437573 ADD %RAX,%R11 |
(238) 0x437576 VMOVSD %XMM6,(%RBX,%R10,8) |
(238) 0x43757c ADD %R14,%RDX |
(238) 0x43757f ADD %RAX,%R14 |
(238) 0x437582 VMOVSD (%RDI),%XMM7 |
(238) 0x437586 VMOVSD %XMM7,(%RCX,%RDX,8) |
(238) 0x43758b MOV %RAX,%RDX |
(238) 0x43758e VMOVSD (%R9),%XMM8 |
(238) 0x437593 VMOVSD %XMM8,(%RBX,%R11,8) |
(238) 0x437599 VMOVSD (%RDI),%XMM9 |
(238) 0x43759d VMOVSD %XMM9,(%RCX,%R14,8) |
(238) 0x4375a3 CMP %R8,%RAX |
(238) 0x4375a6 JNE 437443 |
(237) 0x4375ac NOPL (%RAX) |
(237) 0x4375b0 MOV -0x88(%RBP),%R10D |
(237) 0x4375b7 MOV -0x90(%RBP),%R11 |
(237) 0x4375be MOV %R12,%R14 |
(237) 0x4375c1 MOV -0x98(%RBP),%R9 |
(237) 0x4375c8 MOV -0xa0(%RBP),%RBX |
(237) 0x4375cf MOV -0xa8(%RBP),%R8 |
(237) 0x4375d6 JMP 437240 |
0x4375db NOPL (%RAX,%RAX,1) |
(239) 0x4375e0 MOV -0x78(%RBP),%R9 |
(239) 0x4375e4 MOV 0x30(%R15),%R8 |
(239) 0x4375e8 MOV 0x8(%R9),%RDI |
(239) 0x4375ec IMUL %R14,%R8 |
(239) 0x4375f0 MOV (%R9),%R9 |
(239) 0x4375f3 MOV 0x8(%RDI),%R12 |
(239) 0x4375f7 MOV 0x40(%R15),%RDI |
(239) 0x4375fb ADD %RDX,%R8 |
(239) 0x4375fe VMOVSD (%R12,%R13,1),%XMM11 |
(239) 0x437604 MOV 0x8(%R9),%R12 |
(239) 0x437608 MOV -0xb8(%RBP),%R9 |
(239) 0x43760f VMOVSD %XMM11,(%RDI,%R8,8) |
(239) 0x437615 MOV (%R15),%R8 |
(239) 0x437618 MOV 0x10(%R15),%RDI |
(239) 0x43761c VMOVSD (%R12,%R13,1),%XMM12 |
(239) 0x437622 MOV 0x8(%R9),%R12 |
(239) 0x437626 IMUL %R14,%R8 |
(239) 0x43762a MOV -0xc0(%RBP),%R9 |
(239) 0x437631 ADD %R13,%R12 |
(239) 0x437634 MOV %R12,-0xa0(%RBP) |
(239) 0x43763b ADD %RDX,%R8 |
(239) 0x43763e VMOVSD %XMM12,(%RDI,%R8,8) |
(239) 0x437644 MOV 0xb8(%R15),%R8 |
(239) 0x43764b MOV 0xa8(%R15),%RDI |
(239) 0x437652 VMOVSD (%R12),%XMM13 |
(239) 0x437658 MOV %R8,-0x90(%RBP) |
(239) 0x43765f MOV 0x8(%R9),%R8 |
(239) 0x437663 MOV 0xe8(%R15),%R9 |
(239) 0x43766a MOV %RDI,-0xc8(%RBP) |
(239) 0x437671 IMUL %R14,%RDI |
(239) 0x437675 ADD %R13,%R8 |
(239) 0x437678 MOV %R8,-0x88(%RBP) |
(239) 0x43767f MOV -0x90(%RBP),%R8 |
(239) 0x437686 MOV %R9,-0x98(%RBP) |
(239) 0x43768d MOV 0xd8(%R15),%R9 |
(239) 0x437694 LEA (%RDI,%RDX,1),%R12 |
(239) 0x437698 ADD %RAX,%RDI |
(239) 0x43769b VMOVSD %XMM13,(%R8,%R12,8) |
(239) 0x4376a1 MOV %R9,-0xb0(%RBP) |
(239) 0x4376a8 IMUL %R14,%R9 |
(239) 0x4376ac MOV -0x88(%RBP),%R8 |
(239) 0x4376b3 VMOVSD (%R8),%XMM14 |
(239) 0x4376b8 LEA (%R9,%RDX,1),%R12 |
(239) 0x4376bc MOV %R9,-0xa8(%RBP) |
(239) 0x4376c3 MOV -0x98(%RBP),%R9 |
(239) 0x4376ca VMOVSD %XMM14,(%R9,%R12,8) |
(239) 0x4376d0 MOV -0xa0(%RBP),%R12 |
(239) 0x4376d7 VMOVSD (%R12),%XMM0 |
(239) 0x4376dd MOV -0x90(%RBP),%R12 |
(239) 0x4376e4 VMOVSD %XMM0,(%R12,%RDI,8) |
(239) 0x4376ea MOV -0xa8(%RBP),%RDI |
(239) 0x4376f1 VMOVSD (%R8),%XMM2 |
(239) 0x4376f6 MOV -0xa0(%RBP),%R8 |
(239) 0x4376fd LEA (%RDI,%RAX,1),%R12 |
(239) 0x437701 MOV -0xc8(%RBP),%RDI |
(239) 0x437708 VMOVSD %XMM2,(%R9,%R12,8) |
(239) 0x43770e MOVSXD -0x3c(%RBP),%R12 |
(239) 0x437712 MOV -0xb0(%RBP),%R9 |
(239) 0x437719 VMOVSD (%R8),%XMM3 |
(239) 0x43771e MOV -0x90(%RBP),%R8 |
(239) 0x437725 IMUL %R12,%RDI |
(239) 0x437729 IMUL %R9,%R12 |
(239) 0x43772d LEA (%RDI,%RDX,1),%R9 |
(239) 0x437731 ADD %RAX,%RDI |
(239) 0x437734 VMOVSD %XMM3,(%R8,%R9,8) |
(239) 0x43773a MOV -0x88(%RBP),%R9 |
(239) 0x437741 ADD %R12,%RDX |
(239) 0x437744 MOV -0x90(%RBP),%R8 |
(239) 0x43774b LEA (%R12,%RAX,1),%R12 |
(239) 0x43774f VMOVSD (%R9),%XMM4 |
(239) 0x437754 MOV -0x98(%RBP),%R9 |
(239) 0x43775b VMOVSD %XMM4,(%R9,%RDX,8) |
(239) 0x437761 MOV -0xa0(%RBP),%RDX |
(239) 0x437768 VMOVSD (%RDX),%XMM5 |
(239) 0x43776c MOV %RAX,%RDX |
(239) 0x43776f VMOVSD %XMM5,(%R8,%RDI,8) |
(239) 0x437775 MOV -0x88(%RBP),%RDI |
(239) 0x43777c VMOVSD (%RDI),%XMM6 |
(239) 0x437780 VMOVSD %XMM6,(%R9,%R12,8) |
(239) 0x437786 CMP %RCX,%RAX |
(239) 0x437789 JE 437238 |
(239) 0x43778f MOV -0x50(%RBP),%R12 |
(239) 0x437793 MOV -0x58(%RBP),%R9 |
(239) 0x437797 VMOVSD (%R12),%XMM15 |
(239) 0x43779d VMOVSD (%R9),%XMM1 |
(239) 0x4377a2 JMP 4371d0 |
0x4377a7 INC %EDI |
0x4377a9 XOR %EDX,%EDX |
0x4377ab JMP 4370c9 |
Path / |
Source file and lines | generate_chunk.cpp:85-123 |
Module | exec |
nb instructions | 82 |
nb uops | 90 |
loop length | 290 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.70 | 8.00 | 7.33 | 7.33 | 10.00 | 3.80 | 3.70 | 10.00 | 10.00 | 10.00 | 3.80 | 7.33 |
cycles | 3.70 | 10.00 | 7.33 | 7.33 | 10.00 | 3.80 | 3.70 | 10.00 | 10.00 | 10.00 | 3.80 | 7.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.48-14.54 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 10.00 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 9% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xa8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x5c(%RDI),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R13D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 437260 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R12D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 437260 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %EBX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JB 4377a7 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x737> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RCX,%RDX,1),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%RSI,1),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 437260 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD 0x60(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R9),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0x30(%R9),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%R9),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%R9),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13D,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R10,4),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R9),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%R9),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD -0x3c(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0xa8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4370c9 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x59> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | generate_chunk.cpp:85-123 |
Module | exec |
nb instructions | 82 |
nb uops | 90 |
loop length | 290 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.70 | 8.00 | 7.33 | 7.33 | 10.00 | 3.80 | 3.70 | 10.00 | 10.00 | 10.00 | 3.80 | 7.33 |
cycles | 3.70 | 10.00 | 7.33 | 7.33 | 10.00 | 3.80 | 3.70 | 10.00 | 10.00 | 10.00 | 3.80 | 7.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.48-14.54 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 10.00 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 9% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xa8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x5c(%RDI),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R13D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 437260 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R12D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 437260 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %EBX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JB 4377a7 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x737> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RCX,%RDX,1),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%RSI,1),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 437260 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD 0x60(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R9),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0x30(%R9),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%R9),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%R9),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13D,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R10,4),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R9),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%R9),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD -0x3c(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0xa8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4370c9 <_Z14generate_chunkiR16global_variables._omp_fn.1+0x59> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼generate_chunk(int, global_variables&) [clone ._omp_fn.1]– | 0.01 | 0.01 |
▼Loop 237 - generate_chunk.cpp:85-123 - exec– | 0 | 0 |
○Loop 239 - context.h:46-69 - exec | 0.01 | 0.01 |
○Loop 238 - generate_chunk.cpp:88-123 - exec | 0 | 0 |