| Function: std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, ... | Module: attention-gcc-gnr-256 | Source: random.tcc:397-425 [...] | Coverage (incl. loops): 0.20% | (excl. loops): 0.10% |
|---|
| Function: std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, ... | Module: attention-gcc-gnr-256 | Source: random.tcc:397-425 [...] | Coverage (incl. loops): 0.20% | (excl. loops): 0.10% |
|---|
/cluster/comp/gcc/15.1.0/include/c++/15.1.0/bits/random.tcc: 397 - 425 |
-------------------------------------------------------------------------------- |
397: mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, |
[...] |
404: for (size_t __k = 0; __k < (__n - __m); ++__k) |
405: { |
406: _UIntType __y = ((_M_x[__k] & __upper_mask) |
407: | (_M_x[__k + 1] & __lower_mask)); |
408: _M_x[__k] = (_M_x[__k + __m] ^ (__y >> 1) |
409: ^ ((__y & 0x01) ? __a : 0)); |
410: } |
411: |
412: for (size_t __k = (__n - __m); __k < (__n - 1); ++__k) |
413: { |
414: _UIntType __y = ((_M_x[__k] & __upper_mask) |
415: | (_M_x[__k + 1] & __lower_mask)); |
416: _M_x[__k] = (_M_x[__k + (__m - __n)] ^ (__y >> 1) |
417: ^ ((__y & 0x01) ? __a : 0)); |
418: } |
419: |
420: _UIntType __y = ((_M_x[__n - 1] & __upper_mask) |
421: | (_M_x[0] & __lower_mask)); |
422: _M_x[__n - 1] = (_M_x[__m - 1] ^ (__y >> 1) |
423: ^ ((__y & 0x01) ? __a : 0)); |
424: _M_p = 0; |
425: } |
0x403140 MOV $-0x80000000,%RCX |
0x403147 MOV $0x7fffffff,%ESI |
0x40314c MOV $0x1,%R9D |
0x403152 MOV $-0x66f74f21,%R8D |
0x403158 LEA 0x700(%RDI),%RDX |
0x40315f MOV %RDI,%RAX |
0x403162 VPBROADCASTQ %RCX,%YMM5 |
0x403168 VPBROADCASTQ %RSI,%YMM4 |
0x40316e VPBROADCASTQ %R9,%YMM3 |
0x403174 VPBROADCASTQ %R8,%YMM2 |
0x40317a NOPW (%RAX,%RAX,1) |
(37) 0x403180 VPANDQ 0x8(%RAX),%YMM4,%YMM1 |
(37) 0x40318a VMOVDQA %YMM5,%YMM0 |
(37) 0x40318e VPTERNLOGQ $-0x14,(%RAX),%YMM1,%YMM0 |
(37) 0x403195 VPSRLQ $0x1,%YMM0,%YMM1 |
(37) 0x40319a VPANDQ %YMM3,%YMM0,%YMM0 |
(37) 0x4031a0 VPMULLQ %YMM2,%YMM0,%YMM0 |
(37) 0x4031a6 ADD $0x20,%RAX |
(37) 0x4031aa VPTERNLOGQ $-0x6a,0xc48(%RAX),%YMM0,%YMM1 |
(37) 0x4031b5 VMOVDQU %YMM1,-0x20(%RAX) |
(37) 0x4031ba CMP %RAX,%RDX |
(37) 0x4031bd JNE 403180 |
0x4031bf MOV 0x710(%RDI),%RAX |
0x4031c6 MOV 0x718(%RDI),%RDX |
0x4031cd VPBROADCASTQ %RSI,%XMM1 |
0x4031d3 VPANDQ 0x708(%RDI),%XMM1,%XMM1 |
0x4031dd VPBROADCASTQ %RCX,%XMM0 |
0x4031e3 AND $0x7fffffff,%EDX |
0x4031e9 AND $-0x80000000,%RAX |
0x4031ef VPTERNLOGQ $-0x14,0x700(%RDI),%XMM1,%XMM0 |
0x4031f7 OR %RDX,%RAX |
0x4031fa VPBROADCASTQ %R9,%XMM1 |
0x403200 VPSRLQ $0x1,%XMM0,%XMM2 |
0x403205 MOV %RAX,%RDX |
0x403208 SHR $0x1,%RDX |
0x40320b VPANDQ %XMM1,%XMM0,%XMM0 |
0x403211 AND $0x1,%EAX |
0x403214 VPBROADCASTQ %R8,%XMM1 |
0x40321a VPMULLQ %XMM1,%XMM0,%XMM0 |
0x403220 NEG %RAX |
0x403223 XOR 0x1378(%RDI),%RDX |
0x40322a AND $-0x66f74f21,%EAX |
0x40322f XOR %RDX,%RAX |
0x403232 MOV $-0x80000000,%RDX |
0x403239 VPBROADCASTQ %RDX,%YMM5 |
0x40323f MOV $0x7fffffff,%EDX |
0x403244 VPBROADCASTQ %RDX,%YMM4 |
0x40324a MOV $0x1,%EDX |
0x40324f VPTERNLOGQ $-0x6a,0x1368(%RDI),%XMM0,%XMM2 |
0x40325a VPBROADCASTQ %RDX,%YMM3 |
0x403260 MOV $-0x66f74f21,%EDX |
0x403265 MOV %RAX,0x710(%RDI) |
0x40326c VMOVDQU %XMM2,0x700(%RDI) |
0x403274 LEA 0x718(%RDI),%RAX |
0x40327b LEA 0x1378(%RDI),%RCX |
0x403282 VPBROADCASTQ %RDX,%YMM2 |
0x403288 NOPW %CS:(%RAX,%RAX,1) |
0x403293 NOPW %CS:(%RAX,%RAX,1) |
0x40329e NOPW %CS:(%RAX,%RAX,1) |
0x4032a9 NOPW %CS:(%RAX,%RAX,1) |
0x4032b4 NOPW %CS:(%RAX,%RAX,1) |
0x4032bf NOP |
(38) 0x4032c0 VPANDQ 0x8(%RAX),%YMM4,%YMM1 |
(38) 0x4032ca VMOVDQA %YMM5,%YMM0 |
(38) 0x4032ce VPTERNLOGQ $-0x14,(%RAX),%YMM1,%YMM0 |
(38) 0x4032d5 VPSRLQ $0x1,%YMM0,%YMM1 |
(38) 0x4032da VPANDQ %YMM3,%YMM0,%YMM0 |
(38) 0x4032e0 VPMULLQ %YMM2,%YMM0,%YMM0 |
(38) 0x4032e6 ADD $0x20,%RAX |
(38) 0x4032ea VPTERNLOGQ $-0x6a,-0x738(%RAX),%YMM0,%YMM1 |
(38) 0x4032f5 VMOVDQU %YMM1,-0x20(%RAX) |
(38) 0x4032fa CMP %RAX,%RCX |
(38) 0x4032fd JNE 4032c0 |
0x4032ff MOV 0x1378(%RDI),%RAX |
0x403306 MOV (%RDI),%RDX |
0x403309 AND $-0x80000000,%RAX |
0x40330f AND $0x7fffffff,%EDX |
0x403315 OR %RDX,%RAX |
0x403318 MOV %RAX,%RDX |
0x40331b SHR $0x1,%RDX |
0x40331e AND $0x1,%EAX |
0x403321 NEG %RAX |
0x403324 XOR 0xc60(%RDI),%RDX |
0x40332b AND $-0x66f74f21,%EAX |
0x403330 XOR %RDX,%RAX |
0x403333 MOV %RAX,0x1378(%RDI) |
0x40333a MOVQ $0,0x1380(%RDI) |
0x403345 VZEROUPPER |
0x403348 RET |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ○50.00 | main | random.tcc:462 | attention-gcc-gnr-256 |
| ○50.00 | main | random.tcc:462 | attention-gcc-gnr-256 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.10% of application time for run run_0
| Source file and lines | random.tcc:397-425 |
| Module | attention-gcc-gnr-256 |
| nb instructions | 67 |
| nb uops | 74 |
| loop length | 395 |
| used x86 registers | 7 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 4 |
| used zmm registers | 0 |
| nb stack references | 0 |
| micro-operation queue | 12.33 cycles |
| front end | 12.33 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 8.77 | 8.73 | 3.33 | 3.33 | 2.00 | 12.00 | 8.70 | 2.00 | 2.00 | 2.00 | 8.80 | 3.33 |
| cycles | 8.77 | 8.73 | 3.33 | 3.33 | 2.00 | 12.00 | 8.70 | 2.00 | 2.00 | 2.00 | 8.80 | 3.33 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 12.33 |
| Dispatch | 12.00 |
| Overall L1 | 12.33 |
| all | 14% |
| load | 33% |
| store | 25% |
| mul | 100% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 12% |
| all | 12% |
| load | 16% |
| store | 14% |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 12% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV $-0x80000000,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| MOV $0x7fffffff,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| MOV $0x1,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| MOV $-0x66f74f21,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| LEA 0x700(%RDI),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VPBROADCASTQ %RCX,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPBROADCASTQ %RSI,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPBROADCASTQ %R9,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPBROADCASTQ %R8,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| MOV 0x710(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x718(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VPBROADCASTQ %RSI,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPANDQ 0x708(%RDI),%XMM1,%XMM1 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | vect (25.0%) |
| VPBROADCASTQ %RCX,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| AND $0x7fffffff,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| AND $-0x80000000,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VPTERNLOGQ $-0x14,0x700(%RDI),%XMM1,%XMM0 | 2 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | vect (25.0%) |
| OR %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VPBROADCASTQ %R9,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPSRLQ $0x1,%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 | vect (25.0%) |
| MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| SHR $0x1,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
| VPANDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
| AND $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| VPBROADCASTQ %R8,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPMULLQ %XMM1,%XMM0,%XMM0 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 | vect (25.0%) |
| NEG %RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| XOR 0x1378(%RDI),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1-2 | 0.33 | scal (12.5%) |
| AND $-0x66f74f21,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| XOR %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV $-0x80000000,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| VPBROADCASTQ %RDX,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| MOV $0x7fffffff,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| VPBROADCASTQ %RDX,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| MOV $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| VPTERNLOGQ $-0x6a,0x1368(%RDI),%XMM0,%XMM2 | 2 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | vect (25.0%) |
| VPBROADCASTQ %RDX,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| MOV $-0x66f74f21,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| MOV %RAX,0x710(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VMOVDQU %XMM2,0x700(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| LEA 0x718(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| LEA 0x1378(%RDI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VPBROADCASTQ %RDX,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| MOV 0x1378(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV (%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| AND $-0x80000000,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| AND $0x7fffffff,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| OR %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| SHR $0x1,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
| AND $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| NEG %RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| XOR 0xc60(%RDI),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1-2 | 0.33 | scal (12.5%) |
| AND $-0x66f74f21,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| XOR %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RAX,0x1378(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOVQ $0,0x1380(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.10% of application time for run run_0
| Source file and lines | random.tcc:397-425 |
| Module | attention-gcc-gnr-256 |
| nb instructions | 67 |
| nb uops | 74 |
| loop length | 395 |
| used x86 registers | 7 |
| used mmx registers | 0 |
| used xmm registers | 3 |
| used ymm registers | 4 |
| used zmm registers | 0 |
| nb stack references | 0 |
| micro-operation queue | 12.33 cycles |
| front end | 12.33 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 8.77 | 8.73 | 3.33 | 3.33 | 2.00 | 12.00 | 8.70 | 2.00 | 2.00 | 2.00 | 8.80 | 3.33 |
| cycles | 8.77 | 8.73 | 3.33 | 3.33 | 2.00 | 12.00 | 8.70 | 2.00 | 2.00 | 2.00 | 8.80 | 3.33 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 12.33 |
| Dispatch | 12.00 |
| Overall L1 | 12.33 |
| all | 14% |
| load | 33% |
| store | 25% |
| mul | 100% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 12% |
| all | 12% |
| load | 16% |
| store | 14% |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 12% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV $-0x80000000,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| MOV $0x7fffffff,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| MOV $0x1,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| MOV $-0x66f74f21,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| LEA 0x700(%RDI),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| VPBROADCASTQ %RCX,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPBROADCASTQ %RSI,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPBROADCASTQ %R9,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPBROADCASTQ %R8,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| MOV 0x710(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV 0x718(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| VPBROADCASTQ %RSI,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPANDQ 0x708(%RDI),%XMM1,%XMM1 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | vect (25.0%) |
| VPBROADCASTQ %RCX,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| AND $0x7fffffff,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| AND $-0x80000000,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VPTERNLOGQ $-0x14,0x700(%RDI),%XMM1,%XMM0 | 2 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | vect (25.0%) |
| OR %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| VPBROADCASTQ %R9,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPSRLQ $0x1,%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 | vect (25.0%) |
| MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| SHR $0x1,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
| VPANDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
| AND $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| VPBROADCASTQ %R8,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| VPMULLQ %XMM1,%XMM0,%XMM0 | 5 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 | vect (25.0%) |
| NEG %RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| XOR 0x1378(%RDI),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1-2 | 0.33 | scal (12.5%) |
| AND $-0x66f74f21,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| XOR %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV $-0x80000000,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| VPBROADCASTQ %RDX,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| MOV $0x7fffffff,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| VPBROADCASTQ %RDX,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| MOV $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| VPTERNLOGQ $-0x6a,0x1368(%RDI),%XMM0,%XMM2 | 2 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | vect (25.0%) |
| VPBROADCASTQ %RDX,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| MOV $-0x66f74f21,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (6.3%) |
| MOV %RAX,0x710(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| VMOVDQU %XMM2,0x700(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 | vect (25.0%) |
| LEA 0x718(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| LEA 0x1378(%RDI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| VPBROADCASTQ %RDX,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
| MOV 0x1378(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| MOV (%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 | scal (12.5%) |
| AND $-0x80000000,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| AND $0x7fffffff,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| OR %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 | scal (12.5%) |
| SHR $0x1,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 | scal (12.5%) |
| AND $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| NEG %RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 | scal (12.5%) |
| XOR 0xc60(%RDI),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1-2 | 0.33 | scal (12.5%) |
| AND $-0x66f74f21,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (6.3%) |
| XOR %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 | scal (12.5%) |
| MOV %RAX,0x1378(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
| MOVQ $0,0x1380(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | scal (6.3%) |
| VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼std::mersenne_twister_engine | 0.20 | 0.02 |
| ○Loop 38 - random.tcc:412-417 - attention-gcc-gnr-256 | 0.10 | 0.01 |
| ○Loop 37 - random.tcc:404-409 - attention-gcc-gnr-256 | 0.00 | 0.00 |
