| Function: std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, ... | Module: attention-gcc-skl256 | Source: random.tcc:397-425 [...] | Coverage (incl. loops): 0.11% | (excl. loops): 0.00% |
|---|
| Function: std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, ... | Module: attention-gcc-skl256 | Source: random.tcc:397-425 [...] | Coverage (incl. loops): 0.11% | (excl. loops): 0.00% |
|---|
/usr/include/c++/16.1.1/bits/random.tcc: 397 - 425 |
-------------------------------------------------------------------------------- |
397: mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, |
[...] |
404: for (size_t __k = 0; __k < (__n - __m); ++__k) |
405: { |
406: _UIntType __y = ((_M_x[__k] & __upper_mask) |
407: | (_M_x[__k + 1] & __lower_mask)); |
408: _M_x[__k] = (_M_x[__k + __m] ^ (__y >> 1) |
409: ^ ((__y & 0x01) ? __a : 0)); |
410: } |
411: |
412: for (size_t __k = (__n - __m); __k < (__n - 1); ++__k) |
413: { |
414: _UIntType __y = ((_M_x[__k] & __upper_mask) |
415: | (_M_x[__k + 1] & __lower_mask)); |
416: _M_x[__k] = (_M_x[__k + (__m - __n)] ^ (__y >> 1) |
417: ^ ((__y & 0x01) ? __a : 0)); |
418: } |
419: |
420: _UIntType __y = ((_M_x[__n - 1] & __upper_mask) |
421: | (_M_x[0] & __lower_mask)); |
422: _M_x[__n - 1] = (_M_x[__m - 1] ^ (__y >> 1) |
423: ^ ((__y & 0x01) ? __a : 0)); |
424: _M_p = 0; |
425: } |
0x3380 MOV $-0x66f74f21,%EAX |
0x3385 LEA 0x700(%RDI),%RDX |
0x338c VPBROADCASTQ %RAX,%YMM2 |
0x3392 MOV $0x1,%EAX |
0x3397 VPBROADCASTQ %RAX,%YMM3 |
0x339d MOV $0x7fffffff,%EAX |
0x33a2 VMOVDQA %YMM2,%YMM6 |
0x33a6 VPBROADCASTQ %RAX,%YMM4 |
0x33ac MOV $-0x80000000,%RAX |
0x33b3 VMOVDQA %YMM3,%YMM7 |
0x33b7 VPBROADCASTQ %RAX,%YMM5 |
0x33bd VMOVDQA %YMM4,%YMM8 |
0x33c1 MOV %RDI,%RAX |
0x33c4 VMOVDQA %YMM5,%YMM9 |
0x33c8 NOPL (%RAX,%RAX,1) |
(41) 0x33d0 VPANDQ 0x8(%RAX),%YMM8,%YMM1 |
(41) 0x33da VMOVDQA %YMM9,%YMM0 |
(41) 0x33de ADD $0x20,%RAX |
(41) 0x33e2 VPTERNLOGQ $-0x14,-0x20(%RAX),%YMM1,%YMM0 |
(41) 0x33ea VPSRLQ $0x1,%YMM0,%YMM1 |
(41) 0x33ef VPANDQ %YMM7,%YMM0,%YMM0 |
(41) 0x33f5 VPMULLQ %YMM6,%YMM0,%YMM0 |
(41) 0x33fb VPTERNLOGQ $-0x6a,0xc48(%RAX),%YMM0,%YMM1 |
(41) 0x3406 VMOVDQU %YMM1,-0x20(%RAX) |
(41) 0x340b CMP %RAX,%RDX |
(41) 0x340e JNE 33d0 |
0x3410 VPANDQ 0x708(%RDI),%XMM4,%XMM1 |
0x341a VMOVDQA %XMM5,%XMM0 |
0x341e MOV 0x710(%RDI),%RAX |
0x3425 MOV 0x718(%RDI),%RDX |
0x342c VPTERNLOGQ $-0x14,0x700(%RDI),%XMM1,%XMM0 |
0x3434 AND $-0x80000000,%RAX |
0x343a VPSRLQ $0x1,%XMM0,%XMM1 |
0x343f VPANDQ %XMM3,%XMM0,%XMM0 |
0x3445 AND $0x7fffffff,%EDX |
0x344b VPMULLQ %XMM2,%XMM0,%XMM0 |
0x3451 OR %RDX,%RAX |
0x3454 MOV %RAX,%RDX |
0x3457 AND $0x1,%EAX |
0x345a SHR $0x1,%RDX |
0x345d NEG %RAX |
0x3460 XOR 0x1378(%RDI),%RDX |
0x3467 AND $-0x66f74f21,%EAX |
0x346c XOR %RDX,%RAX |
0x346f LEA 0x1378(%RDI),%RDX |
0x3476 VPTERNLOGQ $-0x6a,0x1368(%RDI),%XMM0,%XMM1 |
0x3481 MOV %RAX,0x710(%RDI) |
0x3488 LEA 0x718(%RDI),%RAX |
0x348f VMOVDQU %XMM1,0x700(%RDI) |
0x3497 NOPW (%RAX,%RAX,1) |
(42) 0x34a0 VPANDQ 0x8(%RAX),%YMM4,%YMM1 |
(42) 0x34aa VMOVDQA %YMM5,%YMM0 |
(42) 0x34ae ADD $0x20,%RAX |
(42) 0x34b2 VPTERNLOGQ $-0x14,-0x20(%RAX),%YMM1,%YMM0 |
(42) 0x34ba VPSRLQ $0x1,%YMM0,%YMM1 |
(42) 0x34bf VPANDQ %YMM3,%YMM0,%YMM0 |
(42) 0x34c5 VPMULLQ %YMM2,%YMM0,%YMM0 |
(42) 0x34cb VPTERNLOGQ $-0x6a,-0x738(%RAX),%YMM0,%YMM1 |
(42) 0x34d6 VMOVDQU %YMM1,-0x20(%RAX) |
(42) 0x34db CMP %RAX,%RDX |
(42) 0x34de JNE 34a0 |
0x34e0 MOV 0x1378(%RDI),%RAX |
0x34e7 MOV (%RDI),%RDX |
0x34ea MOVQ $0,0x1380(%RDI) |
0x34f5 AND $0x7fffffff,%EDX |
0x34fb AND $-0x80000000,%RAX |
0x3501 OR %RDX,%RAX |
0x3504 MOV %RAX,%RDX |
0x3507 AND $0x1,%EAX |
0x350a SHR $0x1,%RDX |
0x350d NEG %RAX |
0x3510 XOR 0xc60(%RDI),%RDX |
0x3517 AND $-0x66f74f21,%EAX |
0x351c XOR %RDX,%RAX |
0x351f MOV %RAX,0x1378(%RDI) |
0x3526 VZEROUPPER |
0x3529 RET |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __libc_init_first | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | main | new_allocator.h:183 | attention-gcc-skl256 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run run_0
| Source file and lines | random.tcc:397-425 |
| Module | attention-gcc-skl256 |
| nb instructions | 55 |
| nb uops | 62 |
| loop length | 298 |
| used x86 registers | 3 |
| used mmx registers | 0 |
| used xmm registers | 6 |
| used ymm registers | 8 |
| used zmm registers | 0 |
| nb stack references | 0 |
| micro-operation queue | 15.50 cycles |
| front end | 15.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
|---|---|---|---|---|---|---|---|---|
| uops | 9.58 | 9.42 | 4.83 | 4.50 | 4.00 | 9.50 | 9.50 | 4.67 |
| cycles | 9.58 | 9.42 | 4.83 | 4.50 | 4.00 | 9.50 | 9.50 | 4.67 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 15.50 |
| Dispatch | 9.58 |
| Overall L1 | 15.50 |
| all | 26% |
| load | 33% |
| store | 25% |
| mul | 100% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 27% |
| all | 16% |
| load | 16% |
| store | 14% |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 16% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV $-0x66f74f21,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| LEA 0x700(%RDI),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VPBROADCASTQ %RAX,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (12.5%) |
| MOV $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| VPBROADCASTQ %RAX,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (12.5%) |
| MOV $0x7fffffff,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| VMOVDQA %YMM2,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (50.0%) |
| VPBROADCASTQ %RAX,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (12.5%) |
| MOV $-0x80000000,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| VMOVDQA %YMM3,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (50.0%) |
| VPBROADCASTQ %RAX,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (12.5%) |
| VMOVDQA %YMM4,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (50.0%) |
| MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| VMOVDQA %YMM5,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (50.0%) |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VPANDQ 0x708(%RDI),%XMM4,%XMM1 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMOVDQA %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| MOV 0x710(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOV 0x718(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| VPTERNLOGQ $-0x14,0x700(%RDI),%XMM1,%XMM0 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| AND $-0x80000000,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VPSRLQ $0x1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VPANDQ %XMM3,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
| AND $0x7fffffff,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| VPMULLQ %XMM2,%XMM0,%XMM0 | 3 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 | vect (25.0%) |
| OR %RDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SHR $0x1,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | scal (12.5%) |
| NEG %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| XOR 0x1378(%RDI),%RDX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 | scal (12.5%) |
| AND $-0x66f74f21,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| XOR %RDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| LEA 0x1378(%RDI),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VPTERNLOGQ $-0x6a,0x1368(%RDI),%XMM0,%XMM1 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| MOV %RAX,0x710(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| LEA 0x718(%RDI),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VMOVDQU %XMM1,0x700(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 | vect (25.0%) |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1378(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOV (%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOVQ $0,0x1380(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| AND $0x7fffffff,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| AND $-0x80000000,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| OR %RDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SHR $0x1,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | scal (12.5%) |
| NEG %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| XOR 0xc60(%RDI),%RDX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 | scal (12.5%) |
| AND $-0x66f74f21,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| XOR %RDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RAX,0x1378(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 | N/A |
The code analyzed by CQA in that panel excludes loops and represents 0.00% of application time for run run_0
| Source file and lines | random.tcc:397-425 |
| Module | attention-gcc-skl256 |
| nb instructions | 55 |
| nb uops | 62 |
| loop length | 298 |
| used x86 registers | 3 |
| used mmx registers | 0 |
| used xmm registers | 6 |
| used ymm registers | 8 |
| used zmm registers | 0 |
| nb stack references | 0 |
| micro-operation queue | 15.50 cycles |
| front end | 15.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
|---|---|---|---|---|---|---|---|---|
| uops | 9.58 | 9.42 | 4.83 | 4.50 | 4.00 | 9.50 | 9.50 | 4.67 |
| cycles | 9.58 | 9.42 | 4.83 | 4.50 | 4.00 | 9.50 | 9.50 | 4.67 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 15.50 |
| Dispatch | 9.58 |
| Overall L1 | 15.50 |
| all | 26% |
| load | 33% |
| store | 25% |
| mul | 100% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 27% |
| all | 16% |
| load | 16% |
| store | 14% |
| mul | 25% |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 16% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOV $-0x66f74f21,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| LEA 0x700(%RDI),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VPBROADCASTQ %RAX,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (12.5%) |
| MOV $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| VPBROADCASTQ %RAX,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (12.5%) |
| MOV $0x7fffffff,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| VMOVDQA %YMM2,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (50.0%) |
| VPBROADCASTQ %RAX,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (12.5%) |
| MOV $-0x80000000,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| VMOVDQA %YMM3,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (50.0%) |
| VPBROADCASTQ %RAX,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | scal (12.5%) |
| VMOVDQA %YMM4,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (50.0%) |
| MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| VMOVDQA %YMM5,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (50.0%) |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| VPANDQ 0x708(%RDI),%XMM4,%XMM1 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VMOVDQA %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
| MOV 0x710(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOV 0x718(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| VPTERNLOGQ $-0x14,0x700(%RDI),%XMM1,%XMM0 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| AND $-0x80000000,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| VPSRLQ $0x1,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| VPANDQ %XMM3,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
| AND $0x7fffffff,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| VPMULLQ %XMM2,%XMM0,%XMM0 | 3 | 1.50 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 | vect (25.0%) |
| OR %RDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SHR $0x1,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | scal (12.5%) |
| NEG %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| XOR 0x1378(%RDI),%RDX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 | scal (12.5%) |
| AND $-0x66f74f21,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| XOR %RDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| LEA 0x1378(%RDI),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VPTERNLOGQ $-0x6a,0x1368(%RDI),%XMM0,%XMM1 | 2 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 | vect (25.0%) |
| MOV %RAX,0x710(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| LEA 0x718(%RDI),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
| VMOVDQU %XMM1,0x700(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 | vect (25.0%) |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
| MOV 0x1378(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOV (%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 | scal (12.5%) |
| MOVQ $0,0x1380(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 | scal (6.3%) |
| AND $0x7fffffff,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| AND $-0x80000000,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| OR %RDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (12.5%) |
| AND $0x1,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| SHR $0x1,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 | scal (12.5%) |
| NEG %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| XOR 0xc60(%RDI),%RDX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 | scal (12.5%) |
| AND $-0x66f74f21,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (6.3%) |
| XOR %RDX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 | scal (12.5%) |
| MOV %RAX,0x1378(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 | scal (12.5%) |
| VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
| RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 | N/A |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼std::mersenne_twister_engine | 0.11 | 0.03 |
| ○Loop 42 - random.tcc:412-417 - attention-gcc-skl256 | 0.07 | 0.02 |
| ○Loop 41 - random.tcc:404-409 - attention-gcc-skl256 | 0.04 | 0.01 |
