Loop Id: 351 | Module: exec | Source: generate_chunk_kernel.f90:87-163 [...] | Coverage: 0.04% |
---|
Loop Id: 351 | Module: exec | Source: generate_chunk_kernel.f90:87-163 [...] | Coverage: 0.04% |
---|
0x442fc0 MOV -0x60(%RBP),%R8D |
0x442fc4 INC %R8D |
0x442fc7 MOV -0x138(%RBP),%RDI |
0x442fce INC %RDI |
0x442fd1 MOV -0x58(%RBP),%RSI |
0x442fd5 INC %RSI |
0x442fd8 MOV -0x130(%RBP),%RAX |
0x442fdf CMP %RAX,%RDI |
0x442fe2 MOV -0xd0(%RBP),%ECX |
0x442fe8 JE 442f00 |
0x442fee MOVSXD %R8D,%RDX |
0x442ff1 LEA (,%RDX,8),%R10 |
0x442ff9 CMP %RDX,%RSI |
0x442ffc MOV %RDX,%R9 |
0x442fff CMOVG %RSI,%R9 |
0x443003 SUB %RDX,%R9 |
0x443006 INC %R9 |
0x443009 SHR $0x3,%R9 |
0x44300d NEG %R9 |
0x443010 MOV -0x128(%RBP),%RAX |
0x443017 ADD %RDI,%RAX |
0x44301a MOV %RAX,-0x38(%RBP) |
0x44301e CMP $0x2,%ECX |
0x443021 MOV %R8D,-0x60(%RBP) |
0x443025 MOV %RSI,-0x58(%RBP) |
0x443029 MOV %RDI,-0x138(%RBP) |
0x443030 JGE 4431c0 |
0x443036 CMP $0x1,%ECX |
0x443039 JNE 442fc0 |
0x44303b MOV -0x38(%RBP),%RAX |
0x44303f LEA 0x1(%RAX),%R8 |
0x443043 MOV %R8,%RCX |
0x443046 SUB %RBX,%RCX |
0x443049 MOV 0xb8(%RBP),%RAX |
0x443050 VMOVSD (%RAX,%RCX,8),%XMM0 |
0x443055 MOV 0x50(%RBP),%RAX |
0x443059 MOV -0x50(%RBP),%RCX |
0x44305d VUCOMISD -0x8(%RAX,%RCX,8),%XMM0 |
0x443063 JB 442fc0 |
0x443069 MOV -0x38(%RBP),%RSI |
0x44306d SUB %RBX,%RSI |
0x443070 MOV 0x48(%RBP),%RAX |
0x443074 MOV -0x50(%RBP),%RCX |
0x443078 VMOVSD -0x8(%RAX,%RCX,8),%XMM0 |
0x44307e MOV 0xb8(%RBP),%RAX |
0x443085 VUCOMISD (%RAX,%RSI,8),%XMM0 |
0x44308a JBE 442fc0 |
0x443090 MOV %RSI,%R14 |
0x443093 MOVSXD -0x64(%RBP),%RCX |
0x443097 MOV %RCX,%RAX |
0x44309a MOV %RCX,-0x38(%RBP) |
0x44309e SUB -0x30(%RBP),%RCX |
0x4430a2 MOV 0xb0(%RBP),%RAX |
0x4430a9 VMOVSD 0x8(%RAX,%RCX,8),%XMM0 |
0x4430af MOV 0x40(%RBP),%RAX |
0x4430b3 MOV -0x50(%RBP),%RSI |
0x4430b7 VUCOMISD -0x8(%RAX,%RSI,8),%XMM0 |
0x4430bd JB 442fc0 |
0x4430c3 MOV 0x38(%RBP),%RAX |
0x4430c7 MOV -0x50(%RBP),%RSI |
0x4430cb VMOVSD -0x8(%RAX,%RSI,8),%XMM0 |
0x4430d1 MOV 0xb0(%RBP),%RAX |
0x4430d8 VUCOMISD (%RAX,%RCX,8),%XMM0 |
0x4430dd JBE 442fc0 |
0x4430e3 MOV 0xe0(%RBP),%RSI |
0x4430ea MOV (%RSI),%RDI |
0x4430ed IMUL %RCX,%RDI |
0x4430f1 ADD 0x90(%RBP),%RDI |
0x4430f8 MOV 0x68(%RBP),%RSI |
0x4430fc MOV -0x50(%RBP),%R11 |
0x443100 VMOVSD -0x8(%RSI,%R11,8),%XMM0 |
0x443107 VMOVSD %XMM0,(%RDI,%R14,8) |
0x44310d MOV 0xf0(%RBP),%RSI |
0x443114 IMUL (%RSI),%RCX |
0x443118 ADD 0x98(%RBP),%RCX |
0x44311f MOV 0x70(%RBP),%RSI |
0x443123 VMOVSD -0x8(%RSI,%R11,8),%XMM0 |
0x44312a MOV 0x88(%RBP),%RSI |
0x443131 VMOVSD %XMM0,(%RCX,%R14,8) |
0x443137 MOV 0x60(%RBP),%RAX |
0x44313b VMOVSD -0x8(%RAX,%R11,8),%XMM0 |
0x443142 MOV 0x100(%RBP),%RAX |
0x443149 MOV (%RAX),%R13 |
0x44314c MOV 0x58(%RBP),%RAX |
0x443150 VMOVSD -0x8(%RAX,%R11,8),%XMM1 |
0x443157 MOV 0x110(%RBP),%RAX |
0x44315e MOV (%RAX),%R11 |
0x443161 MOV -0xb0(%RBP),%RCX |
0x443168 MOV %RCX,%R14 |
0x44316b IMUL %R11,%R14 |
0x44316f MOV -0xa8(%RBP),%RAX |
0x443176 ADD %R10,%RAX |
0x443179 ADD %RAX,%R14 |
0x44317c MOV %RCX,%RAX |
0x44317f IMUL %R13,%RAX |
0x443183 ADD -0xa0(%RBP),%R10 |
0x44318a ADD %RAX,%R10 |
0x44318d VBROADCASTSD %XMM0,%ZMM2 |
0x443193 VBROADCASTSD %XMM1,%ZMM3 |
0x443199 XOR %R15D,%R15D |
0x44319c MOV %R11,-0x40(%RBP) |
0x4431a0 JMP 443673 |
0x4431c0 JE 443800 |
0x4431c6 CMP $0x3,%ECX |
0x4431c9 JNE 442fc0 |
0x4431cf MOV -0x38(%RBP),%RCX |
0x4431d3 SUB %RBX,%RCX |
0x4431d6 MOV 0xb8(%RBP),%RAX |
0x4431dd VMOVSD (%RAX,%RCX,8),%XMM0 |
0x4431e2 MOV -0xc0(%RBP),%RSI |
0x4431e9 VUCOMISD (%RSI),%XMM0 |
0x4431ed JNE 442fc0 |
0x4431f3 JP 442fc0 |
0x4431f9 MOVSXD -0x64(%RBP),%RDI |
0x4431fd MOV %RDI,%RAX |
0x443200 MOV %RDI,-0x40(%RBP) |
0x443204 SUB -0x30(%RBP),%RDI |
0x443208 MOV 0xb0(%RBP),%RAX |
0x44320f VMOVSD (%RAX,%RDI,8),%XMM0 |
0x443214 MOV -0xb8(%RBP),%RSI |
0x44321b VUCOMISD (%RSI),%XMM0 |
0x44321f JNE 442fc0 |
0x443225 JP 442fc0 |
0x44322b MOV 0xe0(%RBP),%RSI |
0x443232 MOV (%RSI),%RSI |
0x443235 IMUL %RDI,%RSI |
0x443239 ADD 0x90(%RBP),%RSI |
0x443240 MOV 0x68(%RBP),%R8 |
0x443244 MOV -0x50(%RBP),%R11 |
0x443248 VMOVSD -0x8(%R8,%R11,8),%XMM0 |
0x44324f VMOVSD %XMM0,(%RSI,%RCX,8) |
0x443254 MOV 0xf0(%RBP),%RSI |
0x44325b IMUL (%RSI),%RDI |
0x44325f ADD 0x98(%RBP),%RDI |
0x443266 MOV 0x70(%RBP),%RSI |
0x44326a VMOVSD -0x8(%RSI,%R11,8),%XMM0 |
0x443271 MOV 0x88(%RBP),%RSI |
0x443278 VMOVSD %XMM0,(%RDI,%RCX,8) |
0x44327d MOV 0x60(%RBP),%RCX |
0x443281 VMOVSD -0x8(%RCX,%R11,8),%XMM0 |
0x443288 MOV 0x100(%RBP),%RCX |
0x44328f MOV (%RCX),%R12 |
0x443292 MOV 0x58(%RBP),%RAX |
0x443296 VMOVSD -0x8(%RAX,%R11,8),%XMM1 |
0x44329d MOV 0x110(%RBP),%RCX |
0x4432a4 MOV (%RCX),%R11 |
0x4432a7 MOV -0xb0(%RBP),%RDI |
0x4432ae MOV %RDI,%R14 |
0x4432b1 IMUL %R11,%R14 |
0x4432b5 MOV -0xa8(%RBP),%RAX |
0x4432bc ADD %RAX,%R14 |
0x4432bf ADD %R10,%R14 |
0x4432c2 MOV %RDI,%RCX |
0x4432c5 IMUL %R12,%RCX |
0x4432c9 ADD -0xa0(%RBP),%R10 |
0x4432d0 ADD %RCX,%R10 |
0x4432d3 VBROADCASTSD %XMM0,%ZMM2 |
0x4432d9 VBROADCASTSD %XMM1,%ZMM3 |
0x4432df XOR %R15D,%R15D |
0x4432e2 JMP 4433f0 |
(354) 0x443300 MOV -0x40(%RBP),%RAX |
(354) 0x443304 LEA (%R15,%RAX,1),%R8 |
(354) 0x443308 SUB -0x30(%RBP),%R8 |
(354) 0x44330c MOV %R12,%R13 |
(354) 0x44330f IMUL %R8,%R13 |
(354) 0x443313 ADD %RDX,%RDI |
(354) 0x443316 IMUL %R11,%R8 |
(354) 0x44331a MOV 0x80(%RBP),%RAX |
(354) 0x443321 LEA (%RSI,%R13,1),%RCX |
(354) 0x443325 MOV %RDI,%RSI |
(354) 0x443328 SUB %RBX,%RSI |
(354) 0x44332b VMOVSD %XMM0,0x28(%RCX,%RSI,8) |
(354) 0x443331 LEA (%RAX,%R8,1),%RCX |
(354) 0x443335 VMOVSD %XMM1,0x28(%RCX,%RSI,8) |
(354) 0x44333b MOV 0x88(%RBP),%RSI |
(354) 0x443342 LEA (%RSI,%R13,1),%RCX |
(354) 0x443346 MOV %RDI,%RSI |
(354) 0x443349 SUB %RBX,%RSI |
(354) 0x44334c VMOVSD %XMM0,0x20(%RCX,%RSI,8) |
(354) 0x443352 LEA (%RAX,%R8,1),%RCX |
(354) 0x443356 VMOVSD %XMM1,0x20(%RCX,%RSI,8) |
(354) 0x44335c MOV 0x88(%RBP),%RSI |
(354) 0x443363 LEA (%RSI,%R13,1),%RCX |
(354) 0x443367 MOV %RDI,%RSI |
(354) 0x44336a SUB %RBX,%RSI |
(354) 0x44336d VMOVSD %XMM0,0x18(%RCX,%RSI,8) |
(354) 0x443373 LEA (%RAX,%R8,1),%RCX |
(354) 0x443377 VMOVSD %XMM1,0x18(%RCX,%RSI,8) |
(354) 0x44337d MOV 0x88(%RBP),%RSI |
(354) 0x443384 LEA (%RSI,%R13,1),%RCX |
(354) 0x443388 MOV %RDI,%RSI |
(354) 0x44338b SUB %RBX,%RSI |
(354) 0x44338e VMOVSD %XMM0,0x10(%RCX,%RSI,8) |
(354) 0x443394 LEA (%RAX,%R8,1),%RCX |
(354) 0x443398 VMOVSD %XMM1,0x10(%RCX,%RSI,8) |
(354) 0x44339e MOV 0x88(%RBP),%RSI |
(354) 0x4433a5 LEA (%RSI,%R13,1),%RCX |
(354) 0x4433a9 MOV %RDI,%RSI |
(354) 0x4433ac SUB %RBX,%RSI |
(354) 0x4433af VMOVSD %XMM0,0x8(%RCX,%RSI,8) |
(354) 0x4433b5 LEA (%RAX,%R8,1),%RCX |
(354) 0x4433b9 VMOVSD %XMM1,0x8(%RCX,%RSI,8) |
(354) 0x4433bf MOV 0x88(%RBP),%RSI |
(354) 0x4433c6 ADD %RSI,%R13 |
(354) 0x4433c9 SUB %RBX,%RDI |
(354) 0x4433cc VMOVSD %XMM0,(%R13,%RDI,8) |
(354) 0x4433d3 ADD %RAX,%R8 |
(354) 0x4433d6 VMOVSD %XMM1,(%R8,%RDI,8) |
(354) 0x4433dc ADD %R11,%R14 |
(354) 0x4433df ADD %R12,%R10 |
(354) 0x4433e2 CMP $0x1,%R15 |
(354) 0x4433e6 LEA 0x1(%R15),%R15 |
(354) 0x4433ea JE 442fc0 |
(354) 0x4433f0 MOV -0x38(%RBP),%RAX |
(354) 0x4433f4 LEA 0x1(%RAX),%RCX |
(354) 0x4433f8 CMP %RDX,%RCX |
(354) 0x4433fb CMOVLE %RDX,%RCX |
(354) 0x4433ff SUB %RDX,%RCX |
(354) 0x443402 LEA 0x1(%RCX),%RDI |
(354) 0x443406 CMP $0x8,%RDI |
(354) 0x44340a JB 443437 |
(354) 0x44340c MOV %R9,%R8 |
(354) 0x44340f XOR %R13D,%R13D |
(354) 0x443412 NOPW %CS:(%RAX,%RAX,1) |
(355) 0x443420 VMOVUPD %ZMM2,(%R10,%R13,1) |
(355) 0x443427 VMOVUPD %ZMM3,(%R14,%R13,1) |
(355) 0x44342e ADD $0x40,%R13 |
(355) 0x443432 INC %R8 |
(355) 0x443435 JNE 443420 |
(354) 0x443437 AND $-0x8,%RDI |
(354) 0x44343b SUB %RDI,%RCX |
(354) 0x44343e CMP $0x3,%RCX |
(354) 0x443442 JGE 443480 |
(354) 0x443444 TEST %RCX,%RCX |
(354) 0x443447 JLE 443540 |
(354) 0x44344d MOV -0x40(%RBP),%RAX |
(354) 0x443451 LEA (%R15,%RAX,1),%R8 |
(354) 0x443455 SUB -0x30(%RBP),%R8 |
(354) 0x443459 MOV %R12,%R13 |
(354) 0x44345c IMUL %R8,%R13 |
(354) 0x443460 ADD %RDX,%RDI |
(354) 0x443463 IMUL %R11,%R8 |
(354) 0x443467 CMP $0x1,%RCX |
(354) 0x44346b MOV 0x80(%RBP),%RAX |
(354) 0x443472 JNE 443384 |
(354) 0x443478 JMP 4433a5 |
(354) 0x443480 CMP $0x5,%RCX |
(354) 0x443484 JGE 4434c0 |
(354) 0x443486 MOV -0x40(%RBP),%RAX |
(354) 0x44348a LEA (%R15,%RAX,1),%R8 |
(354) 0x44348e SUB -0x30(%RBP),%R8 |
(354) 0x443492 MOV %R12,%R13 |
(354) 0x443495 IMUL %R8,%R13 |
(354) 0x443499 ADD %RDX,%RDI |
(354) 0x44349c IMUL %R11,%R8 |
(354) 0x4434a0 CMP $0x4,%RCX |
(354) 0x4434a4 MOV 0x80(%RBP),%RAX |
(354) 0x4434ab JE 443342 |
(354) 0x4434b1 JMP 443363 |
(354) 0x4434c0 JE 443300 |
(354) 0x4434c6 CMP $0x6,%RCX |
(354) 0x4434ca JNE 4433dc |
(354) 0x4434d0 MOV -0x40(%RBP),%RAX |
(354) 0x4434d4 LEA (%R15,%RAX,1),%R8 |
(354) 0x4434d8 SUB -0x30(%RBP),%R8 |
(354) 0x4434dc MOV %R12,%R13 |
(354) 0x4434df IMUL %R8,%R13 |
(354) 0x4434e3 LEA (%RSI,%R13,1),%RCX |
(354) 0x4434e7 ADD %RDX,%RDI |
(354) 0x4434ea MOV %RDI,%RSI |
(354) 0x4434ed SUB %RBX,%RSI |
(354) 0x4434f0 VMOVSD %XMM0,0x30(%RCX,%RSI,8) |
(354) 0x4434f6 IMUL %R11,%R8 |
(354) 0x4434fa MOV 0x80(%RBP),%RAX |
(354) 0x443501 LEA (%RAX,%R8,1),%RCX |
(354) 0x443505 VMOVSD %XMM1,0x30(%RCX,%RSI,8) |
(354) 0x44350b MOV 0x88(%RBP),%RSI |
(354) 0x443512 JMP 443321 |
(354) 0x443540 JNE 4433dc |
(354) 0x443546 MOV -0x40(%RBP),%RAX |
(354) 0x44354a LEA (%R15,%RAX,1),%R8 |
(354) 0x44354e SUB -0x30(%RBP),%R8 |
(354) 0x443552 MOV %R12,%R13 |
(354) 0x443555 IMUL %R8,%R13 |
(354) 0x443559 ADD %RDX,%RDI |
(354) 0x44355c IMUL %R11,%R8 |
(354) 0x443560 MOV 0x80(%RBP),%RAX |
(354) 0x443567 JMP 4433c6 |
(356) 0x443580 MOV -0x38(%RBP),%RAX |
(356) 0x443584 LEA (%R15,%RAX,1),%RDI |
(356) 0x443588 SUB -0x30(%RBP),%RDI |
(356) 0x44358c MOV %R13,%RAX |
(356) 0x44358f IMUL %RDI,%RAX |
(356) 0x443593 ADD %RDX,%R12 |
(356) 0x443596 IMUL %R11,%RDI |
(356) 0x44359a MOV 0x80(%RBP),%R11 |
(356) 0x4435a1 LEA (%RSI,%RAX,1),%RCX |
(356) 0x4435a5 MOV %R12,%RSI |
(356) 0x4435a8 SUB %RBX,%RSI |
(356) 0x4435ab VMOVSD %XMM0,0x28(%RCX,%RSI,8) |
(356) 0x4435b1 LEA (%R11,%RDI,1),%RCX |
(356) 0x4435b5 VMOVSD %XMM1,0x28(%RCX,%RSI,8) |
(356) 0x4435bb MOV 0x88(%RBP),%RSI |
(356) 0x4435c2 LEA (%RSI,%RAX,1),%RCX |
(356) 0x4435c6 MOV %R12,%RSI |
(356) 0x4435c9 SUB %RBX,%RSI |
(356) 0x4435cc VMOVSD %XMM0,0x20(%RCX,%RSI,8) |
(356) 0x4435d2 LEA (%R11,%RDI,1),%RCX |
(356) 0x4435d6 VMOVSD %XMM1,0x20(%RCX,%RSI,8) |
(356) 0x4435dc MOV 0x88(%RBP),%RSI |
(356) 0x4435e3 LEA (%RSI,%RAX,1),%RCX |
(356) 0x4435e7 MOV %R12,%RSI |
(356) 0x4435ea SUB %RBX,%RSI |
(356) 0x4435ed VMOVSD %XMM0,0x18(%RCX,%RSI,8) |
(356) 0x4435f3 LEA (%R11,%RDI,1),%RCX |
(356) 0x4435f7 VMOVSD %XMM1,0x18(%RCX,%RSI,8) |
(356) 0x4435fd MOV 0x88(%RBP),%RSI |
(356) 0x443604 LEA (%RSI,%RAX,1),%RCX |
(356) 0x443608 MOV %R12,%RSI |
(356) 0x44360b SUB %RBX,%RSI |
(356) 0x44360e VMOVSD %XMM0,0x10(%RCX,%RSI,8) |
(356) 0x443614 LEA (%R11,%RDI,1),%RCX |
(356) 0x443618 VMOVSD %XMM1,0x10(%RCX,%RSI,8) |
(356) 0x44361e MOV 0x88(%RBP),%RSI |
(356) 0x443625 LEA (%RSI,%RAX,1),%RCX |
(356) 0x443629 MOV %R12,%RSI |
(356) 0x44362c SUB %RBX,%RSI |
(356) 0x44362f VMOVSD %XMM0,0x8(%RCX,%RSI,8) |
(356) 0x443635 LEA (%R11,%RDI,1),%RCX |
(356) 0x443639 VMOVSD %XMM1,0x8(%RCX,%RSI,8) |
(356) 0x44363f MOV 0x88(%RBP),%RSI |
(356) 0x443646 ADD %RSI,%RAX |
(356) 0x443649 SUB %RBX,%R12 |
(356) 0x44364c VMOVSD %XMM0,(%RAX,%R12,8) |
(356) 0x443652 ADD %R11,%RDI |
(356) 0x443655 VMOVSD %XMM1,(%RDI,%R12,8) |
(356) 0x44365b MOV -0x40(%RBP),%R11 |
(356) 0x44365f ADD %R11,%R14 |
(356) 0x443662 ADD %R13,%R10 |
(356) 0x443665 CMP $0x1,%R15 |
(356) 0x443669 LEA 0x1(%R15),%R15 |
(356) 0x44366d JE 442fc0 |
(356) 0x443673 CMP %RDX,%R8 |
(356) 0x443676 MOV %RDX,%RCX |
(356) 0x443679 CMOVG %R8,%RCX |
(356) 0x44367d SUB %RDX,%RCX |
(356) 0x443680 LEA 0x1(%RCX),%R12 |
(356) 0x443684 CMP $0x8,%R12 |
(356) 0x443688 JB 4436a7 |
(356) 0x44368a MOV %R9,%RAX |
(356) 0x44368d XOR %EDI,%EDI |
(356) 0x44368f NOP |
(357) 0x443690 VMOVUPD %ZMM2,(%R10,%RDI,1) |
(357) 0x443697 VMOVUPD %ZMM3,(%R14,%RDI,1) |
(357) 0x44369e ADD $0x40,%RDI |
(357) 0x4436a2 INC %RAX |
(357) 0x4436a5 JNE 443690 |
(356) 0x4436a7 AND $-0x8,%R12 |
(356) 0x4436ab SUB %R12,%RCX |
(356) 0x4436ae CMP $0x3,%RCX |
(356) 0x4436b2 JGE 443700 |
(356) 0x4436b4 TEST %RCX,%RCX |
(356) 0x4436b7 JLE 4437c0 |
(356) 0x4436bd MOV -0x38(%RBP),%RAX |
(356) 0x4436c1 LEA (%R15,%RAX,1),%RDI |
(356) 0x4436c5 SUB -0x30(%RBP),%RDI |
(356) 0x4436c9 MOV %R13,%RAX |
(356) 0x4436cc IMUL %RDI,%RAX |
(356) 0x4436d0 ADD %RDX,%R12 |
(356) 0x4436d3 IMUL %R11,%RDI |
(356) 0x4436d7 CMP $0x1,%RCX |
(356) 0x4436db MOV 0x80(%RBP),%R11 |
(356) 0x4436e2 JNE 443604 |
(356) 0x4436e8 JMP 443625 |
(356) 0x443700 CMP $0x5,%RCX |
(356) 0x443704 JGE 443740 |
(356) 0x443706 MOV -0x38(%RBP),%RAX |
(356) 0x44370a LEA (%R15,%RAX,1),%RDI |
(356) 0x44370e SUB -0x30(%RBP),%RDI |
(356) 0x443712 MOV %R13,%RAX |
(356) 0x443715 IMUL %RDI,%RAX |
(356) 0x443719 ADD %RDX,%R12 |
(356) 0x44371c IMUL %R11,%RDI |
(356) 0x443720 CMP $0x4,%RCX |
(356) 0x443724 MOV 0x80(%RBP),%R11 |
(356) 0x44372b JE 4435c2 |
(356) 0x443731 JMP 4435e3 |
(356) 0x443740 JE 443580 |
(356) 0x443746 CMP $0x6,%RCX |
(356) 0x44374a JNE 44365f |
(356) 0x443750 MOV -0x38(%RBP),%RAX |
(356) 0x443754 LEA (%R15,%RAX,1),%RDI |
(356) 0x443758 SUB -0x30(%RBP),%RDI |
(356) 0x44375c MOV %R13,%RAX |
(356) 0x44375f IMUL %RDI,%RAX |
(356) 0x443763 LEA (%RSI,%RAX,1),%RCX |
(356) 0x443767 ADD %RDX,%R12 |
(356) 0x44376a MOV %R12,%RSI |
(356) 0x44376d SUB %RBX,%RSI |
(356) 0x443770 VMOVSD %XMM0,0x30(%RCX,%RSI,8) |
(356) 0x443776 IMUL %R11,%RDI |
(356) 0x44377a MOV 0x80(%RBP),%R11 |
(356) 0x443781 LEA (%R11,%RDI,1),%RCX |
(356) 0x443785 VMOVSD %XMM1,0x30(%RCX,%RSI,8) |
(356) 0x44378b MOV 0x88(%RBP),%RSI |
(356) 0x443792 JMP 4435a1 |
(356) 0x4437c0 JNE 44365f |
(356) 0x4437c6 MOV -0x38(%RBP),%RAX |
(356) 0x4437ca LEA (%R15,%RAX,1),%RDI |
(356) 0x4437ce SUB -0x30(%RBP),%RDI |
(356) 0x4437d2 MOV %R13,%RAX |
(356) 0x4437d5 IMUL %RDI,%RAX |
(356) 0x4437d9 ADD %RDX,%R12 |
(356) 0x4437dc IMUL %R11,%RDI |
(356) 0x4437e0 MOV 0x80(%RBP),%R11 |
(356) 0x4437e7 JMP 443646 |
0x443800 MOV -0x38(%RBP),%RCX |
0x443804 SUB %RBX,%RCX |
0x443807 MOV 0xa8(%RBP),%RAX |
0x44380e VMOVSD (%RAX,%RCX,8),%XMM0 |
0x443813 MOV -0xc0(%RBP),%RSI |
0x44381a VSUBSD (%RSI),%XMM0,%XMM0 |
0x44381e VMULSD %XMM0,%XMM0,%XMM0 |
0x443822 MOVSXD -0x64(%RBP),%RDI |
0x443826 MOV %RDI,%RAX |
0x443829 MOV %RDI,-0x40(%RBP) |
0x44382d SUB -0x30(%RBP),%RDI |
0x443831 MOV 0xa0(%RBP),%RAX |
0x443838 VMOVSD (%RAX,%RDI,8),%XMM1 |
0x44383d MOV -0xb8(%RBP),%RSI |
0x443844 VSUBSD (%RSI),%XMM1,%XMM1 |
0x443848 VFMADD213SD %XMM0,%XMM1,%XMM1 |
0x44384d VSQRTSD %XMM1,%XMM1,%XMM0 |
0x443851 MOV 0x30(%RBP),%RAX |
0x443855 MOV -0x50(%RBP),%RSI |
0x443859 VMOVSD -0x8(%RAX,%RSI,8),%XMM1 |
0x44385f VUCOMISD %XMM0,%XMM1 |
0x443863 JB 442fc0 |
0x443869 MOV 0xe0(%RBP),%RSI |
0x443870 MOV (%RSI),%RSI |
0x443873 IMUL %RDI,%RSI |
0x443877 ADD 0x90(%RBP),%RSI |
0x44387e MOV 0x68(%RBP),%R8 |
0x443882 MOV -0x50(%RBP),%R11 |
0x443886 VMOVSD -0x8(%R8,%R11,8),%XMM0 |
0x44388d VMOVSD %XMM0,(%RSI,%RCX,8) |
0x443892 MOV 0xf0(%RBP),%RSI |
0x443899 IMUL (%RSI),%RDI |
0x44389d ADD 0x98(%RBP),%RDI |
0x4438a4 MOV 0x70(%RBP),%RSI |
0x4438a8 VMOVSD -0x8(%RSI,%R11,8),%XMM0 |
0x4438af MOV 0x88(%RBP),%RSI |
0x4438b6 VMOVSD %XMM0,(%RDI,%RCX,8) |
0x4438bb MOV 0x60(%RBP),%RCX |
0x4438bf VMOVSD -0x8(%RCX,%R11,8),%XMM0 |
0x4438c6 MOV 0x100(%RBP),%RCX |
0x4438cd MOV (%RCX),%R12 |
0x4438d0 MOV 0x58(%RBP),%RAX |
0x4438d4 VMOVSD -0x8(%RAX,%R11,8),%XMM1 |
0x4438db MOV 0x110(%RBP),%RCX |
0x4438e2 MOV (%RCX),%R11 |
0x4438e5 MOV -0xb0(%RBP),%RDI |
0x4438ec MOV %RDI,%R14 |
0x4438ef IMUL %R11,%R14 |
0x4438f3 MOV -0xa8(%RBP),%RAX |
0x4438fa ADD %RAX,%R14 |
0x4438fd ADD %R10,%R14 |
0x443900 MOV %RDI,%RCX |
0x443903 IMUL %R12,%RCX |
0x443907 ADD -0xa0(%RBP),%R10 |
0x44390e ADD %RCX,%R10 |
0x443911 VBROADCASTSD %XMM0,%ZMM2 |
0x443917 VBROADCASTSD %XMM1,%ZMM3 |
0x44391d XOR %R15D,%R15D |
0x443920 JMP 443a30 |
(352) 0x443940 MOV -0x40(%RBP),%RAX |
(352) 0x443944 LEA (%R15,%RAX,1),%R8 |
(352) 0x443948 SUB -0x30(%RBP),%R8 |
(352) 0x44394c MOV %R12,%R13 |
(352) 0x44394f IMUL %R8,%R13 |
(352) 0x443953 ADD %RDX,%RDI |
(352) 0x443956 IMUL %R11,%R8 |
(352) 0x44395a MOV 0x80(%RBP),%RAX |
(352) 0x443961 LEA (%RSI,%R13,1),%RCX |
(352) 0x443965 MOV %RDI,%RSI |
(352) 0x443968 SUB %RBX,%RSI |
(352) 0x44396b VMOVSD %XMM0,0x28(%RCX,%RSI,8) |
(352) 0x443971 LEA (%RAX,%R8,1),%RCX |
(352) 0x443975 VMOVSD %XMM1,0x28(%RCX,%RSI,8) |
(352) 0x44397b MOV 0x88(%RBP),%RSI |
(352) 0x443982 LEA (%RSI,%R13,1),%RCX |
(352) 0x443986 MOV %RDI,%RSI |
(352) 0x443989 SUB %RBX,%RSI |
(352) 0x44398c VMOVSD %XMM0,0x20(%RCX,%RSI,8) |
(352) 0x443992 LEA (%RAX,%R8,1),%RCX |
(352) 0x443996 VMOVSD %XMM1,0x20(%RCX,%RSI,8) |
(352) 0x44399c MOV 0x88(%RBP),%RSI |
(352) 0x4439a3 LEA (%RSI,%R13,1),%RCX |
(352) 0x4439a7 MOV %RDI,%RSI |
(352) 0x4439aa SUB %RBX,%RSI |
(352) 0x4439ad VMOVSD %XMM0,0x18(%RCX,%RSI,8) |
(352) 0x4439b3 LEA (%RAX,%R8,1),%RCX |
(352) 0x4439b7 VMOVSD %XMM1,0x18(%RCX,%RSI,8) |
(352) 0x4439bd MOV 0x88(%RBP),%RSI |
(352) 0x4439c4 LEA (%RSI,%R13,1),%RCX |
(352) 0x4439c8 MOV %RDI,%RSI |
(352) 0x4439cb SUB %RBX,%RSI |
(352) 0x4439ce VMOVSD %XMM0,0x10(%RCX,%RSI,8) |
(352) 0x4439d4 LEA (%RAX,%R8,1),%RCX |
(352) 0x4439d8 VMOVSD %XMM1,0x10(%RCX,%RSI,8) |
(352) 0x4439de MOV 0x88(%RBP),%RSI |
(352) 0x4439e5 LEA (%RSI,%R13,1),%RCX |
(352) 0x4439e9 MOV %RDI,%RSI |
(352) 0x4439ec SUB %RBX,%RSI |
(352) 0x4439ef VMOVSD %XMM0,0x8(%RCX,%RSI,8) |
(352) 0x4439f5 LEA (%RAX,%R8,1),%RCX |
(352) 0x4439f9 VMOVSD %XMM1,0x8(%RCX,%RSI,8) |
(352) 0x4439ff MOV 0x88(%RBP),%RSI |
(352) 0x443a06 ADD %RSI,%R13 |
(352) 0x443a09 SUB %RBX,%RDI |
(352) 0x443a0c VMOVSD %XMM0,(%R13,%RDI,8) |
(352) 0x443a13 ADD %RAX,%R8 |
(352) 0x443a16 VMOVSD %XMM1,(%R8,%RDI,8) |
(352) 0x443a1c ADD %R11,%R14 |
(352) 0x443a1f ADD %R12,%R10 |
(352) 0x443a22 CMP $0x1,%R15 |
(352) 0x443a26 LEA 0x1(%R15),%R15 |
(352) 0x443a2a JE 442fc0 |
(352) 0x443a30 MOV -0x38(%RBP),%RAX |
(352) 0x443a34 LEA 0x1(%RAX),%RCX |
(352) 0x443a38 CMP %RDX,%RCX |
(352) 0x443a3b CMOVLE %RDX,%RCX |
(352) 0x443a3f SUB %RDX,%RCX |
(352) 0x443a42 LEA 0x1(%RCX),%RDI |
(352) 0x443a46 CMP $0x8,%RDI |
(352) 0x443a4a JB 443a77 |
(352) 0x443a4c MOV %R9,%R8 |
(352) 0x443a4f XOR %R13D,%R13D |
(352) 0x443a52 NOPW %CS:(%RAX,%RAX,1) |
(353) 0x443a60 VMOVUPD %ZMM2,(%R10,%R13,1) |
(353) 0x443a67 VMOVUPD %ZMM3,(%R14,%R13,1) |
(353) 0x443a6e ADD $0x40,%R13 |
(353) 0x443a72 INC %R8 |
(353) 0x443a75 JNE 443a60 |
(352) 0x443a77 AND $-0x8,%RDI |
(352) 0x443a7b SUB %RDI,%RCX |
(352) 0x443a7e CMP $0x3,%RCX |
(352) 0x443a82 JGE 443ac0 |
(352) 0x443a84 TEST %RCX,%RCX |
(352) 0x443a87 JLE 443b80 |
(352) 0x443a8d MOV -0x40(%RBP),%RAX |
(352) 0x443a91 LEA (%R15,%RAX,1),%R8 |
(352) 0x443a95 SUB -0x30(%RBP),%R8 |
(352) 0x443a99 MOV %R12,%R13 |
(352) 0x443a9c IMUL %R8,%R13 |
(352) 0x443aa0 ADD %RDX,%RDI |
(352) 0x443aa3 IMUL %R11,%R8 |
(352) 0x443aa7 CMP $0x1,%RCX |
(352) 0x443aab MOV 0x80(%RBP),%RAX |
(352) 0x443ab2 JNE 4439c4 |
(352) 0x443ab8 JMP 4439e5 |
(352) 0x443ac0 CMP $0x5,%RCX |
(352) 0x443ac4 JGE 443b00 |
(352) 0x443ac6 MOV -0x40(%RBP),%RAX |
(352) 0x443aca LEA (%R15,%RAX,1),%R8 |
(352) 0x443ace SUB -0x30(%RBP),%R8 |
(352) 0x443ad2 MOV %R12,%R13 |
(352) 0x443ad5 IMUL %R8,%R13 |
(352) 0x443ad9 ADD %RDX,%RDI |
(352) 0x443adc IMUL %R11,%R8 |
(352) 0x443ae0 CMP $0x4,%RCX |
(352) 0x443ae4 MOV 0x80(%RBP),%RAX |
(352) 0x443aeb JE 443982 |
(352) 0x443af1 JMP 4439a3 |
(352) 0x443b00 JE 443940 |
(352) 0x443b06 CMP $0x6,%RCX |
(352) 0x443b0a JNE 443a1c |
(352) 0x443b10 MOV -0x40(%RBP),%RAX |
(352) 0x443b14 LEA (%R15,%RAX,1),%R8 |
(352) 0x443b18 SUB -0x30(%RBP),%R8 |
(352) 0x443b1c MOV %R12,%R13 |
(352) 0x443b1f IMUL %R8,%R13 |
(352) 0x443b23 LEA (%RSI,%R13,1),%RCX |
(352) 0x443b27 ADD %RDX,%RDI |
(352) 0x443b2a MOV %RDI,%RSI |
(352) 0x443b2d SUB %RBX,%RSI |
(352) 0x443b30 VMOVSD %XMM0,0x30(%RCX,%RSI,8) |
(352) 0x443b36 IMUL %R11,%R8 |
(352) 0x443b3a MOV 0x80(%RBP),%RAX |
(352) 0x443b41 LEA (%RAX,%R8,1),%RCX |
(352) 0x443b45 VMOVSD %XMM1,0x30(%RCX,%RSI,8) |
(352) 0x443b4b MOV 0x88(%RBP),%RSI |
(352) 0x443b52 JMP 443961 |
(352) 0x443b80 JNE 443a1c |
(352) 0x443b86 MOV -0x40(%RBP),%RAX |
(352) 0x443b8a LEA (%R15,%RAX,1),%R8 |
(352) 0x443b8e SUB -0x30(%RBP),%R8 |
(352) 0x443b92 MOV %R12,%R13 |
(352) 0x443b95 IMUL %R8,%R13 |
(352) 0x443b99 ADD %RDX,%RDI |
(352) 0x443b9c IMUL %R11,%R8 |
(352) 0x443ba0 MOV 0x80(%RBP),%RAX |
(352) 0x443ba7 JMP 443a06 |
/scratch_na/users/xoserete/qaas_runs/171-214-9740/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/generate_chunk_kernel.f90: 87 - 163 |
-------------------------------------------------------------------------------- |
87: DO k=y_min-2,y_max+2 |
[...] |
128: DO j=x_min-2,x_max+2 |
129: IF(state_geometry(state).EQ.g_rect ) THEN |
130: IF(vertexx(j+1).GE.state_xmin(state).AND.vertexx(j).LT.state_xmax(state)) THEN |
131: IF(vertexy(k+1).GE.state_ymin(state).AND.vertexy(k).LT.state_ymax(state)) THEN |
132: energy0(j,k)=state_energy(state) |
133: density0(j,k)=state_density(state) |
134: DO kt=k,k+1 |
135: DO jt=j,j+1 |
136: xvel0(jt,kt)=state_xvel(state) |
137: yvel0(jt,kt)=state_yvel(state) |
138: ENDDO |
139: ENDDO |
140: ENDIF |
141: ENDIF |
142: ELSEIF(state_geometry(state).EQ.g_circ ) THEN |
143: radius=SQRT((cellx(j)-x_cent)*(cellx(j)-x_cent)+(celly(k)-y_cent)*(celly(k)-y_cent)) |
144: IF(radius.LE.state_radius(state))THEN |
145: energy0(j,k)=state_energy(state) |
146: density0(j,k)=state_density(state) |
147: DO kt=k,k+1 |
148: DO jt=j,j+1 |
149: xvel0(jt,kt)=state_xvel(state) |
150: yvel0(jt,kt)=state_yvel(state) |
151: ENDDO |
152: ENDDO |
153: ENDIF |
154: ELSEIF(state_geometry(state).EQ.g_point) THEN |
155: IF(vertexx(j).EQ.x_cent .AND. vertexy(k).EQ.y_cent) THEN |
156: energy0(j,k)=state_energy(state) |
157: density0(j,k)=state_density(state) |
158: DO kt=k,k+1 |
159: DO jt=j,j+1 |
160: xvel0(jt,kt)=state_xvel(state) |
161: yvel0(jt,kt)=state_yvel(state) |
162: ENDDO |
163: ENDDO |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.53 |
CQA speedup if FP arith vectorized | 2.85 |
CQA speedup if fully vectorized | 12.72 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.10 |
Bottlenecks | P2, P3, P11, |
Function | generate_chunk_kernel_.DIR.OMP.PARALLEL.2 |
Source | generate_chunk_kernel.f90:128-133,generate_chunk_kernel.f90:136-137,generate_chunk_kernel.f90:143-146,generate_chunk_kernel.f90:149-150,generate_chunk_kernel.f90:155-157,generate_chunk_kernel.f90:160-161 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 41.33 |
CQA cycles if no scalar integer | 16.33 |
CQA cycles if FP arith vectorized | 14.50 |
CQA cycles if fully vectorized | 3.25 |
Front-end cycles | 37.50 |
DIV/SQRT cycles | 4.50 |
P0 cycles | 16.20 |
P1 cycles | 19.13 |
P2 cycles | 41.33 |
P3 cycles | 41.33 |
P4 cycles | 7.00 |
P5 cycles | 16.20 |
P6 cycles | 16.10 |
P7 cycles | 7.00 |
P8 cycles | 7.00 |
P9 cycles | 7.00 |
P10 cycles | 16.20 |
P11 cycles | 41.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 47.60 - 47.23 |
Stall cycles (UFS) | 9.81 - 9.39 |
Nb insns | 219.00 |
Nb uops | 225.00 |
Nb loads | 124.00 |
Nb stores | 14.00 |
Nb stack references | 36.00 |
FLOP/cycle | 0.15 |
Nb FLOP add-sub | 2.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 1.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 26.13 |
Bytes prefetched | 0.00 |
Bytes loaded | 972.00 |
Bytes stored | 108.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.88 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.05 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 11.00 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.53 |
CQA speedup if FP arith vectorized | 2.85 |
CQA speedup if fully vectorized | 12.72 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.10 |
Bottlenecks | P2, P3, P11, |
Function | generate_chunk_kernel_.DIR.OMP.PARALLEL.2 |
Source | generate_chunk_kernel.f90:128-133,generate_chunk_kernel.f90:136-137,generate_chunk_kernel.f90:143-146,generate_chunk_kernel.f90:149-150,generate_chunk_kernel.f90:155-157,generate_chunk_kernel.f90:160-161 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 41.33 |
CQA cycles if no scalar integer | 16.33 |
CQA cycles if FP arith vectorized | 14.50 |
CQA cycles if fully vectorized | 3.25 |
Front-end cycles | 37.50 |
DIV/SQRT cycles | 4.50 |
P0 cycles | 16.20 |
P1 cycles | 19.13 |
P2 cycles | 41.33 |
P3 cycles | 41.33 |
P4 cycles | 7.00 |
P5 cycles | 16.20 |
P6 cycles | 16.10 |
P7 cycles | 7.00 |
P8 cycles | 7.00 |
P9 cycles | 7.00 |
P10 cycles | 16.20 |
P11 cycles | 41.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 47.60 - 47.23 |
Stall cycles (UFS) | 9.81 - 9.39 |
Nb insns | 219.00 |
Nb uops | 225.00 |
Nb loads | 124.00 |
Nb stores | 14.00 |
Nb stack references | 36.00 |
FLOP/cycle | 0.15 |
Nb FLOP add-sub | 2.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 1.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 26.13 |
Bytes prefetched | 0.00 |
Bytes loaded | 972.00 |
Bytes stored | 108.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.88 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.05 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 11.00 |
Path / |
Function | generate_chunk_kernel_.DIR.OMP.PARALLEL.2 |
Source file and lines | generate_chunk_kernel.f90:87-163 |
Module | exec |
nb instructions | 219 |
nb uops | 225 |
loop length | 1073 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 2 |
nb stack references | 36 |
ADD-SUB / MUL ratio | 2.00 |
micro-operation queue | 37.50 cycles |
front end | 37.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 16.20 | 16.30 | 41.33 | 41.33 | 7.00 | 16.20 | 16.10 | 7.00 | 7.00 | 7.00 | 16.20 | 41.33 |
cycles | 16.20 | 19.13 | 41.33 | 41.33 | 7.00 | 16.20 | 16.10 | 7.00 | 7.00 | 7.00 | 16.20 | 41.33 |
Cycles executing div or sqrt instructions | 4.50 |
FE+BE cycles | 47.60-47.23 |
Stall cycles | 9.81-9.39 |
LM full (events) | 21.37-19.95 |
Front-end | 37.50 |
Dispatch | 41.33 |
DIV/SQRT | 4.50 |
Overall L1 | 41.33 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
all | 10% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 12% |
all | 11% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x60(%RBP),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x138(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x130(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0xd0(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 442f00 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x540> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %R8D,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (,%RDX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMOVG %RSI,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NEG %R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x138(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4431c0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x800> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RAX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RBX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD -0x8(%RAX,%RCX,8),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JB 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD (%RAX,%RSI,8),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JBE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD -0x64(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB -0x30(%RBP),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD -0x8(%RAX,%RSI,8),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JB 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%RSI,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD (%RAX,%RCX,8),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JBE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xe0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x90(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RSI,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RDI,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL (%RSI),%RCX | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
ADD 0x98(%RBP),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RSI,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RCX,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%R11,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R11,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R13,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD -0xa0(%RBP),%R10 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
ADD %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 443673 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0xcb3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
JE 443800 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0xe40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RBX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD (%RSI),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JNE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JP 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD -0x64(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB -0x30(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RDI,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD (%RSI),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JNE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JP 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xe0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RDI,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x90(%RBP),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%R8,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RSI,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL (%RSI),%RDI | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
ADD 0x98(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RSI,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RDI,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RCX,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%R11,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x110(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R11,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R10,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD -0xa0(%RBP),%R10 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
ADD %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4433f0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0xa30> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RBX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD (%RSI),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMULSD %XMM0,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD -0x64(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB -0x30(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RDI,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD (%RSI),%XMM1,%XMM1 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VFMADD213SD %XMM0,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTSD %XMM1,%XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%RSI,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD %XMM0,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JB 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xe0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RDI,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x90(%RBP),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%R8,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RSI,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL (%RSI),%RDI | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
ADD 0x98(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RSI,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RDI,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RCX,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%R11,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x110(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R11,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R10,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD -0xa0(%RBP),%R10 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
ADD %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443a30 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x1070> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | generate_chunk_kernel_.DIR.OMP.PARALLEL.2 |
Source file and lines | generate_chunk_kernel.f90:87-163 |
Module | exec |
nb instructions | 219 |
nb uops | 225 |
loop length | 1073 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 2 |
nb stack references | 36 |
ADD-SUB / MUL ratio | 2.00 |
micro-operation queue | 37.50 cycles |
front end | 37.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 16.20 | 16.30 | 41.33 | 41.33 | 7.00 | 16.20 | 16.10 | 7.00 | 7.00 | 7.00 | 16.20 | 41.33 |
cycles | 16.20 | 19.13 | 41.33 | 41.33 | 7.00 | 16.20 | 16.10 | 7.00 | 7.00 | 7.00 | 16.20 | 41.33 |
Cycles executing div or sqrt instructions | 4.50 |
FE+BE cycles | 47.60-47.23 |
Stall cycles | 9.81-9.39 |
LM full (events) | 21.37-19.95 |
Front-end | 37.50 |
Dispatch | 41.33 |
DIV/SQRT | 4.50 |
Overall L1 | 41.33 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
all | 10% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 12% |
all | 11% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x60(%RBP),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x138(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x130(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0xd0(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 442f00 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x540> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD %R8D,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (,%RDX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMOVG %RSI,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NEG %R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x128(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x138(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4431c0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x800> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RAX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RBX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD -0x8(%RAX,%RCX,8),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JB 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD (%RAX,%RSI,8),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JBE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD -0x64(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB -0x30(%RBP),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD -0x8(%RAX,%RSI,8),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JB 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%RSI,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD (%RAX,%RCX,8),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JBE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xe0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x90(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RSI,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RDI,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL (%RSI),%RCX | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
ADD 0x98(%RBP),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RSI,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RCX,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%R11,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R11,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R13,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD -0xa0(%RBP),%R10 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
ADD %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 443673 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0xcb3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
JE 443800 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0xe40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RBX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD (%RSI),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JNE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JP 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD -0x64(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB -0x30(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RDI,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD (%RSI),%XMM0 | 2 | 1 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
JNE 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JP 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xe0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RDI,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x90(%RBP),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%R8,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RSI,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL (%RSI),%RDI | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
ADD 0x98(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RSI,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RDI,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RCX,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%R11,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x110(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R11,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R10,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD -0xa0(%RBP),%R10 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
ADD %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4433f0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0xa30> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RBX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RCX,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD (%RSI),%XMM0,%XMM0 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMULSD %XMM0,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD -0x64(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB -0x30(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%RDI,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD (%RSI),%XMM1,%XMM1 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VFMADD213SD %XMM0,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTSD %XMM1,%XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%RSI,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VUCOMISD %XMM0,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JB 442fc0 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x600> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xe0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RDI,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x90(%RBP),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%R8,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RSI,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL (%RSI),%RDI | 1 | 0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 1 |
ADD 0x98(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RSI,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RDI,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RCX,%R11,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x8(%RAX,%R11,8),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x110(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R11,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RAX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R10,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD -0xa0(%RBP),%R10 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
ADD %RCX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443a30 <generate_chunk_kernel_module_mp_generate_chunk_kernel_.DIR.OMP.PARALLEL.2+0x1070> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |