Function: _Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn. ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 5.43% |
---|
Function: _Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn. ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 5.43% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/PdV.cpp: 48 - 63 |
-------------------------------------------------------------------------------- |
48: #pragma omp parallel for simd collapse(2) |
49: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
50: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
51: double left_flux = (xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel0(i, j) + xvel0(i + 0, j + 1))) * 0.25 * dt * 0.5; |
52: double right_flux = |
53: (xarea(i + 1, j + 0) * (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1) + xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1))) * 0.25 * dt * |
54: 0.5; |
55: double bottom_flux = (yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel0(i, j) + yvel0(i + 1, j + 0))) * 0.25 * dt * 0.5; |
56: double top_flux = (yarea(i + 0, j + 1) * (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1) + yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1))) * |
57: 0.25 * dt * 0.5; |
58: double total_flux = right_flux - left_flux + top_flux - bottom_flux; |
59: double volume_change_s = volume(i, j) / (volume(i, j) + total_flux); |
60: double recip_volume = 1.0 / volume(i, j); |
61: double energy_change = (pressure(i, j) / density0(i, j) + viscosity(i, j) / density0(i, j)) * total_flux * recip_volume; |
62: energy1(i, j) = energy0(i, j) - energy_change; |
63: density1(i, j) = density0(i, j) * volume_change_s; |
0x440520 PUSH %RBP |
0x440521 MOV %RSP,%RBP |
0x440524 PUSH %R15 |
0x440526 PUSH %R14 |
0x440528 PUSH %R13 |
0x44052a PUSH %R12 |
0x44052c PUSH %RBX |
0x44052d MOV %RDI,%RBX |
0x440530 AND $-0x40,%RSP |
0x440534 SUB $0x140,%RSP |
0x44053b MOV 0x68(%RDI),%EAX |
0x44053e MOV 0x6c(%RDI),%EDX |
0x440541 MOV 0x60(%RDI),%EDI |
0x440544 MOV 0x64(%RBX),%ECX |
0x440547 ADD $0x2,%EDX |
0x44054a INC %EDI |
0x44054c LEA 0x1(%RAX),%R15D |
0x440550 MOV %EDX,0x9c(%RSP) |
0x440557 MOV %EDI,0x98(%RSP) |
0x44055e CMP %EDX,%R15D |
0x440561 JGE 441033 |
0x440567 MOV %EDX,%R13D |
0x44056a LEA 0x2(%RCX),%R14D |
0x44056e SUB %R15D,%R13D |
0x440571 CMP %R14D,%EDI |
0x440574 JGE 441033 |
0x44057a MOV %R14D,%ESI |
0x44057d SUB %EDI,%ESI |
0x44057f MOV %ESI,0xa0(%RSP) |
0x440586 CALL 404650 <omp_get_num_threads@plt> |
0x44058b MOV %EAX,%R12D |
0x44058e CALL 404540 <omp_get_thread_num@plt> |
0x440593 XOR %EDX,%EDX |
0x440595 MOV %EAX,%R8D |
0x440598 MOV 0xa0(%RSP),%EAX |
0x44059f IMUL %R13D,%EAX |
0x4405a3 DIV %R12D |
0x4405a6 MOV %EAX,%ECX |
0x4405a8 CMP %EDX,%R8D |
0x4405ab JB 441067 |
0x4405b1 IMUL %ECX,%R8D |
0x4405b5 LEA (%R8,%RDX,1),%EAX |
0x4405b9 LEA (%RCX,%RAX,1),%R9D |
0x4405bd MOV %EAX,0x138(%RSP) |
0x4405c4 MOV %R9D,0x94(%RSP) |
0x4405cc CMP %R9D,%EAX |
0x4405cf JAE 441033 |
0x4405d5 XOR %EDX,%EDX |
0x4405d7 MOV 0x98(%RSP),%R10D |
0x4405df MOV 0x50(%RBX),%RDI |
0x4405e3 MOV 0x58(%RBX),%R13 |
0x4405e7 DIVL 0xa0(%RSP) |
0x4405ee MOV 0x18(%RBX),%RSI |
0x4405f2 MOV 0x40(%RBX),%R12 |
0x4405f6 MOV 0x20(%RBX),%R8 |
0x4405fa MOV 0x48(%RBX),%R9 |
0x4405fe VMOVSD 0x20d1a(%RIP),%XMM0 |
0x440606 VMULSD (%RBX),%XMM0,%XMM21 |
0x44060c VMOVSD 0x1ff84(%RIP),%XMM10 |
0x440614 VBROADCASTSD %XMM21,%YMM0 |
0x44061a VBROADCASTSD %XMM10,%YMM14 |
0x44061f VBROADCASTSD %XMM21,%ZMM12 |
0x440625 VBROADCASTSD %XMM10,%ZMM11 |
0x44062b MOV %RDI,0x80(%RSP) |
0x440633 MOV %R13,0x70(%RSP) |
0x440638 MOV %RSI,0x68(%RSP) |
0x44063d MOV %R12,0x60(%RSP) |
0x440642 MOV %R8,0x58(%RSP) |
0x440647 MOV %R9,0x50(%RSP) |
0x44064c ADD %EDX,%R10D |
0x44064f LEA (%RAX,%R15,1),%R11D |
0x440653 MOV %R14D,%EAX |
0x440656 MOV 0x8(%RBX),%R15 |
0x44065a MOV 0x10(%RBX),%R14 |
0x44065e MOV 0x30(%RBX),%RDX |
0x440662 SUB %R10D,%EAX |
0x440665 MOV %R10D,0x120(%RSP) |
0x44066d MOV 0x38(%RBX),%R10 |
0x440671 MOV 0x28(%RBX),%RBX |
0x440675 MOVSXD %R11D,%R11 |
0x440678 MOV %R11,0x118(%RSP) |
0x440680 MOV %R15,0x88(%RSP) |
0x440688 MOV %R14,0x78(%RSP) |
0x44068d MOV %RDX,0x48(%RSP) |
0x440692 MOV %R10,0x40(%RSP) |
0x440697 MOV %RBX,0x38(%RSP) |
0x44069c NOPL (%RAX) |
(268) 0x4406a0 CMP %EAX,%ECX |
(268) 0x4406a2 CMOVBE %ECX,%EAX |
(268) 0x4406a5 MOV 0x138(%RSP),%ECX |
(268) 0x4406ac MOV %EAX,0x128(%RSP) |
(268) 0x4406b3 ADD %ECX,%EAX |
(268) 0x4406b5 MOV %EAX,0x13c(%RSP) |
(268) 0x4406bc CMP %EAX,%ECX |
(268) 0x4406be JAE 440fdd |
(268) 0x4406c4 MOV 0x78(%RSP),%R8 |
(268) 0x4406c9 MOV 0x70(%RSP),%RBX |
(268) 0x4406ce MOV 0x88(%RSP),%RDI |
(268) 0x4406d6 MOV 0x118(%RSP),%R9 |
(268) 0x4406de MOV 0x80(%RSP),%R15 |
(268) 0x4406e6 MOV (%R8),%RDX |
(268) 0x4406e9 VMOVQ 0x10(%R8),%XMM17 |
(268) 0x4406f0 MOV (%RBX),%R8 |
(268) 0x4406f3 MOV (%RDI),%RAX |
(268) 0x4406f6 MOV (%R15),%R14 |
(268) 0x4406f9 VMOVQ 0x10(%RDI),%XMM18 |
(268) 0x440700 MOV %R9,%RCX |
(268) 0x440703 MOV 0x68(%RSP),%RDI |
(268) 0x440708 VMOVQ 0x10(%R15),%XMM16 |
(268) 0x44070f MOV 0x60(%RSP),%R15 |
(268) 0x440714 MOV %R9,%R10 |
(268) 0x440717 MOV %R9,%R13 |
(268) 0x44071a IMUL %R8,%RCX |
(268) 0x44071e IMUL %RDX,%R10 |
(268) 0x440722 IMUL %R9,%RAX |
(268) 0x440726 MOV (%R15),%RSI |
(268) 0x440729 VMOVQ 0x10(%RDI),%XMM29 |
(268) 0x440730 VMOVQ 0x10(%R15),%XMM31 |
(268) 0x440737 IMUL %R14,%R13 |
(268) 0x44073b ADD %RCX,%R8 |
(268) 0x44073e VMOVQ %RCX,%XMM25 |
(268) 0x440744 MOV 0x50(%RSP),%RCX |
(268) 0x440749 LEA (%RDX,%R10,1),%R11 |
(268) 0x44074d MOV 0x58(%RSP),%RDX |
(268) 0x440752 VMOVQ %RAX,%XMM27 |
(268) 0x440758 MOV (%RDI),%RAX |
(268) 0x44075b VMOVQ %R10,%XMM19 |
(268) 0x440761 MOV %R11,0xb8(%RSP) |
(268) 0x440769 LEA (%R14,%R13,1),%R12 |
(268) 0x44076d MOV %R9,%R14 |
(268) 0x440770 VMOVQ %R13,%XMM20 |
(268) 0x440776 MOV 0x10(%RBX),%R13 |
(268) 0x44077a IMUL %R9,%RSI |
(268) 0x44077e MOV %R12,0xb0(%RSP) |
(268) 0x440786 MOV %R8,0xc0(%RSP) |
(268) 0x44078e MOV (%RCX),%RDI |
(268) 0x440791 MOV 0x10(%RDX),%R10 |
(268) 0x440795 MOV 0x10(%RCX),%RBX |
(268) 0x440799 VMOVQ %RSI,%XMM30 |
(268) 0x44079f IMUL %R9,%RAX |
(268) 0x4407a3 IMUL (%RDX),%R9 |
(268) 0x4407a7 IMUL %R14,%RDI |
(268) 0x4407ab MOV %R10,0xd0(%RSP) |
(268) 0x4407b3 MOV %RBX,0x110(%RSP) |
(268) 0x4407bb VMOVQ %RAX,%XMM28 |
(268) 0x4407c1 MOV %RDI,0xd8(%RSP) |
(268) 0x4407c9 MOV %R9,0xc8(%RSP) |
(268) 0x4407d1 MOV 0x48(%RSP),%RAX |
(268) 0x4407d6 MOV 0x40(%RSP),%R10 |
(268) 0x4407db MOV (%RAX),%RDX |
(268) 0x4407de MOV 0x10(%RAX),%R15 |
(268) 0x4407e2 MOV (%R10),%RBX |
(268) 0x4407e5 MOV 0x10(%R10),%RCX |
(268) 0x4407e9 IMUL %R14,%RDX |
(268) 0x4407ed MOV %R15,0x100(%RSP) |
(268) 0x4407f5 IMUL %R14,%RBX |
(268) 0x4407f9 MOV %RCX,0xf0(%RSP) |
(268) 0x440801 MOV %RDX,0x108(%RSP) |
(268) 0x440809 MOV 0x38(%RSP),%RDX |
(268) 0x44080e MOV %RBX,0xf8(%RSP) |
(268) 0x440816 MOV (%RDX),%RAX |
(268) 0x440819 MOV 0x10(%RDX),%RDI |
(268) 0x44081d IMUL %R14,%RAX |
(268) 0x440821 MOV 0x128(%RSP),%R14D |
(268) 0x440829 MOV %RDI,0xe0(%RSP) |
(268) 0x440831 MOV %RAX,0xe8(%RSP) |
(268) 0x440839 LEA -0x1(%R14),%R15D |
(268) 0x44083d CMP $0x6,%R15D |
(268) 0x440841 JBE 441050 |
(268) 0x440847 MOVSXD 0x120(%RSP),%RAX |
(268) 0x44084f VMOVQ %XMM27,%RSI |
(268) 0x440855 VMOVQ %XMM20,%RDI |
(268) 0x44085b MOV %R13,0xa8(%RSP) |
(268) 0x440863 VMOVQ %XMM18,%R10 |
(268) 0x440869 VMOVQ %XMM18,%R15 |
(268) 0x44086f VMOVQ %XMM16,%R14 |
(268) 0x440875 LEA (%RSI,%RAX,1),%RBX |
(268) 0x440879 LEA (%R12,%RAX,1),%RDX |
(268) 0x44087d LEA (%RDI,%RAX,1),%RSI |
(268) 0x440881 VMOVQ %XMM25,%RDI |
(268) 0x440887 SAL $0x3,%RBX |
(268) 0x44088b VMOVQ %XMM16,%R12 |
(268) 0x440891 SAL $0x3,%RSI |
(268) 0x440895 SAL $0x3,%RDX |
(268) 0x440899 LEA (%R10,%RBX,1),%RCX |
(268) 0x44089d LEA 0x8(%R15,%RBX,1),%R15 |
(268) 0x4408a2 VMOVQ %XMM16,%RBX |
(268) 0x4408a8 LEA (%R14,%RSI,1),%R10 |
(268) 0x4408ac LEA 0x8(%R14,%RSI,1),%R14 |
(268) 0x4408b1 LEA 0x8(%RBX,%RDX,1),%RSI |
(268) 0x4408b6 ADD %RDX,%R12 |
(268) 0x4408b9 VMOVQ %XMM19,%RDX |
(268) 0x4408bf VMOVQ %RSI,%XMM24 |
(268) 0x4408c5 LEA (%RDI,%RAX,1),%RSI |
(268) 0x4408c9 LEA (%RDX,%RAX,1),%RBX |
(268) 0x4408cd VMOVQ %R12,%XMM7 |
(268) 0x4408d2 SAL $0x3,%RSI |
(268) 0x4408d6 VMOVQ %XMM17,%R12 |
(268) 0x4408dc LEA (%R13,%RSI,1),%RDX |
(268) 0x4408e1 LEA (%R12,%RBX,8),%R12 |
(268) 0x4408e5 LEA 0x8(%R13,%RSI,1),%RBX |
(268) 0x4408ea LEA (%R8,%RAX,1),%RSI |
(268) 0x4408ee SAL $0x3,%RSI |
(268) 0x4408f2 VMOVQ %RBX,%XMM4 |
(268) 0x4408f7 VMOVQ %RDX,%XMM13 |
(268) 0x4408fc LEA (%R11,%RAX,1),%RDI |
(268) 0x440900 VMOVQ %XMM17,%R11 |
(268) 0x440906 LEA (%R13,%RSI,1),%RDX |
(268) 0x44090b LEA (%R11,%RDI,8),%RBX |
(268) 0x44090f VMOVQ %XMM28,%R11 |
(268) 0x440915 LEA 0x8(%R13,%RSI,1),%RDI |
(268) 0x44091a VMOVQ %RDX,%XMM23 |
(268) 0x440920 LEA (%R11,%RAX,1),%R8 |
(268) 0x440924 VMOVQ %XMM29,%RSI |
(268) 0x44092a VMOVQ %XMM30,%RDX |
(268) 0x440930 MOV %RDI,%R13 |
(268) 0x440933 LEA (%RSI,%R8,8),%R11 |
(268) 0x440937 ADD %RAX,%RDX |
(268) 0x44093a VMOVQ %XMM31,%R8 |
(268) 0x440940 LEA (%R8,%RDX,8),%RSI |
(268) 0x440944 LEA (%R9,%RAX,1),%RDX |
(268) 0x440948 MOV 0xd0(%RSP),%R9 |
(268) 0x440950 MOV 0xd8(%RSP),%R8 |
(268) 0x440958 VMOVQ %RSI,%XMM15 |
(268) 0x44095d LEA (%R9,%RDX,8),%RDX |
(268) 0x440961 MOV 0x110(%RSP),%R9 |
(268) 0x440969 LEA (%R8,%RAX,1),%RSI |
(268) 0x44096d MOV 0x100(%RSP),%R8 |
(268) 0x440975 LEA (%R9,%RSI,8),%R9 |
(268) 0x440979 MOV 0x108(%RSP),%RSI |
(268) 0x440981 MOV %R9,%RDI |
(268) 0x440984 MOV %RCX,%R9 |
(268) 0x440987 MOV %R11,%RCX |
(268) 0x44098a VMOVQ %XMM4,%R11 |
(268) 0x44098f ADD %RAX,%RSI |
(268) 0x440992 LEA (%R8,%RSI,8),%RSI |
(268) 0x440996 MOV 0xf0(%RSP),%R8 |
(268) 0x44099e VMOVQ %RSI,%XMM9 |
(268) 0x4409a3 MOV 0xf8(%RSP),%RSI |
(268) 0x4409ab ADD %RAX,%RSI |
(268) 0x4409ae LEA (%R8,%RSI,8),%RSI |
(268) 0x4409b2 MOV 0xe8(%RSP),%R8 |
(268) 0x4409ba VMOVQ %RSI,%XMM8 |
(268) 0x4409bf MOV 0xe0(%RSP),%RSI |
(268) 0x4409c7 ADD %R8,%RAX |
(268) 0x4409ca MOV 0x128(%RSP),%R8D |
(268) 0x4409d2 LEA (%RSI,%RAX,8),%RAX |
(268) 0x4409d6 VMOVQ %RAX,%XMM26 |
(268) 0x4409dc XOR %EAX,%EAX |
(268) 0x4409de SHR $0x3,%R8D |
(268) 0x4409e2 SAL $0x6,%R8 |
(268) 0x4409e6 MOV %R8,0x130(%RSP) |
(268) 0x4409ee VMOVQ %XMM7,%R8 |
(268) 0x4409f3 NOPW %CS:(%RAX,%RAX,1) |
(268) 0x4409fe XCHG %AX,%AX |
(270) 0x440a00 VMOVQ %XMM13,%RSI |
(270) 0x440a05 VMOVUPD (%R12,%RAX,1),%ZMM3 |
(270) 0x440a0c VMOVUPD (%R15,%RAX,1),%ZMM7 |
(270) 0x440a13 VMOVUPD (%R8,%RAX,1),%ZMM5 |
(270) 0x440a1a VMOVUPD (%RSI,%RAX,1),%ZMM1 |
(270) 0x440a21 VADDPD (%R11,%RAX,1),%ZMM1,%ZMM2 |
(270) 0x440a28 VADDPD (%R10,%RAX,1),%ZMM5,%ZMM4 |
(270) 0x440a2f VMOVQ %XMM24,%RSI |
(270) 0x440a35 VMOVUPD (%RSI,%RAX,1),%ZMM5 |
(270) 0x440a3c VMOVQ %XMM23,%RSI |
(270) 0x440a42 VADDPD (%R14,%RAX,1),%ZMM5,%ZMM1 |
(270) 0x440a49 VADDPD %ZMM3,%ZMM3,%ZMM6 |
(270) 0x440a4f VADDPD %ZMM7,%ZMM7,%ZMM3 |
(270) 0x440a55 VMOVUPD (%RBX,%RAX,1),%ZMM7 |
(270) 0x440a5c VMULPD %ZMM6,%ZMM2,%ZMM2 |
(270) 0x440a62 VMOVUPD (%RSI,%RAX,1),%ZMM6 |
(270) 0x440a69 VADDPD (%R13,%RAX,1),%ZMM6,%ZMM5 |
(270) 0x440a71 VMOVQ %XMM15,%RSI |
(270) 0x440a76 VADDPD %ZMM7,%ZMM7,%ZMM6 |
(270) 0x440a7c VMULPD %ZMM6,%ZMM5,%ZMM7 |
(270) 0x440a82 VMOVUPD (%RSI,%RAX,1),%ZMM5 |
(270) 0x440a89 VMOVQ %XMM9,%RSI |
(270) 0x440a8e VFMADD231PD %ZMM1,%ZMM3,%ZMM7 |
(270) 0x440a94 VMOVUPD (%R9,%RAX,1),%ZMM3 |
(270) 0x440a9b VADDPD %ZMM3,%ZMM3,%ZMM1 |
(270) 0x440aa1 VADDPD (%RDI,%RAX,1),%ZMM5,%ZMM3 |
(270) 0x440aa8 VFMADD132PD %ZMM4,%ZMM2,%ZMM1 |
(270) 0x440aae VSUBPD %ZMM1,%ZMM7,%ZMM4 |
(270) 0x440ab4 VMOVUPD (%RCX,%RAX,1),%ZMM7 |
(270) 0x440abb VDIVPD (%RDX,%RAX,1),%ZMM3,%ZMM1 |
(270) 0x440ac2 VMULPD %ZMM12,%ZMM4,%ZMM6 |
(270) 0x440ac8 VADDPD (%RCX,%RAX,1),%ZMM6,%ZMM2 |
(270) 0x440acf VDIVPD %ZMM2,%ZMM7,%ZMM4 |
(270) 0x440ad5 VDIVPD %ZMM7,%ZMM11,%ZMM2 |
(270) 0x440adb VMULPD %ZMM2,%ZMM1,%ZMM7 |
(270) 0x440ae1 VFNMADD213PD (%RSI,%RAX,1),%ZMM7,%ZMM6 |
(270) 0x440ae8 VMOVQ %XMM8,%RSI |
(270) 0x440aed VMOVUPD %ZMM6,(%RSI,%RAX,1) |
(270) 0x440af4 VMULPD (%RDX,%RAX,1),%ZMM4,%ZMM6 |
(270) 0x440afb VMOVQ %XMM26,%RSI |
(270) 0x440b01 VMOVUPD %ZMM6,(%RSI,%RAX,1) |
(270) 0x440b08 MOV 0x130(%RSP),%RSI |
(270) 0x440b10 ADD $0x40,%RAX |
(270) 0x440b14 CMP %RSI,%RAX |
(270) 0x440b17 JNE 440a00 |
(268) 0x440b1d MOV 0x128(%RSP),%R10D |
(268) 0x440b25 MOV 0x120(%RSP),%R15D |
(268) 0x440b2d MOV 0xa8(%RSP),%R13 |
(268) 0x440b35 MOV %R10D,%EDX |
(268) 0x440b38 AND $-0x8,%EDX |
(268) 0x440b3b ADD %EDX,0x138(%RSP) |
(268) 0x440b42 LEA (%RDX,%R15,1),%R14D |
(268) 0x440b46 MOV %R14D,0xa8(%RSP) |
(268) 0x440b4e TEST $0x7,%R10B |
(268) 0x440b52 JE 440fcf |
(268) 0x440b58 MOV 0x128(%RSP),%R12D |
(268) 0x440b60 SUB %EDX,%R12D |
(268) 0x440b63 MOV %R12D,0xa4(%RSP) |
(268) 0x440b6b DEC %R12D |
(268) 0x440b6e CMP $0x2,%R12D |
(268) 0x440b72 JBE 440dac |
(268) 0x440b78 MOVSXD 0x120(%RSP),%RAX |
(268) 0x440b80 MOV 0xb0(%RSP),%R9 |
(268) 0x440b88 VMOVQ %XMM25,%R11 |
(268) 0x440b8e VMOVQ %XMM30,%R15 |
(268) 0x440b94 VMOVQ %XMM27,%RBX |
(268) 0x440b9a VMOVQ %XMM28,%R10 |
(268) 0x440ba0 VMOVQ %XMM20,%RDI |
(268) 0x440ba6 LEA (%R9,%RAX,1),%RSI |
(268) 0x440baa LEA (%R11,%RAX,1),%R9 |
(268) 0x440bae MOV 0xc8(%RSP),%R11 |
(268) 0x440bb6 LEA (%R15,%RAX,1),%R12 |
(268) 0x440bba MOV 0xd0(%RSP),%R15 |
(268) 0x440bc2 LEA (%RBX,%RAX,1),%RCX |
(268) 0x440bc6 LEA (%R10,%RAX,1),%R14 |
(268) 0x440bca LEA (%R12,%RDX,1),%RBX |
(268) 0x440bce MOV %RBX,0x130(%RSP) |
(268) 0x440bd6 MOV 0xd8(%RSP),%RBX |
(268) 0x440bde ADD %RDX,%RSI |
(268) 0x440be1 LEA (%RDI,%RAX,1),%R8 |
(268) 0x440be5 ADD %RDX,%R8 |
(268) 0x440be8 ADD %RDX,%R9 |
(268) 0x440beb MOV 0xc0(%RSP),%RDI |
(268) 0x440bf3 ADD %RDX,%RCX |
(268) 0x440bf6 VMOVUPD 0x8(%R13,%R9,8),%YMM4 |
(268) 0x440bfd VADDPD (%R13,%R9,8),%YMM4,%YMM5 |
(268) 0x440c04 VMOVQ %XMM18,%R9 |
(268) 0x440c0a ADD %RDX,%R14 |
(268) 0x440c0d LEA (%R11,%RAX,1),%R10 |
(268) 0x440c11 ADD %RDX,%R10 |
(268) 0x440c14 LEA (%R15,%R10,8),%R12 |
(268) 0x440c18 MOV 0xf8(%RSP),%R15 |
(268) 0x440c20 LEA (%RBX,%RAX,1),%R11 |
(268) 0x440c24 MOV 0x108(%RSP),%R10 |
(268) 0x440c2c ADD %RDX,%R11 |
(268) 0x440c2f ADD %RAX,%RDI |
(268) 0x440c32 SAL $0x3,%RSI |
(268) 0x440c36 MOV %R11,0x128(%RSP) |
(268) 0x440c3e SAL $0x3,%RCX |
(268) 0x440c42 ADD %RAX,%R15 |
(268) 0x440c45 LEA (%R10,%RAX,1),%RBX |
(268) 0x440c49 MOV 0xe8(%RSP),%R10 |
(268) 0x440c51 VMOVUPD 0x8(%R9,%RCX,1),%YMM3 |
(268) 0x440c58 LEA (%R15,%RDX,1),%R11 |
(268) 0x440c5c VMOVQ %XMM16,%R15 |
(268) 0x440c62 ADD %RDX,%RDI |
(268) 0x440c65 ADD %RDX,%RBX |
(268) 0x440c68 VMOVUPD (%R15,%R8,8),%YMM13 |
(268) 0x440c6e VADDPD (%R15,%RSI,1),%YMM13,%YMM15 |
(268) 0x440c74 VMOVQ %XMM19,%R15 |
(268) 0x440c7a MOV %R11,0x120(%RSP) |
(268) 0x440c82 ADD %RAX,%R15 |
(268) 0x440c85 VMOVQ %XMM17,%R11 |
(268) 0x440c8b ADD %RDX,%R15 |
(268) 0x440c8e VMOVUPD (%R11,%R15,8),%YMM9 |
(268) 0x440c94 VMOVQ %XMM16,%R15 |
(268) 0x440c9a LEA (%R10,%RAX,1),%R10 |
(268) 0x440c9e VMOVUPD 0x8(%R15,%RSI,1),%YMM2 |
(268) 0x440ca5 VADDPD 0x8(%R15,%R8,8),%YMM2,%YMM6 |
(268) 0x440cac MOV 0xb8(%RSP),%R8 |
(268) 0x440cb4 VADDPD %YMM3,%YMM3,%YMM7 |
(268) 0x440cb8 VMOVUPD (%R9,%RCX,1),%YMM3 |
(268) 0x440cbe VMOVQ %XMM29,%RCX |
(268) 0x440cc4 MOV 0x130(%RSP),%R9 |
(268) 0x440ccc ADD %RDX,%R10 |
(268) 0x440ccf VMOVUPD (%RCX,%R14,8),%YMM2 |
(268) 0x440cd5 MOV 0x110(%RSP),%R14 |
(268) 0x440cdd VADDPD %YMM9,%YMM9,%YMM8 |
(268) 0x440ce2 ADD %R8,%RAX |
(268) 0x440ce5 ADD %RDX,%RAX |
(268) 0x440ce8 VMOVUPD (%R11,%RAX,8),%YMM13 |
(268) 0x440cee VMOVQ %XMM31,%R11 |
(268) 0x440cf4 VMULPD %YMM5,%YMM8,%YMM1 |
(268) 0x440cf8 VMOVUPD 0x8(%R13,%RDI,8),%YMM8 |
(268) 0x440cff VADDPD (%R13,%RDI,8),%YMM8,%YMM4 |
(268) 0x440d06 MOV 0x128(%RSP),%RDI |
(268) 0x440d0e VADDPD %YMM13,%YMM13,%YMM9 |
(268) 0x440d13 VMULPD %YMM4,%YMM9,%YMM5 |
(268) 0x440d17 VMOVUPD (%R14,%RDI,8),%YMM9 |
(268) 0x440d1d VADDPD (%R11,%R9,8),%YMM9,%YMM8 |
(268) 0x440d23 VDIVPD (%R12),%YMM8,%YMM4 |
(268) 0x440d29 MOV 0x100(%RSP),%R15 |
(268) 0x440d31 MOV 0x120(%RSP),%RSI |
(268) 0x440d39 VFMADD231PD %YMM6,%YMM7,%YMM5 |
(268) 0x440d3e VADDPD %YMM3,%YMM3,%YMM7 |
(268) 0x440d42 VFMADD132PD %YMM15,%YMM1,%YMM7 |
(268) 0x440d47 VSUBPD %YMM7,%YMM5,%YMM15 |
(268) 0x440d4b VDIVPD %YMM2,%YMM14,%YMM5 |
(268) 0x440d4f VMULPD %YMM0,%YMM15,%YMM1 |
(268) 0x440d53 VADDPD %YMM2,%YMM1,%YMM6 |
(268) 0x440d57 VDIVPD %YMM6,%YMM2,%YMM13 |
(268) 0x440d5b VMULPD %YMM5,%YMM4,%YMM3 |
(268) 0x440d5f VFNMADD213PD (%R15,%RBX,8),%YMM3,%YMM1 |
(268) 0x440d65 MOV 0xf0(%RSP),%RBX |
(268) 0x440d6d VMOVUPD %YMM1,(%RBX,%RSI,8) |
(268) 0x440d72 VMULPD (%R12),%YMM13,%YMM7 |
(268) 0x440d78 MOV 0xe0(%RSP),%R12 |
(268) 0x440d80 VMOVUPD %YMM7,(%R12,%R10,8) |
(268) 0x440d86 MOV 0xa4(%RSP),%R10D |
(268) 0x440d8e TEST $0x3,%R10B |
(268) 0x440d92 JE 440fcf |
(268) 0x440d98 AND $-0x4,%R10D |
(268) 0x440d9c ADD %R10D,0x138(%RSP) |
(268) 0x440da4 ADD %R10D,0xa8(%RSP) |
(268) 0x440dac MOVSXD 0xa8(%RSP),%R8 |
(268) 0x440db4 VMOVQ %XMM18,%RDX |
(268) 0x440dba VMOVQ %XMM27,%RAX |
(268) 0x440dc0 MOV 0xb0(%RSP),%R11 |
(268) 0x440dc8 LEA (%RDX,%RAX,8),%RDI |
(268) 0x440dcc MOV 0xc0(%RSP),%RAX |
(268) 0x440dd4 VMOVQ %XMM16,%RCX |
(268) 0x440dda VMOVQ %XMM20,%R14 |
(268) 0x440de0 VMOVQ %XMM19,%R15 |
(268) 0x440de6 VMOVQ %XMM25,%RBX |
(268) 0x440dec LEA (%RCX,%R14,8),%RSI |
(268) 0x440df0 VMOVQ %XMM17,%R12 |
(268) 0x440df6 VMOVQ %XMM17,%R9 |
(268) 0x440dfc LEA (%R13,%RBX,8),%RDX |
(268) 0x440e01 LEA (%R9,%R15,8),%R9 |
(268) 0x440e05 VMOVQ %XMM28,%R14 |
(268) 0x440e0b VMOVQ %XMM31,%R15 |
(268) 0x440e11 MOV %R8,%R10 |
(268) 0x440e14 MOV %R8,0x128(%RSP) |
(268) 0x440e1c MOV 0xb8(%RSP),%R8 |
(268) 0x440e24 LEA (%RCX,%R11,8),%RCX |
(268) 0x440e28 LEA (%R13,%RAX,8),%RBX |
(268) 0x440e2d VMOVQ %XMM29,%R13 |
(268) 0x440e33 MOV 0xd0(%RSP),%RAX |
(268) 0x440e3b LEA (%R12,%R8,8),%R11 |
(268) 0x440e3f VMOVQ %XMM30,%R8 |
(268) 0x440e45 LEA (%R13,%R14,8),%R12 |
(268) 0x440e4a MOV 0xc8(%RSP),%R14 |
(268) 0x440e52 LEA (%R15,%R8,8),%R13 |
(268) 0x440e56 MOV 0xd8(%RSP),%R8 |
(268) 0x440e5e MOV 0x110(%RSP),%R15 |
(268) 0x440e66 LEA (%RAX,%R14,8),%R14 |
(268) 0x440e6a MOV 0x100(%RSP),%RAX |
(268) 0x440e72 LEA (%R15,%R8,8),%R15 |
(268) 0x440e76 MOV 0x108(%RSP),%R8 |
(268) 0x440e7e VMOVQ %R15,%XMM15 |
(268) 0x440e83 MOV %R12,%R15 |
(268) 0x440e86 MOV %R9,%R12 |
(268) 0x440e89 LEA (%RAX,%R8,8),%RAX |
(268) 0x440e8d MOV 0xf8(%RSP),%R8 |
(268) 0x440e95 VMOVQ %RAX,%XMM13 |
(268) 0x440e9a MOV 0xf0(%RSP),%RAX |
(268) 0x440ea2 LEA (%RAX,%R8,8),%RAX |
(268) 0x440ea6 MOV 0xe8(%RSP),%R8 |
(268) 0x440eae VMOVQ %RAX,%XMM9 |
(268) 0x440eb3 MOV 0xe0(%RSP),%RAX |
(268) 0x440ebb LEA (%RAX,%R8,8),%R8 |
(268) 0x440ebf MOV 0x138(%RSP),%EAX |
(268) 0x440ec6 SUB %R10D,%EAX |
(268) 0x440ec9 MOV %R13,%R10 |
(268) 0x440ecc MOV %R11,%R13 |
(268) 0x440ecf MOV %R8,%R11 |
(268) 0x440ed2 MOV %EAX,0x130(%RSP) |
(268) 0x440ed9 MOV 0x128(%RSP),%RAX |
(268) 0x440ee1 NOPW %CS:(%RAX,%RAX,1) |
(268) 0x440eec NOPL (%RAX) |
(269) 0x440ef0 VMOVSD (%RSI,%RAX,8),%XMM1 |
(269) 0x440ef5 VMOVSD (%RDX,%RAX,8),%XMM2 |
(269) 0x440efa VADDSD (%RCX,%RAX,8),%XMM1,%XMM8 |
(269) 0x440eff VADDSD 0x8(%RDX,%RAX,8),%XMM2,%XMM6 |
(269) 0x440f05 VMOVSD (%R12,%RAX,8),%XMM4 |
(269) 0x440f0b VMOVSD 0x8(%RDI,%RAX,8),%XMM7 |
(269) 0x440f11 VMOVSD 0x8(%RSI,%RAX,8),%XMM1 |
(269) 0x440f17 VADDSD %XMM4,%XMM4,%XMM5 |
(269) 0x440f1b VADDSD %XMM7,%XMM7,%XMM2 |
(269) 0x440f1f VADDSD 0x8(%RCX,%RAX,8),%XMM1,%XMM4 |
(269) 0x440f25 VMOVSD (%R13,%RAX,8),%XMM7 |
(269) 0x440f2c VMULSD %XMM5,%XMM6,%XMM3 |
(269) 0x440f30 VADDSD %XMM7,%XMM7,%XMM1 |
(269) 0x440f34 VMOVSD (%RBX,%RAX,8),%XMM6 |
(269) 0x440f39 VADDSD 0x8(%RBX,%RAX,8),%XMM6,%XMM5 |
(269) 0x440f3f VMULSD %XMM1,%XMM5,%XMM6 |
(269) 0x440f43 VFMADD132SD %XMM2,%XMM6,%XMM4 |
(269) 0x440f48 VMOVSD (%RDI,%RAX,8),%XMM2 |
(269) 0x440f4d VADDSD %XMM2,%XMM2,%XMM5 |
(269) 0x440f51 VFMADD132SD %XMM8,%XMM3,%XMM5 |
(269) 0x440f56 VMOVQ %XMM15,%R9 |
(269) 0x440f5b VMOVSD (%R10,%RAX,8),%XMM1 |
(269) 0x440f61 VMOVSD (%R15,%RAX,8),%XMM7 |
(269) 0x440f67 VADDSD (%R9,%RAX,8),%XMM1,%XMM2 |
(269) 0x440f6d VMOVQ %XMM13,%R8 |
(269) 0x440f72 VMOVQ %XMM9,%R9 |
(269) 0x440f77 VSUBSD %XMM5,%XMM4,%XMM8 |
(269) 0x440f7b VDIVSD (%R14,%RAX,8),%XMM2,%XMM5 |
(269) 0x440f81 VMULSD %XMM21,%XMM8,%XMM3 |
(269) 0x440f87 VDIVSD %XMM7,%XMM10,%XMM8 |
(269) 0x440f8b VADDSD %XMM7,%XMM3,%XMM4 |
(269) 0x440f8f VDIVSD %XMM4,%XMM7,%XMM6 |
(269) 0x440f93 VMULSD %XMM8,%XMM5,%XMM7 |
(269) 0x440f98 VFNMADD213SD (%R8,%RAX,8),%XMM7,%XMM3 |
(269) 0x440f9e VMOVSD %XMM3,(%R9,%RAX,8) |
(269) 0x440fa4 MOV 0x130(%RSP),%R9D |
(269) 0x440fac MOV 0x13c(%RSP),%R8D |
(269) 0x440fb4 VMULSD (%R14,%RAX,8),%XMM6,%XMM3 |
(269) 0x440fba VMOVSD %XMM3,(%R11,%RAX,8) |
(269) 0x440fc0 INC %RAX |
(269) 0x440fc3 ADD %EAX,%R9D |
(269) 0x440fc6 CMP %R8D,%R9D |
(269) 0x440fc9 JB 440ef0 |
(268) 0x440fcf MOV 0x13c(%RSP),%EDI |
(268) 0x440fd6 MOV %EDI,0x138(%RSP) |
(268) 0x440fdd INCQ 0x118(%RSP) |
(268) 0x440fe5 MOV 0x118(%RSP),%RSI |
(268) 0x440fed ADD $0,%ESI |
(268) 0x440ff0 CMP %ESI,0x9c(%RSP) |
(268) 0x440ff7 JLE 441030 |
(268) 0x440ff9 MOV 0x94(%RSP),%ECX |
(268) 0x441000 MOV 0x138(%RSP),%EDX |
(268) 0x441007 MOV 0x98(%RSP),%EBX |
(268) 0x44100e MOV 0xa0(%RSP),%EAX |
(268) 0x441015 SUB %EDX,%ECX |
(268) 0x441017 MOV %EBX,0x120(%RSP) |
(268) 0x44101e JMP 4406a0 |
0x441023 NOPW %CS:(%RAX,%RAX,1) |
0x44102e XCHG %AX,%AX |
0x441030 VZEROUPPER |
0x441033 LEA -0x28(%RBP),%RSP |
0x441037 POP %RBX |
0x441038 POP %R12 |
0x44103a POP %R13 |
0x44103c POP %R14 |
0x44103e POP %R15 |
0x441040 POP %RBP |
0x441041 RET |
0x441042 NOPW %CS:(%RAX,%RAX,1) |
0x44104d NOPL (%RAX) |
(268) 0x441050 MOV 0x120(%RSP),%R9D |
(268) 0x441058 XOR %EDX,%EDX |
(268) 0x44105a MOV %R9D,0xa8(%RSP) |
(268) 0x441062 JMP 440b58 |
0x441067 INC %ECX |
0x441069 XOR %EDX,%EDX |
0x44106b JMP 4405b1 |
Path / |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 103 |
nb uops | 103 |
loop length | 438 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 17.17 cycles |
front end | 17.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 13.67 | 13.67 | 13.67 | 0.50 | 2.50 | 2.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 13.67 | 13.67 | 13.67 | 0.50 | 2.50 | 2.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 17.17 |
Dispatch | 13.67 |
DIV/SQRT | 12.00 |
Overall L1 | 17.17 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 7% |
all | 9% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x140,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x60(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x9c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 441033 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb13> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R15D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 441033 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb13> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xa0(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 441067 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb47> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%RAX,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9D,0x94(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 441033 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb13> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x98(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x50(%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RBX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
DIVL 0xa0(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV 0x18(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RBX),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x48(%RBX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD 0x20d1a(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM0,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x1ff84(%RIP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM10,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM21,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM10,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %RDI,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %EDX,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R15,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RBX),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %R10D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10D,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x38(%RBX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD %R11D,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,0x118(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RBX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4405b1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 103 |
nb uops | 103 |
loop length | 438 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 17.17 cycles |
front end | 17.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 13.67 | 13.67 | 13.67 | 0.50 | 2.50 | 2.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 13.67 | 13.67 | 13.67 | 0.50 | 2.50 | 2.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 17.17 |
Dispatch | 13.67 |
DIV/SQRT | 12.00 |
Overall L1 | 17.17 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 7% |
all | 9% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x140,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x60(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x2,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x9c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 441033 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb13> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R15D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 441033 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb13> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14D,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xa0(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 441067 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb47> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%RAX,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EAX,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9D,0x94(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 441033 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb13> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x98(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x50(%RBX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RBX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
DIVL 0xa0(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV 0x18(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RBX),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RBX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x48(%RBX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD 0x20d1a(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM0,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x1ff84(%RIP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM21,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM10,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM21,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM10,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %RDI,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %EDX,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R15,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RBX),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %R10D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10D,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x38(%RBX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD %R11D,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,0x118(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RBX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4405b1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0– | 5.43 | 2.73 |
▼Loop 268 - PdV.cpp:50-63 - exec– | 0.03 | 0.01 |
○Loop 270 - PdV.cpp:55-63 - exec | 5.41 | 2.72 |
○Loop 269 - PdV.cpp:55-63 - exec | 0 | 0 |