Function: PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 5.34% |
---|
Function: PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D<d ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 5.34% |
---|
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/PdV.cpp: 48 - 63 |
-------------------------------------------------------------------------------- |
48: #pragma omp parallel for simd collapse(2) |
49: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
50: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
51: double left_flux = (xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel0(i, j) + xvel0(i + 0, j + 1))) * 0.25 * dt * 0.5; |
52: double right_flux = |
53: (xarea(i + 1, j + 0) * (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1) + xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1))) * 0.25 * dt * |
54: 0.5; |
55: double bottom_flux = (yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel0(i, j) + yvel0(i + 1, j + 0))) * 0.25 * dt * 0.5; |
56: double top_flux = (yarea(i + 0, j + 1) * (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1) + yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1))) * |
57: 0.25 * dt * 0.5; |
58: double total_flux = right_flux - left_flux + top_flux - bottom_flux; |
59: double volume_change_s = volume(i, j) / (volume(i, j) + total_flux); |
60: double recip_volume = 1.0 / volume(i, j); |
61: double energy_change = (pressure(i, j) / density0(i, j) + viscosity(i, j) / density0(i, j)) * total_flux * recip_volume; |
62: energy1(i, j) = energy0(i, j) - energy_change; |
63: density1(i, j) = density0(i, j) * volume_change_s; |
0x43e430 PUSH %RBP |
0x43e431 MOV %RSP,%RBP |
0x43e434 PUSH %R15 |
0x43e436 PUSH %R14 |
0x43e438 PUSH %R13 |
0x43e43a PUSH %R12 |
0x43e43c PUSH %RBX |
0x43e43d MOV %RDI,%RBX |
0x43e440 AND $-0x40,%RSP |
0x43e444 SUB $0x1c0,%RSP |
0x43e44b MOV 0x68(%RDI),%EAX |
0x43e44e MOV 0x6c(%RDI),%EDX |
0x43e451 MOV 0x60(%RDI),%EDI |
0x43e454 MOV 0x64(%RBX),%ECX |
0x43e457 ADD $0x2,%EDX |
0x43e45a LEA 0x1(%RAX),%R15D |
0x43e45e LEA 0x1(%RDI),%ESI |
0x43e461 MOV %EDX,0x80(%RSP) |
0x43e468 MOV %ESI,0x7c(%RSP) |
0x43e46c CMP %EDX,%R15D |
0x43e46f JGE 43ef5b |
0x43e475 MOV %EDX,%R13D |
0x43e478 LEA 0x2(%RCX),%R14D |
0x43e47c SUB %R15D,%R13D |
0x43e47f CMP %R14D,%ESI |
0x43e482 JGE 43ef5b |
0x43e488 MOV %R14D,%R8D |
0x43e48b SUB %ESI,%R8D |
0x43e48e MOV %R8D,0x84(%RSP) |
0x43e496 CALL 404650 <omp_get_num_threads@plt> |
0x43e49b MOV %EAX,%R12D |
0x43e49e CALL 404540 <omp_get_thread_num@plt> |
0x43e4a3 XOR %EDX,%EDX |
0x43e4a5 MOV %EAX,%R9D |
0x43e4a8 MOV 0x84(%RSP),%EAX |
0x43e4af IMUL %R13D,%EAX |
0x43e4b3 DIV %R12D |
0x43e4b6 MOV %EAX,%ECX |
0x43e4b8 CMP %EDX,%R9D |
0x43e4bb JB 43ef87 |
0x43e4c1 IMUL %ECX,%R9D |
0x43e4c5 LEA (%R9,%RDX,1),%EAX |
0x43e4c9 LEA (%RCX,%RAX,1),%R10D |
0x43e4cd MOV %EAX,0x1b8(%RSP) |
0x43e4d4 MOV %R10D,0x78(%RSP) |
0x43e4d9 CMP %R10D,%EAX |
0x43e4dc JAE 43ef5b |
0x43e4e2 XOR %EDX,%EDX |
0x43e4e4 MOV 0x7c(%RSP),%R11D |
0x43e4e9 VMOVSD 0x20f2f(%RIP),%XMM2 |
0x43e4f1 DIVL 0x84(%RSP) |
0x43e4f8 MOV 0x8(%RBX),%RSI |
0x43e4fc MOV 0x10(%RBX),%R13 |
0x43e500 MOV 0x58(%RBX),%R8 |
0x43e504 MOV 0x18(%RBX),%R12 |
0x43e508 MOV 0x40(%RBX),%R9 |
0x43e50c MOV 0x20(%RBX),%R10 |
0x43e510 MOV %RSI,0x70(%RSP) |
0x43e515 VMOVSD 0x2007b(%RIP),%XMM3 |
0x43e51d MOV %R13,0x60(%RSP) |
0x43e522 MOV %R8,0x58(%RSP) |
0x43e527 MOV %R12,0x50(%RSP) |
0x43e52c MOV %R9,0x48(%RSP) |
0x43e531 MOV %R10,0x40(%RSP) |
0x43e536 VMULSD (%RBX),%XMM2,%XMM2 |
0x43e53a VBROADCASTSD %XMM3,%YMM6 |
0x43e53f VBROADCASTSD %XMM3,%ZMM4 |
0x43e545 VBROADCASTSD %XMM2,%YMM1 |
0x43e54a VBROADCASTSD %XMM2,%ZMM5 |
0x43e550 LEA (%RDX,%R11,1),%EDI |
0x43e554 LEA (%RAX,%R15,1),%R15D |
0x43e558 MOV %R14D,%EAX |
0x43e55b MOV 0x48(%RBX),%RDX |
0x43e55f MOV 0x50(%RBX),%R14 |
0x43e563 MOV 0x30(%RBX),%R11 |
0x43e567 MOV %EDI,0x148(%RSP) |
0x43e56e SUB %EDI,%EAX |
0x43e570 MOV 0x38(%RBX),%RDI |
0x43e574 MOV 0x28(%RBX),%RBX |
0x43e578 MOVSXD %R15D,%R15 |
0x43e57b MOV %RDX,0x38(%RSP) |
0x43e580 MOV %R14,0x68(%RSP) |
0x43e585 MOV %R11,0x30(%RSP) |
0x43e58a MOV %RDI,0x28(%RSP) |
0x43e58f MOV %RBX,0x20(%RSP) |
0x43e594 MOV %R15,0x140(%RSP) |
0x43e59c NOPL (%RAX) |
(271) 0x43e5a0 CMP %EAX,%ECX |
(271) 0x43e5a2 CMOVBE %ECX,%EAX |
(271) 0x43e5a5 MOV 0x1b8(%RSP),%ECX |
(271) 0x43e5ac MOV %EAX,0x14c(%RSP) |
(271) 0x43e5b3 ADD %ECX,%EAX |
(271) 0x43e5b5 MOV %EAX,0x1bc(%RSP) |
(271) 0x43e5bc CMP %EAX,%ECX |
(271) 0x43e5be JAE 43ef0e |
(271) 0x43e5c4 MOV 0x68(%RSP),%R14 |
(271) 0x43e5c9 MOV 0x140(%RSP),%RDI |
(271) 0x43e5d1 MOV 0x70(%RSP),%RAX |
(271) 0x43e5d6 MOV 0x58(%RSP),%R11 |
(271) 0x43e5db MOV (%R14),%R8 |
(271) 0x43e5de MOV %RDI,%RCX |
(271) 0x43e5e1 MOV 0x60(%RSP),%R9 |
(271) 0x43e5e6 MOV %RDI,%RBX |
(271) 0x43e5e9 MOV 0x10(%RAX),%R13 |
(271) 0x43e5ed MOV (%RAX),%RSI |
(271) 0x43e5f0 IMUL %R8,%RCX |
(271) 0x43e5f4 MOV (%R11),%RAX |
(271) 0x43e5f7 MOV 0x10(%R11),%R15 |
(271) 0x43e5fb MOV %RDI,%R11 |
(271) 0x43e5fe MOV (%R9),%RDX |
(271) 0x43e601 MOV 0x10(%R9),%R10 |
(271) 0x43e605 IMUL %RDI,%RSI |
(271) 0x43e609 MOV %R13,0x90(%RSP) |
(271) 0x43e611 IMUL %RAX,%R11 |
(271) 0x43e615 MOV %R15,0x1b0(%RSP) |
(271) 0x43e61d MOV 0x48(%RSP),%R15 |
(271) 0x43e622 ADD %RCX,%R8 |
(271) 0x43e625 IMUL %RDX,%RBX |
(271) 0x43e629 MOV 0x10(%R14),%R12 |
(271) 0x43e62d MOV %R10,0xc0(%RSP) |
(271) 0x43e635 MOV %R8,0xb8(%RSP) |
(271) 0x43e63d MOV 0x50(%RSP),%R8 |
(271) 0x43e642 LEA (%RAX,%R11,1),%R14 |
(271) 0x43e646 MOV %RDI,%RAX |
(271) 0x43e649 MOV 0x10(%R15),%R10 |
(271) 0x43e64d MOV %RSI,0x88(%RSP) |
(271) 0x43e655 IMUL (%R8),%RDI |
(271) 0x43e659 MOV 0x10(%R8),%R9 |
(271) 0x43e65d ADD %RBX,%RDX |
(271) 0x43e660 MOV %R14,0xd0(%RSP) |
(271) 0x43e668 MOV %R10,0xe8(%RSP) |
(271) 0x43e670 MOV %R9,0xd8(%RSP) |
(271) 0x43e678 MOV %R12,0x1a8(%RSP) |
(271) 0x43e680 MOV %RCX,0x98(%RSP) |
(271) 0x43e688 MOV %RBX,0xa0(%RSP) |
(271) 0x43e690 MOV %R11,0xa8(%RSP) |
(271) 0x43e698 MOV %RDX,0xc8(%RSP) |
(271) 0x43e6a0 MOV %RDI,0xb0(%RSP) |
(271) 0x43e6a8 MOV (%R15),%RDX |
(271) 0x43e6ab MOV 0x40(%RSP),%R9 |
(271) 0x43e6b0 MOV 0x38(%RSP),%R15 |
(271) 0x43e6b5 IMUL %RAX,%RDX |
(271) 0x43e6b9 MOV (%R9),%R8 |
(271) 0x43e6bc MOV 0x10(%R9),%R14 |
(271) 0x43e6c0 MOV 0x10(%R15),%R10 |
(271) 0x43e6c4 IMUL %RAX,%R8 |
(271) 0x43e6c8 MOV %R14,0xf8(%RSP) |
(271) 0x43e6d0 MOV 0x30(%RSP),%R14 |
(271) 0x43e6d5 MOV %RDX,0xe0(%RSP) |
(271) 0x43e6dd MOV (%R15),%RDX |
(271) 0x43e6e0 MOV 0x28(%RSP),%R15 |
(271) 0x43e6e5 MOV 0x10(%R14),%R9 |
(271) 0x43e6e9 MOV %R10,0x138(%RSP) |
(271) 0x43e6f1 MOV %R8,0xf0(%RSP) |
(271) 0x43e6f9 MOV (%R14),%R8 |
(271) 0x43e6fc IMUL %RAX,%RDX |
(271) 0x43e700 MOV 0x10(%R15),%R10 |
(271) 0x43e704 MOV (%R15),%R14 |
(271) 0x43e707 MOV %R9,0x128(%RSP) |
(271) 0x43e70f IMUL %RAX,%R8 |
(271) 0x43e713 MOV 0x20(%RSP),%R15 |
(271) 0x43e718 IMUL %RAX,%R14 |
(271) 0x43e71c MOV %R10,0x118(%RSP) |
(271) 0x43e724 MOV 0x10(%R15),%R9 |
(271) 0x43e728 MOV %RDX,0x100(%RSP) |
(271) 0x43e730 MOV %R8,0x130(%RSP) |
(271) 0x43e738 MOV (%R15),%R8 |
(271) 0x43e73b MOV %R14,0x120(%RSP) |
(271) 0x43e743 IMUL %RAX,%R8 |
(271) 0x43e747 MOV 0x14c(%RSP),%EAX |
(271) 0x43e74e MOV %R9,0x108(%RSP) |
(271) 0x43e756 LEA -0x1(%RAX),%R10D |
(271) 0x43e75a MOV %R8,0x110(%RSP) |
(271) 0x43e762 CMP $0x6,%R10D |
(271) 0x43e766 JBE 43ef70 |
(271) 0x43e76c MOVSXD 0x148(%RSP),%RAX |
(271) 0x43e774 MOV 0xb8(%RSP),%R9 |
(271) 0x43e77c MOV %R12,%R8 |
(271) 0x43e77f LEA (%R9,%RAX,1),%R10 |
(271) 0x43e783 ADD %RAX,%RSI |
(271) 0x43e786 LEA (%R11,%RAX,1),%R9 |
(271) 0x43e78a ADD %RAX,%RCX |
(271) 0x43e78d SAL $0x3,%RSI |
(271) 0x43e791 SAL $0x3,%R10 |
(271) 0x43e795 LEA (%R12,%R10,1),%RDX |
(271) 0x43e799 SAL $0x3,%RCX |
(271) 0x43e79d LEA (%R13,%RSI,1),%R15 |
(271) 0x43e7a2 SAL $0x3,%R9 |
(271) 0x43e7a6 LEA 0x8(%R13,%RSI,1),%R13 |
(271) 0x43e7ab LEA 0x8(%R8,%R10,1),%RSI |
(271) 0x43e7b0 MOV 0xc0(%RSP),%R10 |
(271) 0x43e7b8 MOV %RDX,0x160(%RSP) |
(271) 0x43e7c0 LEA (%R12,%RCX,1),%R14 |
(271) 0x43e7c4 MOV %RSI,0x198(%RSP) |
(271) 0x43e7cc LEA 0x8(%R12,%RCX,1),%R12 |
(271) 0x43e7d1 MOV 0x1b0(%RSP),%RSI |
(271) 0x43e7d9 LEA (%RBX,%RAX,1),%RCX |
(271) 0x43e7dd MOV 0xc8(%RSP),%RDX |
(271) 0x43e7e5 LEA (%R10,%RCX,8),%RBX |
(271) 0x43e7e9 MOV 0xd0(%RSP),%RCX |
(271) 0x43e7f1 LEA (%RSI,%R9,1),%R11 |
(271) 0x43e7f5 MOV %R11,0x1a0(%RSP) |
(271) 0x43e7fd LEA 0x8(%RSI,%R9,1),%R11 |
(271) 0x43e802 LEA (%RDX,%RAX,1),%R8 |
(271) 0x43e806 LEA (%RCX,%RAX,1),%R9 |
(271) 0x43e80a MOV 0xe0(%RSP),%RCX |
(271) 0x43e812 LEA (%R10,%R8,8),%R10 |
(271) 0x43e816 SAL $0x3,%R9 |
(271) 0x43e81a LEA (%RSI,%R9,1),%RDX |
(271) 0x43e81e LEA 0x8(%RSI,%R9,1),%R9 |
(271) 0x43e823 ADD %RAX,%RCX |
(271) 0x43e826 LEA (%RDI,%RAX,1),%RSI |
(271) 0x43e82a MOV %RDX,0x190(%RSP) |
(271) 0x43e832 MOV 0xd8(%RSP),%RDI |
(271) 0x43e83a MOV 0xe8(%RSP),%RDX |
(271) 0x43e842 LEA (%RDI,%RSI,8),%R8 |
(271) 0x43e846 MOV 0xf0(%RSP),%RDI |
(271) 0x43e84e LEA (%RDX,%RCX,8),%RSI |
(271) 0x43e852 MOV 0xf8(%RSP),%RDX |
(271) 0x43e85a MOV %RSI,0x188(%RSP) |
(271) 0x43e862 MOV 0x100(%RSP),%RSI |
(271) 0x43e86a LEA (%RDI,%RAX,1),%RCX |
(271) 0x43e86e LEA (%RDX,%RCX,8),%RDX |
(271) 0x43e872 MOV 0x138(%RSP),%RDI |
(271) 0x43e87a LEA (%RSI,%RAX,1),%RCX |
(271) 0x43e87e MOV 0x130(%RSP),%RSI |
(271) 0x43e886 LEA (%RDI,%RCX,8),%RDI |
(271) 0x43e88a LEA (%RSI,%RAX,1),%RCX |
(271) 0x43e88e MOV 0x128(%RSP),%RSI |
(271) 0x43e896 LEA (%RSI,%RCX,8),%RCX |
(271) 0x43e89a MOV 0x120(%RSP),%RSI |
(271) 0x43e8a2 MOV %RCX,0x180(%RSP) |
(271) 0x43e8aa LEA (%RSI,%RAX,1),%RCX |
(271) 0x43e8ae MOV 0x118(%RSP),%RSI |
(271) 0x43e8b6 LEA (%RSI,%RCX,8),%RCX |
(271) 0x43e8ba MOV 0x110(%RSP),%RSI |
(271) 0x43e8c2 MOV %RCX,0x158(%RSP) |
(271) 0x43e8ca MOV 0x108(%RSP),%RCX |
(271) 0x43e8d2 ADD %RSI,%RAX |
(271) 0x43e8d5 LEA (%RCX,%RAX,8),%RSI |
(271) 0x43e8d9 MOV 0x14c(%RSP),%ECX |
(271) 0x43e8e0 XOR %EAX,%EAX |
(271) 0x43e8e2 SHR $0x3,%ECX |
(271) 0x43e8e5 SAL $0x6,%RCX |
(271) 0x43e8e9 MOV %RCX,0x150(%RSP) |
(271) 0x43e8f1 NOPL (%RAX) |
(273) 0x43e8f8 MOV 0x160(%RSP),%RCX |
(273) 0x43e900 VMOVUPD (%RBX,%RAX,1),%ZMM9 |
(273) 0x43e907 VMOVUPD (%R13,%RAX,1),%ZMM12 |
(273) 0x43e90f VMOVUPD (%RCX,%RAX,1),%ZMM7 |
(273) 0x43e916 MOV 0x1a0(%RSP),%RCX |
(273) 0x43e91e VADDPD %ZMM9,%ZMM9,%ZMM11 |
(273) 0x43e924 VMOVUPD (%R10,%RAX,1),%ZMM9 |
(273) 0x43e92b VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(273) 0x43e932 MOV 0x198(%RSP),%RCX |
(273) 0x43e93a VADDPD (%R14,%RAX,1),%ZMM7,%ZMM10 |
(273) 0x43e941 VADDPD %ZMM12,%ZMM12,%ZMM7 |
(273) 0x43e947 VMOVUPD (%RCX,%RAX,1),%ZMM13 |
(273) 0x43e94e MOV 0x190(%RSP),%RCX |
(273) 0x43e956 VADDPD (%R11,%RAX,1),%ZMM0,%ZMM8 |
(273) 0x43e95d VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(273) 0x43e964 VADDPD (%R12,%RAX,1),%ZMM13,%ZMM15 |
(273) 0x43e96b VMOVUPD (%R15,%RAX,1),%ZMM13 |
(273) 0x43e972 MOV 0x188(%RSP),%RCX |
(273) 0x43e97a VMULPD %ZMM11,%ZMM8,%ZMM14 |
(273) 0x43e980 VADDPD (%R9,%RAX,1),%ZMM0,%ZMM8 |
(273) 0x43e987 VADDPD %ZMM9,%ZMM9,%ZMM11 |
(273) 0x43e98d VMOVUPD (%RCX,%RAX,1),%ZMM9 |
(273) 0x43e994 MOV 0x180(%RSP),%RCX |
(273) 0x43e99c VMULPD %ZMM11,%ZMM8,%ZMM12 |
(273) 0x43e9a2 VADDPD (%RDI,%RAX,1),%ZMM9,%ZMM11 |
(273) 0x43e9a9 VMOVUPD (%R8,%RAX,1),%ZMM8 |
(273) 0x43e9b0 VFMADD132PD %ZMM15,%ZMM12,%ZMM7 |
(273) 0x43e9b6 VADDPD %ZMM13,%ZMM13,%ZMM15 |
(273) 0x43e9bc VDIVPD (%RDX,%RAX,1),%ZMM11,%ZMM12 |
(273) 0x43e9c3 VDIVPD %ZMM8,%ZMM4,%ZMM13 |
(273) 0x43e9c9 VFMADD231PD %ZMM10,%ZMM15,%ZMM14 |
(273) 0x43e9cf VMULPD %ZMM13,%ZMM12,%ZMM15 |
(273) 0x43e9d5 VSUBPD %ZMM14,%ZMM7,%ZMM10 |
(273) 0x43e9db VMULPD %ZMM5,%ZMM10,%ZMM0 |
(273) 0x43e9e1 VADDPD %ZMM8,%ZMM0,%ZMM14 |
(273) 0x43e9e7 VFNMADD213PD (%RCX,%RAX,1),%ZMM15,%ZMM0 |
(273) 0x43e9ee MOV 0x158(%RSP),%RCX |
(273) 0x43e9f6 VDIVPD %ZMM14,%ZMM8,%ZMM7 |
(273) 0x43e9fc VMOVUPD %ZMM0,(%RCX,%RAX,1) |
(273) 0x43ea03 MOV 0x150(%RSP),%RCX |
(273) 0x43ea0b VMULPD (%RDX,%RAX,1),%ZMM7,%ZMM10 |
(273) 0x43ea12 VMOVUPD %ZMM10,(%RSI,%RAX,1) |
(273) 0x43ea19 ADD $0x40,%RAX |
(273) 0x43ea1d CMP %RCX,%RAX |
(273) 0x43ea20 JNE 43e8f8 |
(271) 0x43ea26 MOV 0x14c(%RSP),%R15D |
(271) 0x43ea2e MOV 0x148(%RSP),%R14D |
(271) 0x43ea36 MOV %R15D,%EDX |
(271) 0x43ea39 AND $-0x8,%EDX |
(271) 0x43ea3c LEA (%RDX,%R14,1),%R13D |
(271) 0x43ea40 ADD %EDX,0x1b8(%RSP) |
(271) 0x43ea47 MOV %R13D,0x190(%RSP) |
(271) 0x43ea4f TEST $0x7,%R15B |
(271) 0x43ea53 JE 43eefe |
(271) 0x43ea59 MOV 0x14c(%RSP),%R12D |
(271) 0x43ea61 SUB %EDX,%R12D |
(271) 0x43ea64 MOV %R12D,0x188(%RSP) |
(271) 0x43ea6c DEC %R12D |
(271) 0x43ea6f CMP $0x2,%R12D |
(271) 0x43ea73 JBE 43ecca |
(271) 0x43ea79 MOVSXD 0x148(%RSP),%RAX |
(271) 0x43ea81 MOV 0xb8(%RSP),%R11 |
(271) 0x43ea89 MOV 0xa8(%RSP),%R10 |
(271) 0x43ea91 MOV 0xb0(%RSP),%R15 |
(271) 0x43ea99 LEA (%R11,%RAX,1),%R8 |
(271) 0x43ea9d MOV 0xf0(%RSP),%R11 |
(271) 0x43eaa5 MOV 0xe0(%RSP),%R14 |
(271) 0x43eaad LEA (%R10,%RAX,1),%R9 |
(271) 0x43eab1 LEA (%R15,%RAX,1),%R13 |
(271) 0x43eab5 MOV 0xf8(%RSP),%R15 |
(271) 0x43eabd MOV 0x88(%RSP),%RBX |
(271) 0x43eac5 LEA (%R11,%RAX,1),%R10 |
(271) 0x43eac9 LEA (%R14,%RAX,1),%R12 |
(271) 0x43eacd MOV 0x100(%RSP),%R14 |
(271) 0x43ead5 MOV 0x98(%RSP),%RDI |
(271) 0x43eadd ADD %RDX,%R10 |
(271) 0x43eae0 LEA (%RBX,%RAX,1),%RCX |
(271) 0x43eae4 LEA (%R12,%RDX,1),%RBX |
(271) 0x43eae8 ADD %RDX,%R8 |
(271) 0x43eaeb LEA (%R15,%R10,8),%R15 |
(271) 0x43eaef MOV 0x110(%RSP),%R10 |
(271) 0x43eaf7 LEA (%R14,%RAX,1),%R12 |
(271) 0x43eafb ADD %RAX,%RDI |
(271) 0x43eafe ADD %RDX,%RDI |
(271) 0x43eb01 ADD %RDX,%R9 |
(271) 0x43eb04 MOV %RBX,0x1a0(%RSP) |
(271) 0x43eb0c MOV 0xd0(%RSP),%RSI |
(271) 0x43eb14 LEA (%R10,%RAX,1),%R14 |
(271) 0x43eb18 MOV 0x130(%RSP),%RBX |
(271) 0x43eb20 MOV 0x120(%RSP),%R11 |
(271) 0x43eb28 ADD %RDX,%RCX |
(271) 0x43eb2b LEA (%R14,%RDX,1),%R10 |
(271) 0x43eb2f MOV 0x1a8(%RSP),%R14 |
(271) 0x43eb37 LEA (%RSI,%RAX,1),%RSI |
(271) 0x43eb3b ADD %RDX,%R13 |
(271) 0x43eb3e MOV %R10,0x198(%RSP) |
(271) 0x43eb46 MOV 0xa0(%RSP),%R10 |
(271) 0x43eb4e LEA (%RBX,%RAX,1),%RBX |
(271) 0x43eb52 ADD %RAX,%R11 |
(271) 0x43eb55 VMOVUPD (%R14,%R8,8),%YMM0 |
(271) 0x43eb5b ADD %RDX,%RSI |
(271) 0x43eb5e ADD %RDX,%R12 |
(271) 0x43eb61 ADD %RDX,%RBX |
(271) 0x43eb64 ADD %RDX,%R11 |
(271) 0x43eb67 VADDPD (%R14,%RDI,8),%YMM0,%YMM14 |
(271) 0x43eb6d LEA (%R10,%RAX,1),%R14 |
(271) 0x43eb71 MOV 0xc0(%RSP),%R10 |
(271) 0x43eb79 ADD %RDX,%R14 |
(271) 0x43eb7c VMOVUPD (%R10,%R14,8),%YMM8 |
(271) 0x43eb82 MOV 0x1b0(%RSP),%R14 |
(271) 0x43eb8a VMOVUPD (%R14,%R9,8),%YMM7 |
(271) 0x43eb90 VADDPD %YMM8,%YMM8,%YMM9 |
(271) 0x43eb95 VADDPD 0x8(%R14,%R9,8),%YMM7,%YMM11 |
(271) 0x43eb9c MOV 0x1a8(%RSP),%R14 |
(271) 0x43eba4 MOV 0x90(%RSP),%R9 |
(271) 0x43ebac VMOVUPD 0x8(%R14,%R8,8),%YMM15 |
(271) 0x43ebb3 VMOVUPD 0x8(%R9,%RCX,8),%YMM13 |
(271) 0x43ebba VMULPD %YMM11,%YMM9,%YMM12 |
(271) 0x43ebbf VADDPD 0x8(%R14,%RDI,8),%YMM15,%YMM10 |
(271) 0x43ebc6 MOV 0xc8(%RSP),%RDI |
(271) 0x43ebce VADDPD %YMM13,%YMM13,%YMM0 |
(271) 0x43ebd3 ADD %RDI,%RAX |
(271) 0x43ebd6 ADD %RDX,%RAX |
(271) 0x43ebd9 VMOVUPD (%R10,%RAX,8),%YMM8 |
(271) 0x43ebdf MOV 0x1b0(%RSP),%RDX |
(271) 0x43ebe7 VMOVUPD (%R9,%RCX,8),%YMM15 |
(271) 0x43ebed MOV 0xd8(%RSP),%RAX |
(271) 0x43ebf5 VMOVUPD (%RDX,%RSI,8),%YMM7 |
(271) 0x43ebfa VADDPD %YMM8,%YMM8,%YMM9 |
(271) 0x43ebff MOV 0x138(%RSP),%RCX |
(271) 0x43ec07 MOV 0xe8(%RSP),%R8 |
(271) 0x43ec0f VADDPD 0x8(%RDX,%RSI,8),%YMM7,%YMM11 |
(271) 0x43ec15 MOV 0x1a0(%RSP),%RSI |
(271) 0x43ec1d VMOVAPD %YMM7,0x160(%RSP) |
(271) 0x43ec26 VMULPD %YMM11,%YMM9,%YMM13 |
(271) 0x43ec2b VMOVUPD (%RCX,%R12,8),%YMM9 |
(271) 0x43ec31 MOV 0x118(%RSP),%R12 |
(271) 0x43ec39 VADDPD (%R8,%RSI,8),%YMM9,%YMM7 |
(271) 0x43ec3f VFMADD132PD %YMM10,%YMM13,%YMM0 |
(271) 0x43ec44 VADDPD %YMM15,%YMM15,%YMM10 |
(271) 0x43ec49 VDIVPD (%R15),%YMM7,%YMM13 |
(271) 0x43ec4e VFMADD132PD %YMM14,%YMM12,%YMM10 |
(271) 0x43ec53 VSUBPD %YMM10,%YMM0,%YMM14 |
(271) 0x43ec58 VMOVUPD (%RAX,%R13,8),%YMM0 |
(271) 0x43ec5e MOV 0x128(%RSP),%R13 |
(271) 0x43ec66 VDIVPD %YMM0,%YMM6,%YMM15 |
(271) 0x43ec6a VMULPD %YMM1,%YMM14,%YMM12 |
(271) 0x43ec6e VADDPD %YMM0,%YMM12,%YMM8 |
(271) 0x43ec72 VDIVPD %YMM8,%YMM0,%YMM11 |
(271) 0x43ec77 VMULPD %YMM15,%YMM13,%YMM10 |
(271) 0x43ec7c VFNMADD213PD (%R13,%RBX,8),%YMM10,%YMM12 |
(271) 0x43ec83 MOV 0x198(%RSP),%RBX |
(271) 0x43ec8b VMOVUPD %YMM12,(%R12,%R11,8) |
(271) 0x43ec91 MOV 0x188(%RSP),%R11D |
(271) 0x43ec99 VMULPD (%R15),%YMM11,%YMM14 |
(271) 0x43ec9e MOV 0x108(%RSP),%R15 |
(271) 0x43eca6 VMOVUPD %YMM14,(%R15,%RBX,8) |
(271) 0x43ecac TEST $0x3,%R11B |
(271) 0x43ecb0 JE 43eefe |
(271) 0x43ecb6 AND $-0x4,%R11D |
(271) 0x43ecba ADD %R11D,0x1b8(%RSP) |
(271) 0x43ecc2 ADD %R11D,0x190(%RSP) |
(271) 0x43ecca MOV 0x1a8(%RSP),%RDX |
(271) 0x43ecd2 MOVSXD 0x190(%RSP),%R9 |
(271) 0x43ecda MOV 0x90(%RSP),%R14 |
(271) 0x43ece2 MOV 0x88(%RSP),%RDI |
(271) 0x43ecea MOV 0x98(%RSP),%RAX |
(271) 0x43ecf2 MOV 0xb8(%RSP),%RCX |
(271) 0x43ecfa MOV %R9,0x198(%RSP) |
(271) 0x43ed02 MOV %R9,%R10 |
(271) 0x43ed05 MOV 0x1b0(%RSP),%R12 |
(271) 0x43ed0d MOV 0xc0(%RSP),%R13 |
(271) 0x43ed15 LEA (%R14,%RDI,8),%R9 |
(271) 0x43ed19 MOV 0xa0(%RSP),%RSI |
(271) 0x43ed21 MOV 0xa8(%RSP),%RBX |
(271) 0x43ed29 LEA (%RDX,%RAX,8),%R8 |
(271) 0x43ed2d LEA (%RDX,%RCX,8),%RDI |
(271) 0x43ed31 MOV 0xc8(%RSP),%R11 |
(271) 0x43ed39 MOV 0xd0(%RSP),%RDX |
(271) 0x43ed41 MOV 0xd8(%RSP),%RAX |
(271) 0x43ed49 LEA (%R13,%RSI,8),%R15 |
(271) 0x43ed4e LEA (%R12,%RBX,8),%RSI |
(271) 0x43ed52 MOV 0xe0(%RSP),%RBX |
(271) 0x43ed5a LEA (%R13,%R11,8),%R14 |
(271) 0x43ed5f LEA (%R12,%RDX,8),%RCX |
(271) 0x43ed63 MOV 0xb0(%RSP),%R13 |
(271) 0x43ed6b MOV 0xe8(%RSP),%R12 |
(271) 0x43ed73 MOV 0xf8(%RSP),%R11 |
(271) 0x43ed7b MOV 0xf0(%RSP),%RDX |
(271) 0x43ed83 LEA (%RAX,%R13,8),%R13 |
(271) 0x43ed87 LEA (%R12,%RBX,8),%R12 |
(271) 0x43ed8b MOV 0x138(%RSP),%RAX |
(271) 0x43ed93 MOV 0x100(%RSP),%RBX |
(271) 0x43ed9b LEA (%R11,%RDX,8),%RDX |
(271) 0x43ed9f LEA (%RAX,%RBX,8),%R11 |
(271) 0x43eda3 MOV 0x128(%RSP),%RAX |
(271) 0x43edab MOV 0x130(%RSP),%RBX |
(271) 0x43edb3 MOV %R11,0x1b0(%RSP) |
(271) 0x43edbb LEA (%RAX,%RBX,8),%R11 |
(271) 0x43edbf MOV 0x118(%RSP),%RAX |
(271) 0x43edc7 MOV 0x120(%RSP),%RBX |
(271) 0x43edcf MOV %R11,0x1a8(%RSP) |
(271) 0x43edd7 LEA (%RAX,%RBX,8),%R11 |
(271) 0x43eddb MOV 0x108(%RSP),%RAX |
(271) 0x43ede3 MOV 0x110(%RSP),%RBX |
(271) 0x43edeb LEA (%RAX,%RBX,8),%RBX |
(271) 0x43edef MOV 0x1b8(%RSP),%EAX |
(271) 0x43edf6 SUB %R10D,%EAX |
(271) 0x43edf9 MOV %EAX,0x1a0(%RSP) |
(271) 0x43ee00 MOV 0x198(%RSP),%RAX |
(271) 0x43ee08 MOV %R11,0x198(%RSP) |
(272) 0x43ee10 VMOVSD (%RSI,%RAX,8),%XMM0 |
(272) 0x43ee15 VMOVSD (%R15,%RAX,8),%XMM9 |
(272) 0x43ee1b VMOVSD 0x8(%R9,%RAX,8),%XMM15 |
(272) 0x43ee22 VMOVSD (%R8,%RAX,8),%XMM12 |
(272) 0x43ee28 VADDSD 0x8(%RSI,%RAX,8),%XMM0,%XMM8 |
(272) 0x43ee2e VADDSD %XMM9,%XMM9,%XMM7 |
(272) 0x43ee33 VMOVSD (%RCX,%RAX,8),%XMM0 |
(272) 0x43ee38 VMOVSD (%R14,%RAX,8),%XMM9 |
(272) 0x43ee3e VMOVSD 0x8(%R8,%RAX,8),%XMM14 |
(272) 0x43ee45 VADDSD %XMM15,%XMM15,%XMM10 |
(272) 0x43ee4a VADDSD (%RDI,%RAX,8),%XMM12,%XMM11 |
(272) 0x43ee4f MOV 0x1b0(%RSP),%R10 |
(272) 0x43ee57 VMULSD %XMM7,%XMM8,%XMM13 |
(272) 0x43ee5b VADDSD 0x8(%RCX,%RAX,8),%XMM0,%XMM8 |
(272) 0x43ee61 VADDSD %XMM9,%XMM9,%XMM7 |
(272) 0x43ee66 MOV 0x1a8(%RSP),%R11 |
(272) 0x43ee6e VADDSD 0x8(%RDI,%RAX,8),%XMM14,%XMM12 |
(272) 0x43ee74 VMOVSD (%R9,%RAX,8),%XMM14 |
(272) 0x43ee7a VMULSD %XMM7,%XMM8,%XMM15 |
(272) 0x43ee7e VMOVSD (%R12,%RAX,8),%XMM8 |
(272) 0x43ee84 VADDSD (%R10,%RAX,8),%XMM8,%XMM7 |
(272) 0x43ee8a MOV 0x198(%RSP),%R10 |
(272) 0x43ee92 VFMADD132SD %XMM12,%XMM15,%XMM10 |
(272) 0x43ee97 VADDSD %XMM14,%XMM14,%XMM12 |
(272) 0x43ee9c VDIVSD (%RDX,%RAX,8),%XMM7,%XMM15 |
(272) 0x43eea1 VFMADD132SD %XMM11,%XMM13,%XMM12 |
(272) 0x43eea6 VSUBSD %XMM12,%XMM10,%XMM11 |
(272) 0x43eeab VMOVSD (%R13,%RAX,8),%XMM10 |
(272) 0x43eeb2 VDIVSD %XMM10,%XMM3,%XMM14 |
(272) 0x43eeb7 VMULSD %XMM2,%XMM11,%XMM13 |
(272) 0x43eebb VADDSD %XMM10,%XMM13,%XMM0 |
(272) 0x43eec0 VDIVSD %XMM0,%XMM10,%XMM9 |
(272) 0x43eec4 VMULSD %XMM14,%XMM15,%XMM12 |
(272) 0x43eec9 VFNMADD213SD (%R11,%RAX,8),%XMM12,%XMM13 |
(272) 0x43eecf MOV 0x1bc(%RSP),%R11D |
(272) 0x43eed7 VMOVSD %XMM13,(%R10,%RAX,8) |
(272) 0x43eedd MOV 0x1a0(%RSP),%R10D |
(272) 0x43eee5 VMULSD (%RDX,%RAX,8),%XMM9,%XMM11 |
(272) 0x43eeea VMOVSD %XMM11,(%RBX,%RAX,8) |
(272) 0x43eeef INC %RAX |
(272) 0x43eef2 ADD %EAX,%R10D |
(272) 0x43eef5 CMP %R11D,%R10D |
(272) 0x43eef8 JB 43ee10 |
(271) 0x43eefe MOV 0x1bc(%RSP),%R9D |
(271) 0x43ef06 MOV %R9D,0x1b8(%RSP) |
(271) 0x43ef0e INCQ 0x140(%RSP) |
(271) 0x43ef16 MOV 0x140(%RSP),%R8 |
(271) 0x43ef1e ADD $0,%R8D |
(271) 0x43ef22 CMP %R8D,0x80(%RSP) |
(271) 0x43ef2a JLE 43ef58 |
(271) 0x43ef2c MOV 0x78(%RSP),%ECX |
(271) 0x43ef30 MOV 0x1b8(%RSP),%EDI |
(271) 0x43ef37 MOV 0x7c(%RSP),%R15D |
(271) 0x43ef3c MOV 0x84(%RSP),%EAX |
(271) 0x43ef43 SUB %EDI,%ECX |
(271) 0x43ef45 MOV %R15D,0x148(%RSP) |
(271) 0x43ef4d JMP 43e5a0 |
0x43ef52 NOPW (%RAX,%RAX,1) |
0x43ef58 VZEROUPPER |
0x43ef5b LEA -0x28(%RBP),%RSP |
0x43ef5f POP %RBX |
0x43ef60 POP %R12 |
0x43ef62 POP %R13 |
0x43ef64 POP %R14 |
0x43ef66 POP %R15 |
0x43ef68 POP %RBP |
0x43ef69 RET |
0x43ef6a NOPW (%RAX,%RAX,1) |
(271) 0x43ef70 MOV 0x148(%RSP),%R13D |
(271) 0x43ef78 XOR %EDX,%EDX |
(271) 0x43ef7a MOV %R13D,0x190(%RSP) |
(271) 0x43ef82 JMP 43ea59 |
0x43ef87 INC %ECX |
0x43ef89 XOR %EDX,%EDX |
0x43ef8b JMP 43e4c1 |
Path / |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 101 |
nb uops | 111 |
loop length | 407 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.20 | 8.00 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
cycles | 7.20 | 11.93 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 17.71-17.85 |
Stall cycles | 0.00 |
Front-end | 18.50 |
Dispatch | 13.00 |
DIV/SQRT | 12.00 |
Overall L1 | 18.50 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 9% |
load | 11% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RDI),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43ef5b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43ef5b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,0x84(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x84(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 43ef87 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R9,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43ef5b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x7c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x20f2f(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x84(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x2007b(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VBROADCASTSD %XMM3,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R11,1),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDI,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %EDI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43e4c1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 101 |
nb uops | 111 |
loop length | 407 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.20 | 8.00 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
cycles | 7.20 | 11.93 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 17.71-17.85 |
Stall cycles | 0.00 |
Front-end | 18.50 |
Dispatch | 13.00 |
DIV/SQRT | 12.00 |
Overall L1 | 18.50 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 9% |
load | 11% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RDI),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43ef5b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43ef5b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,0x84(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x84(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 43ef87 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R9,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43ef5b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x7c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x20f2f(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x84(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x2007b(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VBROADCASTSD %XMM3,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R11,1),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDI,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %EDI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43e4c1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼PdV_kernel(bool, int, int, int, int, double, clover::Buffer2D | 5.34 | 4.02 |
▼Loop 271 - PdV.cpp:50-63 - exec– | 0 | 0 |
○Loop 273 - PdV.cpp:51-63 - exec | 5.34 | 4.02 |
○Loop 272 - PdV.cpp:55-63 - exec | 0 | 0 |