Function: viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, ... | Module: exec | Source: viscosity.cpp:36-66 [...] | Coverage: 2.57% |
---|
Function: viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, ... | Module: exec | Source: viscosity.cpp:36-66 [...] | Coverage: 2.57% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/viscosity.cpp: 36 - 66 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
39: double ugrad = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) - (xvel0(i, j) + xvel0(i + 0, j + 1)); |
40: double vgrad = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) - (yvel0(i, j) + yvel0(i + 1, j + 0)); |
41: double div = (celldx[i] * (ugrad) + celldy[j] * (vgrad)); |
42: double strain2 = 0.5 * (xvel0(i + 0, j + 1) + xvel0(i + 1, j + 1) - xvel0(i, j) - xvel0(i + 1, j + 0)) / celldy[j] + |
43: 0.5 * (yvel0(i + 1, j + 0) + yvel0(i + 1, j + 1) - yvel0(i, j) - yvel0(i + 0, j + 1)) / celldx[i]; |
44: double pgradx = (pressure(i + 1, j + 0) - pressure(i - 1, j + 0)) / (celldx[i] + celldx[i + 1]); |
45: double pgrady = (pressure(i + 0, j + 1) - pressure(i + 0, j - 1)) / (celldy[j] + celldy[j + 2]); |
46: double pgradx2 = pgradx * pgradx; |
47: double pgrady2 = pgrady * pgrady; |
48: double limiter = ((0.5 * (ugrad) / celldx[i]) * pgradx2 + (0.5 * (vgrad) / celldy[j]) * pgrady2 + strain2 * pgradx * pgrady) / |
49: std::fmax(pgradx2 + pgrady2, g_small); |
50: if ((limiter > 0.0) || (div >= 0.0)) { |
51: viscosity(i, j) = 0.0; |
52: } else { |
53: double dirx = 1.0; |
54: if (pgradx < 0.0) dirx = -1.0; |
55: pgradx = dirx * std::fmax(g_small, std::fabs(pgradx)); |
56: double diry = 1.0; |
57: if (pgradx < 0.0) diry = -1.0; |
58: pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); |
59: double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); |
60: double xgrad = std::fabs(celldx[i] * pgrad / pgradx); |
61: double ygrad = std::fabs(celldy[j] * pgrad / pgrady); |
62: double grad = std::fmin(xgrad, ygrad); |
63: double grad2 = grad * grad; |
64: viscosity(i, j) = 2.0 * density0(i, j) * grad2 * limiter * limiter; |
65: } |
66: } |
0x4504e0 PUSH %RBP |
0x4504e1 MOV %RSP,%RBP |
0x4504e4 PUSH %R15 |
0x4504e6 PUSH %R14 |
0x4504e8 PUSH %R13 |
0x4504ea PUSH %R12 |
0x4504ec PUSH %RBX |
0x4504ed AND $-0x40,%RSP |
0x4504f1 SUB $0x3c0,%RSP |
0x4504f8 MOV %R8,0x50(%RSP) |
0x4504fd MOV 0x48(%RBP),%RAX |
0x450501 MOV 0x38(%RBP),%R15 |
0x450505 MOV 0x30(%RBP),%R8 |
0x450509 MOV 0x28(%RBP),%RSI |
0x45050d MOV 0x20(%RBP),%R14 |
0x450511 MOV 0x18(%RBP),%R12 |
0x450515 MOV 0x10(%RBP),%RBX |
0x450519 MOVL $0,0x44(%RSP) |
0x450521 TEST %RAX,%RAX |
0x450524 JS 450b7f |
0x45052a MOV %R8,0x48(%RSP) |
0x45052f MOV %RSI,0xc0(%RSP) |
0x450537 MOV %RDX,%R13 |
0x45053a MOV %RCX,0x100(%RSP) |
0x450542 MOV %R9,0x38(%RSP) |
0x450547 MOV (%RDI),%ESI |
0x450549 MOVQ $0,0x60(%RSP) |
0x450552 MOV %RAX,0x58(%RSP) |
0x450557 MOVQ $0x1,0x78(%RSP) |
0x450560 SUB $0x8,%RSP |
0x450564 LEA 0x80(%RSP),%RAX |
0x45056c LEA 0x4c(%RSP),%RCX |
0x450571 LEA 0x68(%RSP),%R8 |
0x450576 LEA 0x60(%RSP),%R9 |
0x45057b MOV $0x6845d0,%EDI |
0x450580 MOV %ESI,0x48(%RSP) |
0x450584 MOV $0x22,%EDX |
0x450589 PUSH $0x1 |
0x45058b PUSH $0x1 |
0x45058d PUSH %RAX |
0x45058e CALL 403180 <__kmpc_for_static_init_8@plt> |
0x450593 ADD $0x20,%RSP |
0x450597 MOV 0x60(%RSP),%R9 |
0x45059c MOV 0x58(%RSP),%R10 |
0x4505a1 CMP %R10,%R9 |
0x4505a4 JA 450b9d |
0x4505aa LEA 0x1(%R9),%RAX |
0x4505ae INC %R10 |
0x4505b1 CMP %R10,%RAX |
0x4505b4 CMOVG %RAX,%R10 |
0x4505b8 MOV $-0x1,%R11D |
0x4505be MOV 0x48(%RSP),%R8 |
0x4505c3 SUB %R8D,%R15D |
0x4505c6 VPBROADCASTQ %R15,%ZMM16 |
0x4505cc MOV (%R12),%RAX |
0x4505d0 MOV 0x10(%R12),%RSI |
0x4505d5 VPBROADCASTQ %RAX,%ZMM20 |
0x4505db MOV (%R14),%RCX |
0x4505de MOV 0x10(%R14),%RDI |
0x4505e2 MOV 0x8(%R13),%R15 |
0x4505e6 MOV 0x100(%RSP),%RAX |
0x4505ee MOV 0x8(%RAX),%R12 |
0x4505f2 VPBROADCASTQ %RCX,%ZMM0 |
0x4505f8 VMOVDQU64 %ZMM0,0x2c0(%RSP) |
0x450600 MOV 0x38(%RSP),%RAX |
0x450605 MOV (%RAX),%RCX |
0x450608 MOV 0x10(%RAX),%R13 |
0x45060c VPBROADCASTQ %RCX,%ZMM22 |
0x450612 MOV (%RBX),%RAX |
0x450615 MOV 0x10(%RBX),%RCX |
0x450619 SUB %R9,%R10 |
0x45061c VPBROADCASTQ %RAX,%ZMM0 |
0x450622 VMOVDQU64 %ZMM0,0x100(%RSP) |
0x45062a MOV $-0x8,%R14D |
0x450630 MOV 0xc0(%RSP),%RDX |
0x450638 VPBROADCASTD %EDX,%YMM0 |
0x45063e VMOVDQU %YMM0,0xa0(%RSP) |
0x450647 VPBROADCASTD %R8D,%YMM0 |
0x45064d VMOVDQU %YMM0,0x80(%RSP) |
0x450656 MOV %RCX,0x38(%RSP) |
0x45065b VPBROADCASTQ %RCX,%ZMM0 |
0x450661 VMOVDQU64 %ZMM0,0x280(%RSP) |
0x450669 AND %R10,%R14 |
0x45066c VMOVDQU64 %ZMM16,0x340(%RSP) |
0x450674 JE 450bbc |
0x45067a MOV %R10,0x68(%RSP) |
0x45067f MOV %R9,0x70(%RSP) |
0x450684 VPBROADCASTQ %R9,%ZMM0 |
0x45068a VMOVDQU64 0x25b2c(%RIP),%ZMM27 |
0x450694 VMOVDQU64 %ZMM0,0x240(%RSP) |
0x45069c VPADDQ %ZMM27,%ZMM0,%ZMM18 |
0x4506a2 LEA 0x1(%R8),%RAX |
0x4506a6 VPBROADCASTQ %RAX,%ZMM0 |
0x4506ac VMOVDQU64 %ZMM0,0x200(%RSP) |
0x4506b4 LEA 0x1(%RDX),%RAX |
0x4506b8 VPBROADCASTQ %RAX,%ZMM0 |
0x4506be VMOVDQU64 %ZMM0,0x1c0(%RSP) |
0x4506c6 LEA -0x1(%R8),%EAX |
0x4506ca VPBROADCASTD %EAX,%YMM0 |
0x4506d0 VMOVDQU %YMM0,0x180(%RSP) |
0x4506d9 LEA (%RDX,%R11,1),%RAX |
0x4506dd VPBROADCASTQ %RAX,%ZMM0 |
0x4506e3 VMOVDQU64 %ZMM0,0x140(%RSP) |
0x4506eb XOR %EBX,%EBX |
0x4506ed VBROADCASTSD 0x12f51(%RIP),%ZMM31 |
0x4506f7 VBROADCASTSD 0x11bc7(%RIP),%ZMM24 |
0x450701 VBROADCASTSD 0x1225d(%RIP),%ZMM26 |
0x45070b JMP 450733 |
0x45070d NOPL (%RAX) |
(665) 0x450710 VPBROADCASTQ 0x12f3e(%RIP),%ZMM0 |
(665) 0x45071a VPADDQ %ZMM0,%ZMM17,%ZMM18 |
(665) 0x450720 VPADDQ %ZMM0,%ZMM27,%ZMM27 |
(665) 0x450726 ADD $0x8,%RBX |
(665) 0x45072a CMP %R14,%RBX |
(665) 0x45072d JAE 450b8e |
(665) 0x450733 VPADDQ 0x240(%RSP),%ZMM27,%ZMM17 |
(665) 0x45073b VMOVDQA64 %ZMM17,%ZMM0 |
(665) 0x450741 VMOVDQA64 %ZMM16,%ZMM1 |
(665) 0x450747 MOV $0x4513a0,%RAX |
(665) 0x45074e CALL %RAX |
(665) 0x450750 VPMOVQD %ZMM0,%YMM28 |
(665) 0x450756 VPADDQ 0x200(%RSP),%ZMM0,%ZMM0 |
(665) 0x45075e VPSLLQ $0x20,%ZMM0,%ZMM0 |
(665) 0x450765 VPSRAQ $0x20,%ZMM0,%ZMM19 |
(665) 0x45076c VMOVDQA64 %ZMM18,%ZMM0 |
(665) 0x450772 VMOVDQA64 %ZMM16,%ZMM1 |
(665) 0x450778 CALL 451520 <__svml_u64div8_z0> |
(665) 0x45077e VPMOVQD %ZMM0,%YMM1 |
(665) 0x450784 VPADDD 0xa0(%RSP),%YMM1,%YMM5 |
(665) 0x45078d VPMOVSXDQ %YMM5,%ZMM1 |
(665) 0x450793 VPXOR %XMM3,%XMM3,%XMM3 |
(665) 0x450797 VPMULLQ %ZMM1,%ZMM20,%ZMM3 |
(665) 0x45079d VPADDQ %ZMM3,%ZMM19,%ZMM2 |
(665) 0x4507a3 VXORPD %XMM7,%XMM7,%XMM7 |
(665) 0x4507a7 KXNORW %K0,%K0,%K1 |
(665) 0x4507ab VGATHERQPD (%RSI,%ZMM2,8),%ZMM7{%K1} |
(665) 0x4507b2 VPADDQ 0x1c0(%RSP),%ZMM0,%ZMM2 |
(665) 0x4507ba VPSLLQ $0x20,%ZMM2,%ZMM2 |
(665) 0x4507c1 VPSRAQ $0x20,%ZMM2,%ZMM6 |
(665) 0x4507c8 VPXOR %XMM4,%XMM4,%XMM4 |
(665) 0x4507cc VPMULLQ %ZMM6,%ZMM20,%ZMM4 |
(665) 0x4507d2 VPADDQ %ZMM4,%ZMM19,%ZMM2 |
(665) 0x4507d8 VXORPD %XMM8,%XMM8,%XMM8 |
(665) 0x4507dd KXNORW %K0,%K0,%K1 |
(665) 0x4507e1 VGATHERQPD (%RSI,%ZMM2,8),%ZMM8{%K1} |
(665) 0x4507e8 VPADDD 0x80(%RSP),%YMM28,%YMM10 |
(665) 0x4507f0 VPMOVSXDQ %YMM10,%ZMM2 |
(665) 0x4507f6 VPADDQ %ZMM2,%ZMM3,%ZMM3 |
(665) 0x4507fc VXORPD %XMM9,%XMM9,%XMM9 |
(665) 0x450801 KXNORW %K0,%K0,%K1 |
(665) 0x450805 VGATHERQPD (%RSI,%ZMM3,8),%ZMM9{%K1} |
(665) 0x45080c VPADDQ %ZMM2,%ZMM4,%ZMM3 |
(665) 0x450812 VXORPD %XMM12,%XMM12,%XMM12 |
(665) 0x450817 KXNORW %K0,%K0,%K1 |
(665) 0x45081b VGATHERQPD (%RSI,%ZMM3,8),%ZMM12{%K1} |
(665) 0x450822 VMOVDQU64 0x2c0(%RSP),%ZMM13 |
(665) 0x45082a VPXOR %XMM3,%XMM3,%XMM3 |
(665) 0x45082e VPMULLQ %ZMM6,%ZMM13,%ZMM3 |
(665) 0x450834 VPADDQ %ZMM2,%ZMM3,%ZMM4 |
(665) 0x45083a VXORPD %XMM11,%XMM11,%XMM11 |
(665) 0x45083f KXNORW %K0,%K0,%K1 |
(665) 0x450843 VGATHERQPD (%RDI,%ZMM4,8),%ZMM11{%K1} |
(665) 0x45084a VPADDQ %ZMM3,%ZMM19,%ZMM3 |
(665) 0x450850 VXORPD %XMM14,%XMM14,%XMM14 |
(665) 0x450855 KXNORW %K0,%K0,%K1 |
(665) 0x450859 VGATHERQPD (%RDI,%ZMM3,8),%ZMM14{%K1} |
(665) 0x450860 VPXOR %XMM3,%XMM3,%XMM3 |
(665) 0x450864 VPMULLQ %ZMM1,%ZMM13,%ZMM3 |
(665) 0x45086a VPADDQ %ZMM2,%ZMM3,%ZMM4 |
(665) 0x450870 VXORPD %XMM15,%XMM15,%XMM15 |
(665) 0x450875 KXNORW %K0,%K0,%K1 |
(665) 0x450879 VGATHERQPD (%RDI,%ZMM4,8),%ZMM15{%K1} |
(665) 0x450880 VPADDQ %ZMM3,%ZMM19,%ZMM3 |
(665) 0x450886 VPXORD %XMM18,%XMM18,%XMM18 |
(665) 0x45088c KXNORW %K0,%K0,%K1 |
(665) 0x450890 VGATHERQPD (%RDI,%ZMM3,8),%ZMM18{%K1} |
(665) 0x450897 VXORPD %XMM3,%XMM3,%XMM3 |
(665) 0x45089b KXNORW %K0,%K0,%K1 |
(665) 0x45089f VGATHERDPD (%R15,%YMM10,8),%ZMM3{%K1} |
(665) 0x4508a6 VXORPD %XMM4,%XMM4,%XMM4 |
(665) 0x4508aa KXNORW %K0,%K0,%K1 |
(665) 0x4508ae VGATHERDPD (%R12,%YMM5,8),%ZMM4{%K1} |
(665) 0x4508b5 VBROADCASTSD 0x12091(%RIP),%ZMM21 |
(665) 0x4508bf VDIVPD %ZMM3,%ZMM21,%ZMM13 |
(665) 0x4508c5 VADDPD %ZMM15,%ZMM11,%ZMM29 |
(665) 0x4508cb VADDPD %ZMM14,%ZMM18,%ZMM30 |
(665) 0x4508d1 VPMULLQ %ZMM1,%ZMM22,%ZMM23 |
(665) 0x4508d7 VPADDQ %ZMM23,%ZMM19,%ZMM19 |
(665) 0x4508dd VXORPD %XMM25,%XMM25,%XMM25 |
(665) 0x4508e3 KXNORW %K0,%K0,%K1 |
(665) 0x4508e7 VGATHERQPD (%R13,%ZMM19,8),%ZMM25{%K1} |
(665) 0x4508ef VSUBPD %ZMM29,%ZMM30,%ZMM19 |
(665) 0x4508f5 VMULPD %ZMM13,%ZMM31,%ZMM29 |
(665) 0x4508fb VPADDD 0x180(%RSP),%YMM28,%YMM28 |
(665) 0x450903 VPMOVSXDQ %YMM28,%ZMM28 |
(665) 0x450909 VPADDQ %ZMM28,%ZMM23,%ZMM23 |
(665) 0x45090f VPXORD %XMM28,%XMM28,%XMM28 |
(665) 0x450915 KXNORW %K0,%K0,%K1 |
(665) 0x450919 VGATHERQPD (%R13,%ZMM23,8),%ZMM28{%K1} |
(665) 0x450921 VMULPD %ZMM29,%ZMM19,%ZMM19 |
(665) 0x450927 VDIVPD %ZMM4,%ZMM21,%ZMM23 |
(665) 0x45092d VADDPD %ZMM9,%ZMM7,%ZMM29 |
(665) 0x450933 VADDPD %ZMM8,%ZMM12,%ZMM30 |
(665) 0x450939 VSUBPD %ZMM29,%ZMM30,%ZMM29 |
(665) 0x45093f VMULPD %ZMM31,%ZMM23,%ZMM30 |
(665) 0x450945 VFMADD213PD %ZMM19,%ZMM29,%ZMM30 |
(665) 0x45094b VADDPD %ZMM7,%ZMM8,%ZMM7 |
(665) 0x450951 VPCMPEQD %YMM8,%YMM8,%YMM8 |
(665) 0x450956 VPSUBD %YMM8,%YMM10,%YMM8 |
(665) 0x45095b VXORPD %XMM10,%XMM10,%XMM10 |
(665) 0x450960 KXNORW %K0,%K0,%K1 |
(665) 0x450964 VGATHERDPD (%R15,%YMM8,8),%ZMM10{%K1} |
(665) 0x45096b VADDPD %ZMM12,%ZMM9,%ZMM8 |
(665) 0x450971 VPMULLQ %ZMM6,%ZMM22,%ZMM6 |
(665) 0x450977 VPADDQ %ZMM2,%ZMM6,%ZMM6 |
(665) 0x45097d VXORPD %XMM9,%XMM9,%XMM9 |
(665) 0x450982 KXNORW %K0,%K0,%K1 |
(665) 0x450986 VGATHERQPD (%R13,%ZMM6,8),%ZMM9{%K1} |
(665) 0x45098e VSUBPD %ZMM8,%ZMM7,%ZMM7 |
(665) 0x450994 VADDPD %ZMM11,%ZMM14,%ZMM8 |
(665) 0x45099a VADDPD %ZMM18,%ZMM15,%ZMM11 |
(665) 0x4509a0 VSUBPD %ZMM28,%ZMM25,%ZMM6 |
(665) 0x4509a6 VPADDQ 0x140(%RSP),%ZMM0,%ZMM0 |
(665) 0x4509ae VPSLLQ $0x20,%ZMM0,%ZMM0 |
(665) 0x4509b5 VPSRAQ $0x20,%ZMM0,%ZMM0 |
(665) 0x4509bc VPMULLQ %ZMM0,%ZMM22,%ZMM0 |
(665) 0x4509c2 VPADDQ %ZMM2,%ZMM0,%ZMM0 |
(665) 0x4509c8 VXORPD %XMM12,%XMM12,%XMM12 |
(665) 0x4509cd KXNORW %K0,%K0,%K1 |
(665) 0x4509d1 VGATHERQPD (%R13,%ZMM0,8),%ZMM12{%K1} |
(665) 0x4509d9 VADDPD %ZMM3,%ZMM10,%ZMM0 |
(665) 0x4509df VDIVPD %ZMM0,%ZMM6,%ZMM6 |
(665) 0x4509e5 VPADDD 0x15291(%RIP){1to8},%YMM5,%YMM0 |
(665) 0x4509ef VXORPD %XMM5,%XMM5,%XMM5 |
(665) 0x4509f3 KXNORW %K0,%K0,%K1 |
(665) 0x4509f7 VGATHERDPD (%R12,%YMM0,8),%ZMM5{%K1} |
(665) 0x4509fe VSUBPD %ZMM11,%ZMM8,%ZMM0 |
(665) 0x450a04 VSUBPD %ZMM12,%ZMM9,%ZMM8 |
(665) 0x450a0a VADDPD %ZMM4,%ZMM5,%ZMM5 |
(665) 0x450a10 VDIVPD %ZMM5,%ZMM8,%ZMM5 |
(665) 0x450a16 VMULPD %ZMM30,%ZMM5,%ZMM8 |
(665) 0x450a1c VMULPD %ZMM31,%ZMM7,%ZMM9 |
(665) 0x450a22 VMULPD %ZMM13,%ZMM6,%ZMM10 |
(665) 0x450a28 VFMADD213PD %ZMM8,%ZMM9,%ZMM10 |
(665) 0x450a2e VMULPD %ZMM5,%ZMM5,%ZMM8 |
(665) 0x450a34 VMULPD %ZMM6,%ZMM10,%ZMM9 |
(665) 0x450a3a VMULPD %ZMM31,%ZMM0,%ZMM10 |
(665) 0x450a40 VMULPD %ZMM8,%ZMM23,%ZMM11 |
(665) 0x450a46 VFMADD213PD %ZMM9,%ZMM10,%ZMM11 |
(665) 0x450a4c VMULPD %ZMM7,%ZMM3,%ZMM7 |
(665) 0x450a52 VFMADD231PD %ZMM0,%ZMM4,%ZMM7 |
(665) 0x450a58 VFMADD231PD %ZMM6,%ZMM6,%ZMM8 |
(665) 0x450a5e VMAXPD %ZMM24,%ZMM8,%ZMM0 |
(665) 0x450a64 VDIVPD %ZMM0,%ZMM11,%ZMM0 |
(665) 0x450a6a VFPCLASSPD $0x56,%ZMM0,%K1 |
(665) 0x450a71 VFPCLASSPD $0x50,%ZMM7,%K1{%K1} |
(665) 0x450a78 KNOTB %K1,%K2 |
(665) 0x450a7c VMOVDQU64 0x100(%RSP),%ZMM7 |
(665) 0x450a84 VPMULLQ %ZMM1,%ZMM7,%ZMM7 |
(665) 0x450a8a VPADDQ %ZMM2,%ZMM7,%ZMM7 |
(665) 0x450a90 MOV 0x38(%RSP),%RAX |
(665) 0x450a95 VXORPD %XMM8,%XMM8,%XMM8 |
(665) 0x450a9a VSCATTERQPD %ZMM8,(%RAX,%ZMM7,8){%K2} |
(665) 0x450aa1 KORTESTB %K1,%K1 |
(665) 0x450aa5 JE 450710 |
(665) 0x450aab VANDPD %ZMM26,%ZMM6,%ZMM8 |
(665) 0x450ab1 VMAXPD %ZMM24,%ZMM8,%ZMM8 |
(665) 0x450ab7 VFPCLASSPD $0x50,%ZMM6,%K2 |
(665) 0x450abe VBROADCASTSD 0x13bf8(%RIP),%ZMM6 |
(665) 0x450ac8 VXORPD %ZMM6,%ZMM8,%ZMM8{%K2} |
(665) 0x450ace VANDPD %ZMM26,%ZMM5,%ZMM5 |
(665) 0x450ad4 VMAXPD %ZMM24,%ZMM5,%ZMM5 |
(665) 0x450ada VFPCLASSPD $0x50,%ZMM8,%K2 |
(665) 0x450ae1 VXORPD %ZMM6,%ZMM5,%ZMM5{%K2} |
(665) 0x450ae7 VMULPD %ZMM8,%ZMM8,%ZMM6 |
(665) 0x450aed VFMADD231PD %ZMM5,%ZMM5,%ZMM6 |
(665) 0x450af3 VSQRTPD %ZMM6,%ZMM6 |
(665) 0x450af9 VPSLLQ $0x3,%ZMM7,%ZMM7 |
(665) 0x450b00 VPADDQ 0x280(%RSP),%ZMM7,%ZMM7 |
(665) 0x450b08 VMULPD %ZMM3,%ZMM6,%ZMM3 |
(665) 0x450b0e VDIVPD %ZMM8,%ZMM3,%ZMM3 |
(665) 0x450b14 VANDPD %ZMM26,%ZMM3,%ZMM3 |
(665) 0x450b1a VMULPD %ZMM4,%ZMM6,%ZMM4 |
(665) 0x450b20 VDIVPD %ZMM5,%ZMM4,%ZMM4 |
(665) 0x450b26 VANDPD %ZMM26,%ZMM4,%ZMM4 |
(665) 0x450b2c MOV 0x50(%RSP),%RCX |
(665) 0x450b31 MOV 0x10(%RCX),%RAX |
(665) 0x450b35 VPMULLQ (%RCX){1to8},%ZMM1,%ZMM1 |
(665) 0x450b3b VPADDQ %ZMM2,%ZMM1,%ZMM1 |
(665) 0x450b41 VPXOR %XMM2,%XMM2,%XMM2 |
(665) 0x450b45 KMOVQ %K1,%K2 |
(665) 0x450b4a VGATHERQPD (%RAX,%ZMM1,8),%ZMM2{%K2} |
(665) 0x450b51 VMINPD %ZMM4,%ZMM3,%ZMM1 |
(665) 0x450b57 VMULPD %ZMM0,%ZMM1,%ZMM0 |
(665) 0x450b5d VMULPD %ZMM0,%ZMM0,%ZMM0 |
(665) 0x450b63 VADDPD %ZMM2,%ZMM2,%ZMM1 |
(665) 0x450b69 VMULPD %ZMM1,%ZMM0,%ZMM0 |
(665) 0x450b6f VSCATTERQPD %ZMM0,(,%ZMM7,1){%K1} |
(665) 0x450b7a JMP 450710 |
0x450b7f LEA -0x28(%RBP),%RSP |
0x450b83 POP %RBX |
0x450b84 POP %R12 |
0x450b86 POP %R13 |
0x450b88 POP %R14 |
0x450b8a POP %R15 |
0x450b8c POP %RBP |
0x450b8d RET |
0x450b8e MOV 0x68(%RSP),%R10 |
0x450b93 CMP %R14,%R10 |
0x450b96 MOV 0x70(%RSP),%R9 |
0x450b9b JNE 450bc4 |
0x450b9d MOV $0x6845f0,%EDI |
0x450ba2 MOV 0x40(%RSP),%ESI |
0x450ba6 LEA -0x28(%RBP),%RSP |
0x450baa POP %RBX |
0x450bab POP %R12 |
0x450bad POP %R13 |
0x450baf POP %R14 |
0x450bb1 POP %R15 |
0x450bb3 POP %RBP |
0x450bb4 VZEROUPPER |
0x450bb7 JMP 402fe0 |
0x450bbc XOR %R14D,%R14D |
0x450bbf MOV %R9,%RAX |
0x450bc2 JMP 450bdb |
0x450bc4 LEA (%R9,%R14,1),%RAX |
0x450bc8 MOV 0xc0(%RSP),%RDX |
0x450bd0 MOV 0x48(%RSP),%R8 |
0x450bd5 MOV $-0x1,%R11D |
0x450bdb VPBROADCASTQ %RAX,%ZMM0 |
0x450be1 VMOVDQU64 0x255d5(%RIP),%ZMM27 |
0x450beb VPADDQ %ZMM27,%ZMM0,%ZMM17 |
0x450bf1 SUB %R14,%R10 |
0x450bf4 VPBROADCASTQ %R10,%ZMM28 |
0x450bfa ADD %R14,%R9 |
0x450bfd VPBROADCASTQ %R9,%ZMM0 |
0x450c03 VMOVDQU64 %ZMM0,0x200(%RSP) |
0x450c0b MOV %R8,%RAX |
0x450c0e INC %RAX |
0x450c11 VPBROADCASTQ %RAX,%ZMM0 |
0x450c17 VMOVDQU64 %ZMM0,0x1c0(%RSP) |
0x450c1f LEA 0x1(%RDX),%RAX |
0x450c23 VPBROADCASTQ %RAX,%ZMM0 |
0x450c29 VMOVDQU64 %ZMM0,0x180(%RSP) |
0x450c31 LEA -0x1(%R8),%EAX |
0x450c35 VPBROADCASTD %EAX,%YMM0 |
0x450c3b VMOVDQU %YMM0,0x140(%RSP) |
0x450c44 ADD %RDX,%R11 |
0x450c47 VPBROADCASTQ %R11,%ZMM0 |
0x450c4d VMOVDQU64 %ZMM0,0xc0(%RSP) |
0x450c55 VPBROADCASTQ 0x129f9(%RIP),%ZMM16 |
0x450c5f MOV $0x4513a0,%RBX |
0x450c66 MOV $0x451520,%R14 |
0x450c6d VBROADCASTSD 0x11651(%RIP),%ZMM21 |
0x450c77 VMOVDQU64 %ZMM20,0x300(%RSP) |
0x450c7f JMP 450cbb |
0x450c81 NOPW %CS:(%RAX,%RAX,1) |
(664) 0x450c90 VPADDQ %ZMM16,%ZMM18,%ZMM0 |
(664) 0x450c96 VMOVDQU64 0x300(%RSP),%ZMM20 |
(664) 0x450c9e VMOVDQA64 %ZMM0,%ZMM17{%K3} |
(664) 0x450ca4 VPADDQ %ZMM16,%ZMM27,%ZMM27 |
(664) 0x450caa VPCMPLTUQ %ZMM28,%ZMM27,%K0 |
(664) 0x450cb1 KORTESTB %K0,%K0 |
(664) 0x450cb5 JE 450b9d |
(664) 0x450cbb VPCMPLTUQ %ZMM28,%ZMM27,%K3 |
(664) 0x450cc2 KORTESTB %K3,%K3 |
(664) 0x450cc6 VPXOR %XMM0,%XMM0,%XMM0 |
(664) 0x450cca JE 450c9e |
(664) 0x450ccc VPADDQ 0x200(%RSP),%ZMM27,%ZMM18 |
(664) 0x450cd4 VPBROADCASTQ 0x123e2(%RIP),%ZMM0 |
(664) 0x450cde VPBLENDMQ 0x340(%RSP),%ZMM0,%ZMM19{%K3} |
(664) 0x450ce6 VMOVDQA64 %ZMM18,%ZMM0 |
(664) 0x450cec VMOVDQA64 %ZMM19,%ZMM1 |
(664) 0x450cf2 KMOVW %K3,0x240(%RSP) |
(664) 0x450cfb CALL %RBX |
(664) 0x450cfd VPMOVQD %ZMM0,%YMM23 |
(664) 0x450d03 VPADDQ 0x1c0(%RSP),%ZMM0,%ZMM0 |
(664) 0x450d0b VPSLLQ $0x20,%ZMM0,%ZMM0 |
(664) 0x450d12 VPSRAQ $0x20,%ZMM0,%ZMM24 |
(664) 0x450d19 VMOVDQA64 %ZMM17,%ZMM0 |
(664) 0x450d1f VMOVDQA64 %ZMM19,%ZMM1 |
(664) 0x450d25 CALL %R14 |
(664) 0x450d28 KMOVW 0x240(%RSP),%K3 |
(664) 0x450d31 VPMOVQD %ZMM0,%YMM1 |
(664) 0x450d37 VPADDD 0xa0(%RSP),%YMM1,%YMM5 |
(664) 0x450d40 VPMOVSXDQ %YMM5,%ZMM31 |
(664) 0x450d46 VPXOR %XMM3,%XMM3,%XMM3 |
(664) 0x450d4a VPMULLQ %ZMM31,%ZMM20,%ZMM3 |
(664) 0x450d50 VPADDQ %ZMM3,%ZMM24,%ZMM2 |
(664) 0x450d56 KMOVQ %K3,%K1 |
(664) 0x450d5b VXORPD %XMM6,%XMM6,%XMM6 |
(664) 0x450d5f VGATHERQPD (%RSI,%ZMM2,8),%ZMM6{%K1} |
(664) 0x450d66 VPADDQ 0x180(%RSP),%ZMM0,%ZMM2 |
(664) 0x450d6e VPSLLQ $0x20,%ZMM2,%ZMM2 |
(664) 0x450d75 VPSRAQ $0x20,%ZMM2,%ZMM8 |
(664) 0x450d7c VPXOR %XMM4,%XMM4,%XMM4 |
(664) 0x450d80 VPMULLQ %ZMM8,%ZMM20,%ZMM4 |
(664) 0x450d86 VPADDQ %ZMM4,%ZMM24,%ZMM2 |
(664) 0x450d8c KMOVQ %K3,%K1 |
(664) 0x450d91 VXORPD %XMM7,%XMM7,%XMM7 |
(664) 0x450d95 VGATHERQPD (%RSI,%ZMM2,8),%ZMM7{%K1} |
(664) 0x450d9c VPADDD 0x80(%RSP),%YMM23,%YMM11 |
(664) 0x450da4 VPMOVSXDQ %YMM11,%ZMM2 |
(664) 0x450daa VPADDQ %ZMM2,%ZMM3,%ZMM3 |
(664) 0x450db0 KMOVQ %K3,%K1 |
(664) 0x450db5 VXORPD %XMM9,%XMM9,%XMM9 |
(664) 0x450dba VGATHERQPD (%RSI,%ZMM3,8),%ZMM9{%K1} |
(664) 0x450dc1 VPADDQ %ZMM2,%ZMM4,%ZMM3 |
(664) 0x450dc7 KMOVQ %K3,%K1 |
(664) 0x450dcc VXORPD %XMM10,%XMM10,%XMM10 |
(664) 0x450dd1 VGATHERQPD (%RSI,%ZMM3,8),%ZMM10{%K1} |
(664) 0x450dd8 VMOVDQU64 0x2c0(%RSP),%ZMM1 |
(664) 0x450de0 VPXOR %XMM3,%XMM3,%XMM3 |
(664) 0x450de4 VPMULLQ %ZMM8,%ZMM1,%ZMM3 |
(664) 0x450dea VPADDQ %ZMM2,%ZMM3,%ZMM4 |
(664) 0x450df0 KMOVQ %K3,%K1 |
(664) 0x450df5 VXORPD %XMM12,%XMM12,%XMM12 |
(664) 0x450dfa VGATHERQPD (%RDI,%ZMM4,8),%ZMM12{%K1} |
(664) 0x450e01 VPADDQ %ZMM3,%ZMM24,%ZMM3 |
(664) 0x450e07 KMOVQ %K3,%K1 |
(664) 0x450e0c VXORPD %XMM14,%XMM14,%XMM14 |
(664) 0x450e11 VGATHERQPD (%RDI,%ZMM3,8),%ZMM14{%K1} |
(664) 0x450e18 VPXOR %XMM3,%XMM3,%XMM3 |
(664) 0x450e1c VPMULLQ %ZMM31,%ZMM1,%ZMM3 |
(664) 0x450e22 VPADDQ %ZMM2,%ZMM3,%ZMM4 |
(664) 0x450e28 KMOVQ %K3,%K1 |
(664) 0x450e2d VXORPD %XMM15,%XMM15,%XMM15 |
(664) 0x450e32 VGATHERQPD (%RDI,%ZMM4,8),%ZMM15{%K1} |
(664) 0x450e39 VPADDQ %ZMM3,%ZMM24,%ZMM3 |
(664) 0x450e3f KMOVQ %K3,%K1 |
(664) 0x450e44 VPXORD %XMM19,%XMM19,%XMM19 |
(664) 0x450e4a VGATHERQPD (%RDI,%ZMM3,8),%ZMM19{%K1} |
(664) 0x450e51 KMOVQ %K3,%K1 |
(664) 0x450e56 VXORPD %XMM3,%XMM3,%XMM3 |
(664) 0x450e5a VGATHERDPD (%R15,%YMM11,8),%ZMM3{%K1} |
(664) 0x450e61 KMOVQ %K3,%K1 |
(664) 0x450e66 VXORPD %XMM4,%XMM4,%XMM4 |
(664) 0x450e6a VGATHERDPD (%R12,%YMM5,8),%ZMM4{%K1} |
(664) 0x450e71 VBROADCASTSD 0x11ad5(%RIP),%ZMM30 |
(664) 0x450e7b VDIVPD %ZMM3,%ZMM30,%ZMM13 |
(664) 0x450e81 VADDPD %ZMM15,%ZMM12,%ZMM25 |
(664) 0x450e87 VADDPD %ZMM14,%ZMM19,%ZMM29 |
(664) 0x450e8d VSUBPD %ZMM25,%ZMM29,%ZMM25 |
(664) 0x450e93 VPXORD %XMM29,%XMM29,%XMM29 |
(664) 0x450e99 VPMULLQ %ZMM31,%ZMM22,%ZMM29 |
(664) 0x450e9f VPADDQ %ZMM29,%ZMM24,%ZMM24 |
(664) 0x450ea5 KMOVQ %K3,%K1 |
(664) 0x450eaa VXORPD %XMM26,%XMM26,%XMM26 |
(664) 0x450eb0 VGATHERQPD (%R13,%ZMM24,8),%ZMM26{%K1} |
(664) 0x450eb8 VDIVPD %ZMM4,%ZMM30,%ZMM24 |
(664) 0x450ebe VPADDD 0x140(%RSP),%YMM23,%YMM23 |
(664) 0x450ec6 VPMOVSXDQ %YMM23,%ZMM23 |
(664) 0x450ecc VPADDQ %ZMM23,%ZMM29,%ZMM23 |
(664) 0x450ed2 KMOVQ %K3,%K1 |
(664) 0x450ed7 VPXORD %XMM29,%XMM29,%XMM29 |
(664) 0x450edd VGATHERQPD (%R13,%ZMM23,8),%ZMM29{%K1} |
(664) 0x450ee5 VBROADCASTSD 0x12759(%RIP),%ZMM20 |
(664) 0x450eef VMULPD %ZMM13,%ZMM20,%ZMM23 |
(664) 0x450ef5 VMULPD %ZMM23,%ZMM25,%ZMM23 |
(664) 0x450efb VADDPD %ZMM9,%ZMM6,%ZMM25 |
(664) 0x450f01 VADDPD %ZMM7,%ZMM10,%ZMM30 |
(664) 0x450f07 VSUBPD %ZMM25,%ZMM30,%ZMM25 |
(664) 0x450f0d VPCMPEQD %YMM1,%YMM1,%YMM1 |
(664) 0x450f11 VPSUBD %YMM1,%YMM11,%YMM11 |
(664) 0x450f15 KMOVQ %K3,%K1 |
(664) 0x450f1a VXORPD %XMM30,%XMM30,%XMM30 |
(664) 0x450f20 VGATHERDPD (%R15,%YMM11,8),%ZMM30{%K1} |
(664) 0x450f27 VMULPD %ZMM20,%ZMM24,%ZMM11 |
(664) 0x450f2d VFMADD213PD %ZMM23,%ZMM25,%ZMM11 |
(664) 0x450f33 VPMULLQ %ZMM8,%ZMM22,%ZMM8 |
(664) 0x450f39 VPADDQ %ZMM2,%ZMM8,%ZMM8 |
(664) 0x450f3f KMOVQ %K3,%K1 |
(664) 0x450f44 VXORPD %XMM23,%XMM23,%XMM23 |
(664) 0x450f4a VGATHERQPD (%R13,%ZMM8,8),%ZMM23{%K1} |
(664) 0x450f52 VADDPD %ZMM6,%ZMM7,%ZMM6 |
(664) 0x450f58 VADDPD %ZMM10,%ZMM9,%ZMM7 |
(664) 0x450f5e VSUBPD %ZMM7,%ZMM6,%ZMM7 |
(664) 0x450f64 VADDPD %ZMM12,%ZMM14,%ZMM8 |
(664) 0x450f6a VADDPD %ZMM19,%ZMM15,%ZMM9 |
(664) 0x450f70 VPADDQ 0xc0(%RSP),%ZMM0,%ZMM0 |
(664) 0x450f78 VPSLLQ $0x20,%ZMM0,%ZMM0 |
(664) 0x450f7f VPSRAQ $0x20,%ZMM0,%ZMM0 |
(664) 0x450f86 VPMULLQ %ZMM0,%ZMM22,%ZMM0 |
(664) 0x450f8c VPADDQ %ZMM2,%ZMM0,%ZMM0 |
(664) 0x450f92 KMOVQ %K3,%K1 |
(664) 0x450f97 VXORPD %XMM10,%XMM10,%XMM10 |
(664) 0x450f9c VGATHERQPD (%R13,%ZMM0,8),%ZMM10{%K1} |
(664) 0x450fa4 VSUBPD %ZMM29,%ZMM26,%ZMM0 |
(664) 0x450faa VADDPD %ZMM3,%ZMM30,%ZMM6 |
(664) 0x450fb0 VDIVPD %ZMM6,%ZMM0,%ZMM6 |
(664) 0x450fb6 VPADDD 0x14cc0(%RIP){1to8},%YMM5,%YMM0 |
(664) 0x450fc0 KMOVQ %K3,%K1 |
(664) 0x450fc5 VXORPD %XMM5,%XMM5,%XMM5 |
(664) 0x450fc9 VGATHERDPD (%R12,%YMM0,8),%ZMM5{%K1} |
(664) 0x450fd0 VSUBPD %ZMM9,%ZMM8,%ZMM0 |
(664) 0x450fd6 VSUBPD %ZMM10,%ZMM23,%ZMM8 |
(664) 0x450fdc VADDPD %ZMM4,%ZMM5,%ZMM5 |
(664) 0x450fe2 VDIVPD %ZMM5,%ZMM8,%ZMM5 |
(664) 0x450fe8 VMULPD %ZMM11,%ZMM5,%ZMM8 |
(664) 0x450fee VMULPD %ZMM20,%ZMM7,%ZMM9 |
(664) 0x450ff4 VMULPD %ZMM13,%ZMM6,%ZMM10 |
(664) 0x450ffa VFMADD213PD %ZMM8,%ZMM9,%ZMM10 |
(664) 0x451000 VMULPD %ZMM5,%ZMM5,%ZMM8 |
(664) 0x451006 VMULPD %ZMM6,%ZMM10,%ZMM9 |
(664) 0x45100c VMULPD %ZMM20,%ZMM0,%ZMM10 |
(664) 0x451012 VMULPD %ZMM8,%ZMM24,%ZMM11 |
(664) 0x451018 VFMADD213PD %ZMM9,%ZMM10,%ZMM11 |
(664) 0x45101e VMULPD %ZMM7,%ZMM3,%ZMM7 |
(664) 0x451024 VFMADD231PD %ZMM0,%ZMM4,%ZMM7 |
(664) 0x45102a VFMADD231PD %ZMM6,%ZMM6,%ZMM8 |
(664) 0x451030 VMAXPD %ZMM21,%ZMM8,%ZMM0 |
(664) 0x451036 VDIVPD %ZMM0,%ZMM11,%ZMM0 |
(664) 0x45103c VFPCLASSPD $0x56,%ZMM0,%K1 |
(664) 0x451043 VFPCLASSPD $0x50,%ZMM7,%K0{%K1} |
(664) 0x45104a KANDNB %K3,%K0,%K1 |
(664) 0x45104e VMOVDQU64 0x100(%RSP),%ZMM7 |
(664) 0x451056 VPMULLQ %ZMM31,%ZMM7,%ZMM7 |
(664) 0x45105c VPADDQ %ZMM2,%ZMM7,%ZMM7 |
(664) 0x451062 MOV 0x38(%RSP),%RAX |
(664) 0x451067 VXORPD %XMM8,%XMM8,%XMM8 |
(664) 0x45106c VSCATTERQPD %ZMM8,(%RAX,%ZMM7,8){%K1} |
(664) 0x451073 KANDB %K0,%K3,%K1 |
(664) 0x451077 KORTESTB %K1,%K1 |
(664) 0x45107b JE 450c90 |
(664) 0x451081 VBROADCASTSD 0x118dd(%RIP),%ZMM1 |
(664) 0x45108b VANDPD %ZMM1,%ZMM6,%ZMM8 |
(664) 0x451091 VMAXPD %ZMM21,%ZMM8,%ZMM8 |
(664) 0x451097 VFPCLASSPD $0x50,%ZMM6,%K2 |
(664) 0x45109e VBROADCASTSD 0x13618(%RIP),%ZMM6 |
(664) 0x4510a8 VXORPD %ZMM6,%ZMM8,%ZMM8{%K2} |
(664) 0x4510ae VANDPD %ZMM1,%ZMM5,%ZMM5 |
(664) 0x4510b4 VMAXPD %ZMM21,%ZMM5,%ZMM5 |
(664) 0x4510ba VFPCLASSPD $0x50,%ZMM8,%K2 |
(664) 0x4510c1 VXORPD %ZMM6,%ZMM5,%ZMM5{%K2} |
(664) 0x4510c7 VMULPD %ZMM8,%ZMM8,%ZMM6 |
(664) 0x4510cd VFMADD231PD %ZMM5,%ZMM5,%ZMM6 |
(664) 0x4510d3 VSQRTPD %ZMM6,%ZMM6 |
(664) 0x4510d9 VPSLLQ $0x3,%ZMM7,%ZMM7 |
(664) 0x4510e0 VPADDQ 0x280(%RSP),%ZMM7,%ZMM7 |
(664) 0x4510e8 VMULPD %ZMM3,%ZMM6,%ZMM3 |
(664) 0x4510ee VDIVPD %ZMM8,%ZMM3,%ZMM3 |
(664) 0x4510f4 VANDPD %ZMM1,%ZMM3,%ZMM3 |
(664) 0x4510fa VMULPD %ZMM4,%ZMM6,%ZMM4 |
(664) 0x451100 VDIVPD %ZMM5,%ZMM4,%ZMM4 |
(664) 0x451106 VANDPD %ZMM1,%ZMM4,%ZMM4 |
(664) 0x45110c MOV 0x50(%RSP),%RCX |
(664) 0x451111 MOV 0x10(%RCX),%RAX |
(664) 0x451115 VPXOR %XMM1,%XMM1,%XMM1 |
(664) 0x451119 VPMULLQ (%RCX){1to8},%ZMM31,%ZMM1 |
(664) 0x45111f VPADDQ %ZMM2,%ZMM1,%ZMM1 |
(664) 0x451125 KMOVQ %K1,%K2 |
(664) 0x45112a VPXOR %XMM2,%XMM2,%XMM2 |
(664) 0x45112e VGATHERQPD (%RAX,%ZMM1,8),%ZMM2{%K2} |
(664) 0x451135 VMINPD %ZMM4,%ZMM3,%ZMM1 |
(664) 0x45113b VMULPD %ZMM0,%ZMM1,%ZMM0 |
(664) 0x451141 VMULPD %ZMM0,%ZMM0,%ZMM0 |
(664) 0x451147 VADDPD %ZMM2,%ZMM2,%ZMM1 |
(664) 0x45114d VMULPD %ZMM1,%ZMM0,%ZMM0 |
(664) 0x451153 VSCATTERQPD %ZMM0,(,%ZMM7,1){%K1} |
(664) 0x45115e JMP 450c90 |
0x451163 NOPW %CS:(%RAX,%RAX,1) |
0x45116d NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | viscosity.cpp:36-66 |
Module | exec |
nb instructions | 169 |
nb uops | 171 |
loop length | 846 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 1 |
used zmm registers | 12 |
nb stack references | 32 |
micro-operation queue | 28.50 cycles |
front end | 28.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.50 | 7.60 | 16.33 | 16.33 | 20.00 | 20.00 | 7.50 | 20.00 | 20.00 | 20.00 | 7.40 | 16.33 |
cycles | 7.50 | 7.60 | 16.33 | 16.33 | 20.00 | 20.00 | 7.50 | 20.00 | 20.00 | 20.00 | 7.40 | 16.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.32-28.33 |
Stall cycles | 0.00 |
Front-end | 28.50 |
Dispatch | 20.00 |
Overall L1 | 28.50 |
all | 28% |
load | 14% |
store | 56% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 27% |
load | 11% |
store | 56% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 33% |
load | 24% |
store | 54% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 32% |
load | 21% |
store | 54% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x3c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 450b7f <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x69f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x80(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6845d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403180 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x60(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R10,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 450b9d <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6bd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%R9),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x48(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R8D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R15,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xc0(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %R8D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
AND %R10,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVDQU64 %ZMM16,0x340(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
JE 450bbc <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6dc> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R9,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x25b2c(%RIP),%ZMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVDQU64 %ZMM0,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPADDQ %ZMM27,%ZMM0,%ZMM18 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA 0x1(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA -0x1(%R8),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%RDX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x12f51(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11bc7(%RIP),%ZMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x1225d(%RIP),%ZMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 450733 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x253> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x68(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x70(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 450bc4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6e4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6845f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 450bdb <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6fb> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%R9,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xc0(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $-0x1,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x255d5(%RIP),%ZMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPADDQ %ZMM27,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %R14,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R10,%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R9,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA 0x1(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA -0x1(%R8),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
ADD %RDX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R11,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ 0x129f9(%RIP),%ZMM16 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV $0x4513a0,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x451520,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD 0x11651(%RIP),%ZMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQU64 %ZMM20,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
JMP 450cbb <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x7db> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | viscosity.cpp:36-66 |
Module | exec |
nb instructions | 169 |
nb uops | 171 |
loop length | 846 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 1 |
used zmm registers | 12 |
nb stack references | 32 |
micro-operation queue | 28.50 cycles |
front end | 28.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.50 | 7.60 | 16.33 | 16.33 | 20.00 | 20.00 | 7.50 | 20.00 | 20.00 | 20.00 | 7.40 | 16.33 |
cycles | 7.50 | 7.60 | 16.33 | 16.33 | 20.00 | 20.00 | 7.50 | 20.00 | 20.00 | 20.00 | 7.40 | 16.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 28.32-28.33 |
Stall cycles | 0.00 |
Front-end | 28.50 |
Dispatch | 20.00 |
Overall L1 | 28.50 |
all | 28% |
load | 14% |
store | 56% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 27% |
load | 11% |
store | 56% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 33% |
load | 24% |
store | 54% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 32% |
load | 21% |
store | 54% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x3c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 450b7f <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x69f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x80(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6845d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403180 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x60(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R10,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 450b9d <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6bd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%R9),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R10,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x48(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R8D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R15,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R9,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xc0(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %R8D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
AND %R10,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVDQU64 %ZMM16,0x340(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
JE 450bbc <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6dc> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R9,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x25b2c(%RIP),%ZMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVDQU64 %ZMM0,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPADDQ %ZMM27,%ZMM0,%ZMM18 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA 0x1(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA -0x1(%R8),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%RDX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x12f51(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11bc7(%RIP),%ZMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x1225d(%RIP),%ZMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 450733 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x253> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x68(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x70(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 450bc4 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6e4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6845f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 450bdb <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6fb> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%R9,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xc0(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $-0x1,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x255d5(%RIP),%ZMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPADDQ %ZMM27,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %R14,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R10,%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R9,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA 0x1(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA -0x1(%R8),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
ADD %RDX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R11,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ 0x129f9(%RIP),%ZMM16 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV $0x4513a0,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x451520,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD 0x11651(%RIP),%ZMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQU64 %ZMM20,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
JMP 450cbb <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x7db> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼viscosity_kernel(int, int, int, int, clover::Buffer1D | 2.57 | 2.19 |
○Loop 665 - viscosity.cpp:37-66 - exec | 2.57 | 2.19 |
○Loop 664 - viscosity.cpp:37-64 - exec | 0 | 0 |