Function: viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, ... | Module: exec | Source: viscosity.cpp:36-66 [...] | Coverage: 3.36% |
---|
Function: viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, ... | Module: exec | Source: viscosity.cpp:36-66 [...] | Coverage: 3.36% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/viscosity.cpp: 36 - 66 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
39: double ugrad = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) - (xvel0(i, j) + xvel0(i + 0, j + 1)); |
40: double vgrad = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) - (yvel0(i, j) + yvel0(i + 1, j + 0)); |
41: double div = (celldx[i] * (ugrad) + celldy[j] * (vgrad)); |
42: double strain2 = 0.5 * (xvel0(i + 0, j + 1) + xvel0(i + 1, j + 1) - xvel0(i, j) - xvel0(i + 1, j + 0)) / celldy[j] + |
43: 0.5 * (yvel0(i + 1, j + 0) + yvel0(i + 1, j + 1) - yvel0(i, j) - yvel0(i + 0, j + 1)) / celldx[i]; |
44: double pgradx = (pressure(i + 1, j + 0) - pressure(i - 1, j + 0)) / (celldx[i] + celldx[i + 1]); |
45: double pgrady = (pressure(i + 0, j + 1) - pressure(i + 0, j - 1)) / (celldy[j] + celldy[j + 2]); |
46: double pgradx2 = pgradx * pgradx; |
47: double pgrady2 = pgrady * pgrady; |
48: double limiter = ((0.5 * (ugrad) / celldx[i]) * pgradx2 + (0.5 * (vgrad) / celldy[j]) * pgrady2 + strain2 * pgradx * pgrady) / |
49: std::fmax(pgradx2 + pgrady2, g_small); |
50: if ((limiter > 0.0) || (div >= 0.0)) { |
51: viscosity(i, j) = 0.0; |
52: } else { |
53: double dirx = 1.0; |
54: if (pgradx < 0.0) dirx = -1.0; |
55: pgradx = dirx * std::fmax(g_small, std::fabs(pgradx)); |
56: double diry = 1.0; |
57: if (pgradx < 0.0) diry = -1.0; |
58: pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); |
59: double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); |
60: double xgrad = std::fabs(celldx[i] * pgrad / pgradx); |
61: double ygrad = std::fabs(celldy[j] * pgrad / pgrady); |
62: double grad = std::fmin(xgrad, ygrad); |
63: double grad2 = grad * grad; |
64: viscosity(i, j) = 2.0 * density0(i, j) * grad2 * limiter * limiter; |
65: } |
66: } |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x465650 PUSH %RBP |
0x465651 MOV %RSP,%RBP |
0x465654 PUSH %R15 |
0x465656 PUSH %R14 |
0x465658 PUSH %R13 |
0x46565a PUSH %R12 |
0x46565c PUSH %RBX |
0x46565d AND $-0x40,%RSP |
0x465661 SUB $0x1c0,%RSP |
0x465668 MOV %R8,0x20(%RSP) |
0x46566d MOV 0x48(%RBP),%RAX |
0x465671 MOV 0x38(%RBP),%R15 |
0x465675 MOV 0x30(%RBP),%RSI |
0x465679 MOV 0x20(%RBP),%R14 |
0x46567d MOV 0x18(%RBP),%R12 |
0x465681 MOV 0x10(%RBP),%RBX |
0x465685 MOV 0x28(%RBP),%R13D |
0x465689 MOVL $0,0x1c(%RSP) |
0x465691 TEST %RAX,%RAX |
0x465694 JS 465c80 |
0x46569a MOV %RSI,0x8(%RSP) |
0x46569f MOV %RDX,0x40(%RSP) |
0x4656a4 MOV %RCX,0x80(%RSP) |
0x4656ac MOV %R9,0x10(%RSP) |
0x4656b1 MOV (%RDI),%ESI |
0x4656b3 MOVQ $0,0x30(%RSP) |
0x4656bc MOV %RAX,0x28(%RSP) |
0x4656c1 MOVQ $0x1,0x38(%RSP) |
0x4656ca SUB $0x8,%RSP |
0x4656ce LEA 0x40(%RSP),%RAX |
0x4656d3 LEA 0x24(%RSP),%RCX |
0x4656d8 LEA 0x38(%RSP),%R8 |
0x4656dd LEA 0x30(%RSP),%R9 |
0x4656e2 MOV $0x69a5c0,%EDI |
0x4656e7 MOV %ESI,0x20(%RSP) |
0x4656eb MOV $0x22,%EDX |
0x4656f0 PUSH $0x1 |
0x4656f2 PUSH $0x1 |
0x4656f4 PUSH %RAX |
0x4656f5 CALL 403090 <__kmpc_for_static_init_8@plt> |
0x4656fa ADD $0x20,%RSP |
0x4656fe MOV 0x30(%RSP),%R8 |
0x465703 MOV 0x28(%RSP),%R9 |
0x465708 CMP %R9,%R8 |
0x46570b JA 465cd5 |
0x465711 LEA 0x1(%R8),%RAX |
0x465715 INC %R9 |
0x465718 CMP %R9,%RAX |
0x46571b CMOVG %RAX,%R9 |
0x46571f MOV 0x8(%RSP),%RDX |
0x465724 SUB %EDX,%R15D |
0x465727 VPBROADCASTQ %R15,%ZMM16 |
0x46572d VPBROADCASTD %R13D,%YMM0 |
0x465733 VMOVDQU %YMM0,0x60(%RSP) |
0x465739 MOV (%R12),%RAX |
0x46573d MOV 0x10(%R12),%RSI |
0x465742 VPBROADCASTQ %RAX,%ZMM20 |
0x465748 MOV (%R14),%RCX |
0x46574b MOV 0x10(%R14),%RDI |
0x46574f MOV 0x40(%RSP),%RAX |
0x465754 MOV 0x8(%RAX),%R15 |
0x465758 MOV 0x80(%RSP),%RAX |
0x465760 MOV 0x8(%RAX),%R12 |
0x465764 VPBROADCASTQ %RCX,%ZMM21 |
0x46576a MOV 0x10(%RSP),%RAX |
0x46576f MOV (%RAX),%RCX |
0x465772 MOV 0x10(%RAX),%R13 |
0x465776 VPBROADCASTQ %RCX,%ZMM22 |
0x46577c MOV (%RBX),%RAX |
0x46577f MOV 0x10(%RBX),%RCX |
0x465783 SUB %R8,%R9 |
0x465786 VPBROADCASTQ %RAX,%ZMM0 |
0x46578c VMOVDQU64 %ZMM0,0x80(%RSP) |
0x465794 MOV $-0x8,%R14D |
0x46579a VPBROADCASTD %EDX,%YMM0 |
0x4657a0 VMOVDQU %YMM0,0x40(%RSP) |
0x4657a6 MOV %RCX,0x10(%RSP) |
0x4657ab VPBROADCASTQ %RCX,%ZMM0 |
0x4657b1 VMOVDQU64 %ZMM0,0x140(%RSP) |
0x4657b9 AND %R9,%R14 |
0x4657bc JE 465d00 |
0x4657c2 MOV %R9,0xc0(%RSP) |
0x4657ca MOV %R8,0x100(%RSP) |
0x4657d2 VPBROADCASTQ %R8,%ZMM18 |
0x4657d8 VMOVDQU64 0x25ade(%RIP),%ZMM26 |
0x4657e2 VPADDQ %ZMM26,%ZMM18,%ZMM17 |
0x4657e8 LEA 0x1(%RDX),%RAX |
0x4657ec VPBROADCASTQ %RAX,%ZMM27 |
0x4657f2 XOR %EBX,%EBX |
0x4657f4 VBROADCASTSD 0x12f6a(%RIP),%ZMM28 |
0x4657fe VBROADCASTSD 0x12248(%RIP),%ZMM29 |
0x465808 VBROADCASTSD 0x11bb6(%RIP),%ZMM31 |
0x465812 VPBROADCASTQ 0x12f5c(%RIP),%ZMM25 |
0x46581c VBROADCASTSD 0x12242(%RIP),%ZMM30 |
0x465826 JMP 465859 |
0x465828 NOPW %CS:(%RAX,%RAX,1) |
0x465837 NOPW (%RAX,%RAX,1) |
(662) 0x465840 VPADDQ %ZMM25,%ZMM17,%ZMM17 |
(662) 0x465846 VPADDQ %ZMM25,%ZMM26,%ZMM26 |
(662) 0x46584c ADD $0x8,%RBX |
(662) 0x465850 CMP %R14,%RBX |
(662) 0x465853 JAE 465cc0 |
(662) 0x465859 VPADDQ %ZMM26,%ZMM18,%ZMM0 |
(662) 0x46585f VMOVDQA64 %ZMM16,%ZMM1 |
(662) 0x465865 MOV $0x4664a0,%RAX |
(662) 0x46586c CALL %RAX |
(662) 0x46586e VPMOVQD %ZMM0,%YMM19 |
(662) 0x465874 VPADDQ %ZMM0,%ZMM27,%ZMM0 |
(662) 0x46587a VPSLLQ $0x20,%ZMM0,%ZMM0 |
(662) 0x465881 VPSRAQ $0x20,%ZMM0,%ZMM23 |
(662) 0x465888 VMOVDQA64 %ZMM17,%ZMM0 |
(662) 0x46588e VMOVDQA64 %ZMM16,%ZMM1 |
(662) 0x465894 CALL 466620 <__svml_u64div8_z0> |
(662) 0x46589a VPMOVQD %ZMM0,%YMM0 |
(662) 0x4658a0 VPADDD 0x60(%RSP),%YMM0,%YMM3 |
(662) 0x4658a6 VPMOVSXDQ %YMM3,%ZMM24 |
(662) 0x4658ac VPXOR %XMM2,%XMM2,%XMM2 |
(662) 0x4658b0 VPMULLQ %ZMM24,%ZMM20,%ZMM2 |
(662) 0x4658b6 VPADDQ %ZMM2,%ZMM23,%ZMM1 |
(662) 0x4658bc VXORPD %XMM4,%XMM4,%XMM4 |
(662) 0x4658c0 KXNORW %K0,%K0,%K1 |
(662) 0x4658c4 VGATHERQPD (%RSI,%ZMM1,8),%ZMM4{%K1} |
(662) 0x4658cb VPCMPEQD %YMM1,%YMM1,%YMM1 |
(662) 0x4658cf VPSUBD %YMM1,%YMM3,%YMM1 |
(662) 0x4658d3 VPCMPEQD %YMM12,%YMM12,%YMM12 |
(662) 0x4658d8 VPMOVSXDQ %YMM1,%ZMM5 |
(662) 0x4658de VPXOR %XMM6,%XMM6,%XMM6 |
(662) 0x4658e2 VPMULLQ %ZMM5,%ZMM20,%ZMM6 |
(662) 0x4658e8 VPADDQ %ZMM6,%ZMM23,%ZMM1 |
(662) 0x4658ee VXORPD %XMM8,%XMM8,%XMM8 |
(662) 0x4658f3 KXNORW %K0,%K0,%K1 |
(662) 0x4658f7 VGATHERQPD (%RSI,%ZMM1,8),%ZMM8{%K1} |
(662) 0x4658fe VPADDD 0x40(%RSP),%YMM19,%YMM7 |
(662) 0x465906 VPMOVSXDQ %YMM7,%ZMM1 |
(662) 0x46590c VPADDQ %ZMM1,%ZMM2,%ZMM2 |
(662) 0x465912 VXORPD %XMM9,%XMM9,%XMM9 |
(662) 0x465917 KXNORW %K0,%K0,%K1 |
(662) 0x46591b VGATHERQPD (%RSI,%ZMM2,8),%ZMM9{%K1} |
(662) 0x465922 VPADDQ %ZMM1,%ZMM6,%ZMM2 |
(662) 0x465928 VPXOR %XMM6,%XMM6,%XMM6 |
(662) 0x46592c KXNORW %K0,%K0,%K1 |
(662) 0x465930 VGATHERQPD (%RSI,%ZMM2,8),%ZMM6{%K1} |
(662) 0x465937 VPXOR %XMM2,%XMM2,%XMM2 |
(662) 0x46593b VPMULLQ %ZMM5,%ZMM21,%ZMM2 |
(662) 0x465941 VPADDQ %ZMM1,%ZMM2,%ZMM10 |
(662) 0x465947 VXORPD %XMM11,%XMM11,%XMM11 |
(662) 0x46594c KXNORW %K0,%K0,%K1 |
(662) 0x465950 VGATHERQPD (%RDI,%ZMM10,8),%ZMM11{%K1} |
(662) 0x465957 VPSUBD %YMM12,%YMM7,%YMM10 |
(662) 0x46595c VPMOVSXDQ %YMM10,%ZMM12 |
(662) 0x465962 VPADDQ %ZMM12,%ZMM2,%ZMM2 |
(662) 0x465968 VXORPD %XMM13,%XMM13,%XMM13 |
(662) 0x46596d KXNORW %K0,%K0,%K1 |
(662) 0x465971 VGATHERQPD (%RDI,%ZMM2,8),%ZMM13{%K1} |
(662) 0x465978 VPXOR %XMM2,%XMM2,%XMM2 |
(662) 0x46597c VPMULLQ %ZMM24,%ZMM21,%ZMM2 |
(662) 0x465982 VPADDQ %ZMM1,%ZMM2,%ZMM14 |
(662) 0x465988 VXORPD %XMM15,%XMM15,%XMM15 |
(662) 0x46598d KXNORW %K0,%K0,%K1 |
(662) 0x465991 VGATHERQPD (%RDI,%ZMM14,8),%ZMM15{%K1} |
(662) 0x465998 VPADDQ %ZMM12,%ZMM2,%ZMM2 |
(662) 0x46599e VXORPD %XMM14,%XMM14,%XMM14 |
(662) 0x4659a3 KXNORW %K0,%K0,%K1 |
(662) 0x4659a7 VGATHERQPD (%RDI,%ZMM2,8),%ZMM14{%K1} |
(662) 0x4659ae VXORPD %XMM2,%XMM2,%XMM2 |
(662) 0x4659b2 KXNORW %K0,%K0,%K1 |
(662) 0x4659b6 VGATHERDPD (%R15,%YMM7,8),%ZMM2{%K1} |
(662) 0x4659bd VSUBPD %ZMM9,%ZMM8,%ZMM8 |
(662) 0x4659c3 VSUBPD %ZMM4,%ZMM6,%ZMM9 |
(662) 0x4659c9 VSUBPD %ZMM9,%ZMM8,%ZMM6 |
(662) 0x4659cf VSUBPD %ZMM15,%ZMM13,%ZMM13 |
(662) 0x4659d5 VXORPD %XMM4,%XMM4,%XMM4 |
(662) 0x4659d9 KXNORW %K0,%K0,%K1 |
(662) 0x4659dd VGATHERDPD (%R12,%YMM3,8),%ZMM4{%K1} |
(662) 0x4659e4 VSUBPD %ZMM11,%ZMM14,%ZMM11 |
(662) 0x4659ea VPXOR %XMM14,%XMM14,%XMM14 |
(662) 0x4659ef VPMULLQ %ZMM24,%ZMM22,%ZMM14 |
(662) 0x4659f5 VPADDQ %ZMM12,%ZMM14,%ZMM12 |
(662) 0x4659fb VXORPD %XMM15,%XMM15,%XMM15 |
(662) 0x465a00 KXNORW %K0,%K0,%K1 |
(662) 0x465a04 VGATHERQPD (%R13,%ZMM12,8),%ZMM15{%K1} |
(662) 0x465a0c VSUBPD %ZMM11,%ZMM13,%ZMM12 |
(662) 0x465a12 VMULPD %ZMM6,%ZMM2,%ZMM19 |
(662) 0x465a18 VPCMPEQD %YMM0,%YMM0,%YMM0 |
(662) 0x465a1c VPADDD %YMM0,%YMM7,%YMM7 |
(662) 0x465a20 VPMOVSXDQ %YMM7,%ZMM7 |
(662) 0x465a26 VPADDQ %ZMM7,%ZMM14,%ZMM7 |
(662) 0x465a2c VPXOR %XMM14,%XMM14,%XMM14 |
(662) 0x465a31 KXNORW %K0,%K0,%K1 |
(662) 0x465a35 VGATHERQPD (%R13,%ZMM7,8),%ZMM14{%K1} |
(662) 0x465a3d VFMADD231PD %ZMM12,%ZMM4,%ZMM19 |
(662) 0x465a43 VADDPD %ZMM8,%ZMM9,%ZMM7 |
(662) 0x465a49 VDIVPD %ZMM2,%ZMM29,%ZMM8 |
(662) 0x465a4f VXORPD %XMM9,%XMM9,%XMM9 |
(662) 0x465a54 KXNORW %K0,%K0,%K1 |
(662) 0x465a58 VGATHERDPD (%R15,%YMM10,8),%ZMM9{%K1} |
(662) 0x465a5f VMULPD %ZMM28,%ZMM7,%ZMM10 |
(662) 0x465a65 VPMULLQ %ZMM5,%ZMM22,%ZMM5 |
(662) 0x465a6b VPADDQ %ZMM1,%ZMM5,%ZMM5 |
(662) 0x465a71 VXORPD %XMM7,%XMM7,%XMM7 |
(662) 0x465a75 KXNORW %K0,%K0,%K1 |
(662) 0x465a79 VGATHERQPD (%R13,%ZMM5,8),%ZMM7{%K1} |
(662) 0x465a81 VADDPD %ZMM13,%ZMM11,%ZMM11 |
(662) 0x465a87 VDIVPD %ZMM4,%ZMM29,%ZMM13 |
(662) 0x465a8d VSUBPD %ZMM14,%ZMM15,%ZMM5 |
(662) 0x465a93 VPADDD %YMM0,%YMM3,%YMM14 |
(662) 0x465a97 VPMOVSXDQ %YMM14,%ZMM14 |
(662) 0x465a9d VPMULLQ %ZMM14,%ZMM22,%ZMM14 |
(662) 0x465aa3 VPADDQ %ZMM1,%ZMM14,%ZMM14 |
(662) 0x465aa9 VXORPD %XMM15,%XMM15,%XMM15 |
(662) 0x465aae KXNORW %K0,%K0,%K1 |
(662) 0x465ab2 VGATHERQPD (%R13,%ZMM14,8),%ZMM15{%K1} |
(662) 0x465aba VADDPD %ZMM2,%ZMM9,%ZMM9 |
(662) 0x465ac0 VDIVPD %ZMM9,%ZMM5,%ZMM5 |
(662) 0x465ac6 VPADDD 0x152b0(%RIP){1to8},%YMM3,%YMM3 |
(662) 0x465ad0 VXORPD %XMM9,%XMM9,%XMM9 |
(662) 0x465ad5 KXNORW %K0,%K0,%K1 |
(662) 0x465ad9 VGATHERDPD (%R12,%YMM3,8),%ZMM9{%K1} |
(662) 0x465ae0 VMULPD %ZMM8,%ZMM28,%ZMM3 |
(662) 0x465ae6 VSUBPD %ZMM15,%ZMM7,%ZMM7 |
(662) 0x465aec VADDPD %ZMM4,%ZMM9,%ZMM9 |
(662) 0x465af2 VDIVPD %ZMM9,%ZMM7,%ZMM7 |
(662) 0x465af8 VMULPD %ZMM3,%ZMM11,%ZMM3 |
(662) 0x465afe VFMADD231PD %ZMM10,%ZMM13,%ZMM3 |
(662) 0x465b04 VMULPD %ZMM7,%ZMM7,%ZMM9 |
(662) 0x465b0a VMULPD %ZMM3,%ZMM7,%ZMM3 |
(662) 0x465b10 VMULPD %ZMM28,%ZMM6,%ZMM6 |
(662) 0x465b16 VMULPD %ZMM8,%ZMM5,%ZMM8 |
(662) 0x465b1c VFMADD213PD %ZMM3,%ZMM6,%ZMM8 |
(662) 0x465b22 VMULPD %ZMM5,%ZMM8,%ZMM3 |
(662) 0x465b28 VMULPD %ZMM28,%ZMM12,%ZMM6 |
(662) 0x465b2e VMULPD %ZMM9,%ZMM13,%ZMM8 |
(662) 0x465b34 VFMADD213PD %ZMM3,%ZMM6,%ZMM8 |
(662) 0x465b3a VFMADD231PD %ZMM5,%ZMM5,%ZMM9 |
(662) 0x465b40 VMAXPD %ZMM31,%ZMM9,%ZMM3 |
(662) 0x465b46 VDIVPD %ZMM3,%ZMM8,%ZMM3 |
(662) 0x465b4c VFPCLASSPD $0x56,%ZMM3,%K1 |
(662) 0x465b53 VFPCLASSPD $0x50,%ZMM19,%K1{%K1} |
(662) 0x465b5a KNOTB %K1,%K2 |
(662) 0x465b5e VMOVDQU64 0x80(%RSP),%ZMM6 |
(662) 0x465b66 VPMULLQ %ZMM24,%ZMM6,%ZMM6 |
(662) 0x465b6c VPADDQ %ZMM1,%ZMM6,%ZMM6 |
(662) 0x465b72 MOV 0x10(%RSP),%RAX |
(662) 0x465b77 VXORPD %XMM8,%XMM8,%XMM8 |
(662) 0x465b7c VSCATTERQPD %ZMM8,(%RAX,%ZMM6,8){%K2} |
(662) 0x465b83 KORTESTB %K1,%K1 |
(662) 0x465b87 JE 465840 |
(662) 0x465b8d VANDPD %ZMM30,%ZMM5,%ZMM8 |
(662) 0x465b93 VMAXPD %ZMM31,%ZMM8,%ZMM8 |
(662) 0x465b99 VFPCLASSPD $0x50,%ZMM5,%K2 |
(662) 0x465ba0 VBROADCASTSD 0x13c16(%RIP),%ZMM9 |
(662) 0x465baa VXORPD %ZMM9,%ZMM8,%ZMM8{%K2} |
(662) 0x465bb0 VANDPD %ZMM30,%ZMM7,%ZMM5 |
(662) 0x465bb6 VMAXPD %ZMM31,%ZMM5,%ZMM5 |
(662) 0x465bbc VFPCLASSPD $0x50,%ZMM8,%K2 |
(662) 0x465bc3 VXORPD %ZMM9,%ZMM5,%ZMM5{%K2} |
(662) 0x465bc9 VMULPD %ZMM8,%ZMM8,%ZMM7 |
(662) 0x465bcf VFMADD231PD %ZMM5,%ZMM5,%ZMM7 |
(662) 0x465bd5 VSQRTPD %ZMM7,%ZMM7 |
(662) 0x465bdb VPSLLQ $0x3,%ZMM6,%ZMM6 |
(662) 0x465be2 VPADDQ 0x140(%RSP),%ZMM6,%ZMM6 |
(662) 0x465bea VMULPD %ZMM2,%ZMM7,%ZMM2 |
(662) 0x465bf0 VDIVPD %ZMM8,%ZMM2,%ZMM2 |
(662) 0x465bf6 VANDPD %ZMM30,%ZMM2,%ZMM2 |
(662) 0x465bfc VMULPD %ZMM4,%ZMM7,%ZMM4 |
(662) 0x465c02 VDIVPD %ZMM5,%ZMM4,%ZMM4 |
(662) 0x465c08 VANDPD %ZMM30,%ZMM4,%ZMM4 |
(662) 0x465c0e MOV 0x20(%RSP),%RCX |
(662) 0x465c13 MOV 0x10(%RCX),%RAX |
(662) 0x465c17 VPMULLQ (%RCX){1to8},%ZMM24,%ZMM0 |
(662) 0x465c1d VPADDQ %ZMM1,%ZMM0,%ZMM0 |
(662) 0x465c23 VPXOR %XMM1,%XMM1,%XMM1 |
(662) 0x465c27 KMOVQ %K1,%K2 |
(662) 0x465c2c VGATHERQPD (%RAX,%ZMM0,8),%ZMM1{%K2} |
(662) 0x465c33 VMINPD %ZMM4,%ZMM2,%ZMM0 |
(662) 0x465c39 VMULPD %ZMM3,%ZMM0,%ZMM0 |
(662) 0x465c3f VMULPD %ZMM0,%ZMM0,%ZMM0 |
(662) 0x465c45 VADDPD %ZMM1,%ZMM1,%ZMM1 |
(662) 0x465c4b VMULPD %ZMM1,%ZMM0,%ZMM0 |
(662) 0x465c51 VSCATTERQPD %ZMM0,(,%ZMM6,1){%K1} |
(662) 0x465c5c JMP 465840 |
0x465c61 NOPW %CS:(%RAX,%RAX,1) |
0x465c70 NOPW %CS:(%RAX,%RAX,1) |
0x465c7f NOP |
0x465c80 LEA -0x28(%RBP),%RSP |
0x465c84 POP %RBX |
0x465c85 POP %R12 |
0x465c87 POP %R13 |
0x465c89 POP %R14 |
0x465c8b POP %R15 |
0x465c8d POP %RBP |
0x465c8e RET |
0x465c8f NOPW %CS:(%RAX,%RAX,1) |
0x465c9e NOPW %CS:(%RAX,%RAX,1) |
0x465cad NOPW %CS:(%RAX,%RAX,1) |
0x465cbc NOPL (%RAX) |
0x465cc0 MOV 0xc0(%RSP),%R9 |
0x465cc8 CMP %R14,%R9 |
0x465ccb MOV 0x100(%RSP),%R8 |
0x465cd3 JNE 465d40 |
0x465cd5 MOV $0x69a5e0,%EDI |
0x465cda MOV 0x18(%RSP),%ESI |
0x465cde LEA -0x28(%RBP),%RSP |
0x465ce2 POP %RBX |
0x465ce3 POP %R12 |
0x465ce5 POP %R13 |
0x465ce7 POP %R14 |
0x465ce9 POP %R15 |
0x465ceb POP %RBP |
0x465cec VZEROUPPER |
0x465cef JMP 402ef0 |
0x465cf4 NOPW %CS:(%RAX,%RAX,1) |
0x465d00 XOR %R14D,%R14D |
0x465d03 MOV %R8,%RAX |
0x465d06 JMP 465d49 |
0x465d08 NOPW %CS:(%RAX,%RAX,1) |
0x465d17 NOPW %CS:(%RAX,%RAX,1) |
0x465d26 NOPW %CS:(%RAX,%RAX,1) |
0x465d35 NOPW %CS:(%RAX,%RAX,1) |
0x465d40 LEA (%R8,%R14,1),%RAX |
0x465d44 MOV 0x8(%RSP),%RDX |
0x465d49 VPBROADCASTQ %RAX,%ZMM0 |
0x465d4f VMOVDQU64 0x25567(%RIP),%ZMM26 |
0x465d59 VPADDQ %ZMM26,%ZMM0,%ZMM17 |
0x465d5f SUB %R14,%R9 |
0x465d62 VPBROADCASTQ %R9,%ZMM27 |
0x465d68 ADD %R14,%R8 |
0x465d6b VPBROADCASTQ %R8,%ZMM0 |
0x465d71 VMOVDQU64 %ZMM0,0x100(%RSP) |
0x465d79 INC %RDX |
0x465d7c VPBROADCASTQ %RDX,%ZMM0 |
0x465d82 VMOVDQU64 %ZMM0,0xc0(%RSP) |
0x465d8a VPBROADCASTQ 0x129e4(%RIP),%ZMM30 |
0x465d94 MOV $0x4664a0,%RBX |
0x465d9b MOV $0x466620,%R14 |
0x465da2 VBROADCASTSD 0x129bc(%RIP),%ZMM31 |
0x465dac VBROADCASTSD 0x11c9a(%RIP),%ZMM25 |
0x465db6 VBROADCASTSD 0x11608(%RIP),%ZMM28 |
0x465dc0 VBROADCASTSD 0x11c9e(%RIP),%ZMM29 |
0x465dca JMP 465e23 |
0x465dcc NOPW %CS:(%RAX,%RAX,1) |
0x465ddb NOPW %CS:(%RAX,%RAX,1) |
0x465dea NOPW %CS:(%RAX,%RAX,1) |
0x465df9 NOPL (%RAX) |
(661) 0x465e00 VPADDQ %ZMM30,%ZMM17,%ZMM0 |
(661) 0x465e06 VMOVDQA64 %ZMM0,%ZMM17{%K3} |
(661) 0x465e0c VPADDQ %ZMM30,%ZMM26,%ZMM26 |
(661) 0x465e12 VPCMPLTUQ %ZMM27,%ZMM26,%K0 |
(661) 0x465e19 KORTESTB %K0,%K0 |
(661) 0x465e1d JE 465cd5 |
(661) 0x465e23 VPCMPLTUQ %ZMM27,%ZMM26,%K3 |
(661) 0x465e2a KORTESTB %K3,%K3 |
(661) 0x465e2e VPXOR %XMM0,%XMM0,%XMM0 |
(661) 0x465e32 JE 465e06 |
(661) 0x465e34 VPADDQ 0x100(%RSP),%ZMM26,%ZMM0 |
(661) 0x465e3c VPBROADCASTQ 0x1237a(%RIP),%ZMM1 |
(661) 0x465e46 VPBLENDMQ %ZMM16,%ZMM1,%ZMM18{%K3} |
(661) 0x465e4c VMOVDQA64 %ZMM18,%ZMM1 |
(661) 0x465e52 KMOVW %K3,0x8(%RSP) |
(661) 0x465e58 CALL %RBX |
(661) 0x465e5a VPMOVQD %ZMM0,%YMM19 |
(661) 0x465e60 VPADDQ 0xc0(%RSP),%ZMM0,%ZMM0 |
(661) 0x465e68 VPSLLQ $0x20,%ZMM0,%ZMM0 |
(661) 0x465e6f VPSRAQ $0x20,%ZMM0,%ZMM23 |
(661) 0x465e76 VMOVDQA64 %ZMM17,%ZMM0 |
(661) 0x465e7c VMOVDQA64 %ZMM18,%ZMM1 |
(661) 0x465e82 CALL %R14 |
(661) 0x465e85 KMOVW 0x8(%RSP),%K3 |
(661) 0x465e8b VPMOVQD %ZMM0,%YMM0 |
(661) 0x465e91 VPADDD 0x60(%RSP),%YMM0,%YMM4 |
(661) 0x465e97 VPMOVSXDQ %YMM4,%ZMM24 |
(661) 0x465e9d VPXOR %XMM2,%XMM2,%XMM2 |
(661) 0x465ea1 VPMULLQ %ZMM24,%ZMM20,%ZMM2 |
(661) 0x465ea7 VPADDQ %ZMM2,%ZMM23,%ZMM1 |
(661) 0x465ead VXORPD %XMM3,%XMM3,%XMM3 |
(661) 0x465eb1 KMOVQ %K3,%K1 |
(661) 0x465eb6 VGATHERQPD (%RSI,%ZMM1,8),%ZMM3{%K1} |
(661) 0x465ebd VPCMPEQD %YMM1,%YMM1,%YMM1 |
(661) 0x465ec1 VPSUBD %YMM1,%YMM4,%YMM1 |
(661) 0x465ec5 VPCMPEQD %YMM12,%YMM12,%YMM12 |
(661) 0x465eca VPMOVSXDQ %YMM1,%ZMM5 |
(661) 0x465ed0 VPXOR %XMM6,%XMM6,%XMM6 |
(661) 0x465ed4 VPMULLQ %ZMM5,%ZMM20,%ZMM6 |
(661) 0x465eda VPADDQ %ZMM6,%ZMM23,%ZMM1 |
(661) 0x465ee0 VXORPD %XMM7,%XMM7,%XMM7 |
(661) 0x465ee4 KMOVQ %K3,%K1 |
(661) 0x465ee9 VGATHERQPD (%RSI,%ZMM1,8),%ZMM7{%K1} |
(661) 0x465ef0 VPADDD 0x40(%RSP),%YMM19,%YMM8 |
(661) 0x465ef8 VPMOVSXDQ %YMM8,%ZMM1 |
(661) 0x465efe VPADDQ %ZMM1,%ZMM2,%ZMM2 |
(661) 0x465f04 VXORPD %XMM9,%XMM9,%XMM9 |
(661) 0x465f09 KMOVQ %K3,%K1 |
(661) 0x465f0e VGATHERQPD (%RSI,%ZMM2,8),%ZMM9{%K1} |
(661) 0x465f15 VPADDQ %ZMM1,%ZMM6,%ZMM2 |
(661) 0x465f1b VPXOR %XMM6,%XMM6,%XMM6 |
(661) 0x465f1f KMOVQ %K3,%K1 |
(661) 0x465f24 VGATHERQPD (%RSI,%ZMM2,8),%ZMM6{%K1} |
(661) 0x465f2b VPXOR %XMM2,%XMM2,%XMM2 |
(661) 0x465f2f VPMULLQ %ZMM5,%ZMM21,%ZMM2 |
(661) 0x465f35 VPADDQ %ZMM1,%ZMM2,%ZMM10 |
(661) 0x465f3b VXORPD %XMM11,%XMM11,%XMM11 |
(661) 0x465f40 KMOVQ %K3,%K1 |
(661) 0x465f45 VGATHERQPD (%RDI,%ZMM10,8),%ZMM11{%K1} |
(661) 0x465f4c VPSUBD %YMM12,%YMM8,%YMM10 |
(661) 0x465f51 VPMOVSXDQ %YMM10,%ZMM12 |
(661) 0x465f57 VPADDQ %ZMM12,%ZMM2,%ZMM2 |
(661) 0x465f5d VXORPD %XMM13,%XMM13,%XMM13 |
(661) 0x465f62 KMOVQ %K3,%K1 |
(661) 0x465f67 VGATHERQPD (%RDI,%ZMM2,8),%ZMM13{%K1} |
(661) 0x465f6e VPXOR %XMM2,%XMM2,%XMM2 |
(661) 0x465f72 VPMULLQ %ZMM24,%ZMM21,%ZMM2 |
(661) 0x465f78 VPADDQ %ZMM1,%ZMM2,%ZMM14 |
(661) 0x465f7e VXORPD %XMM15,%XMM15,%XMM15 |
(661) 0x465f83 KMOVQ %K3,%K1 |
(661) 0x465f88 VGATHERQPD (%RDI,%ZMM14,8),%ZMM15{%K1} |
(661) 0x465f8f VSUBPD %ZMM9,%ZMM7,%ZMM9 |
(661) 0x465f95 VSUBPD %ZMM3,%ZMM6,%ZMM14 |
(661) 0x465f9b VSUBPD %ZMM14,%ZMM9,%ZMM6 |
(661) 0x465fa1 VSUBPD %ZMM15,%ZMM13,%ZMM13 |
(661) 0x465fa7 VPADDQ %ZMM12,%ZMM2,%ZMM2 |
(661) 0x465fad VXORPD %XMM7,%XMM7,%XMM7 |
(661) 0x465fb1 KMOVQ %K3,%K1 |
(661) 0x465fb6 VGATHERQPD (%RDI,%ZMM2,8),%ZMM7{%K1} |
(661) 0x465fbd VXORPD %XMM2,%XMM2,%XMM2 |
(661) 0x465fc1 KMOVQ %K3,%K1 |
(661) 0x465fc6 VGATHERDPD (%R15,%YMM8,8),%ZMM2{%K1} |
(661) 0x465fcd VXORPD %XMM3,%XMM3,%XMM3 |
(661) 0x465fd1 KMOVQ %K3,%K1 |
(661) 0x465fd6 VGATHERDPD (%R12,%YMM4,8),%ZMM3{%K1} |
(661) 0x465fdd VSUBPD %ZMM11,%ZMM7,%ZMM11 |
(661) 0x465fe3 VSUBPD %ZMM11,%ZMM13,%ZMM7 |
(661) 0x465fe9 VPXOR %XMM15,%XMM15,%XMM15 |
(661) 0x465fee VPMULLQ %ZMM24,%ZMM22,%ZMM15 |
(661) 0x465ff4 VPADDQ %ZMM12,%ZMM15,%ZMM12 |
(661) 0x465ffa VPXORD %XMM18,%XMM18,%XMM18 |
(661) 0x466000 KMOVQ %K3,%K1 |
(661) 0x466005 VGATHERQPD (%R13,%ZMM12,8),%ZMM18{%K1} |
(661) 0x46600d VMULPD %ZMM6,%ZMM2,%ZMM12 |
(661) 0x466013 VFMADD231PD %ZMM7,%ZMM3,%ZMM12 |
(661) 0x466019 VADDPD %ZMM9,%ZMM14,%ZMM9 |
(661) 0x46601f VPCMPEQD %YMM0,%YMM0,%YMM0 |
(661) 0x466023 VPADDD %YMM0,%YMM8,%YMM8 |
(661) 0x466027 VPMOVSXDQ %YMM8,%ZMM8 |
(661) 0x46602d VPADDQ %ZMM8,%ZMM15,%ZMM8 |
(661) 0x466033 VXORPD %XMM14,%XMM14,%XMM14 |
(661) 0x466038 KMOVQ %K3,%K1 |
(661) 0x46603d VGATHERQPD (%R13,%ZMM8,8),%ZMM14{%K1} |
(661) 0x466045 VXORPD %XMM8,%XMM8,%XMM8 |
(661) 0x46604a KMOVQ %K3,%K1 |
(661) 0x46604f VGATHERDPD (%R15,%YMM10,8),%ZMM8{%K1} |
(661) 0x466056 VMULPD %ZMM31,%ZMM9,%ZMM9 |
(661) 0x46605c VADDPD %ZMM13,%ZMM11,%ZMM10 |
(661) 0x466062 VPMULLQ %ZMM5,%ZMM22,%ZMM5 |
(661) 0x466068 VPADDQ %ZMM1,%ZMM5,%ZMM5 |
(661) 0x46606e VXORPD %XMM11,%XMM11,%XMM11 |
(661) 0x466073 KMOVQ %K3,%K1 |
(661) 0x466078 VGATHERQPD (%R13,%ZMM5,8),%ZMM11{%K1} |
(661) 0x466080 VPADDD %YMM0,%YMM4,%YMM5 |
(661) 0x466084 VPMOVSXDQ %YMM5,%ZMM5 |
(661) 0x46608a VPMULLQ %ZMM5,%ZMM22,%ZMM5 |
(661) 0x466090 VPADDQ %ZMM1,%ZMM5,%ZMM5 |
(661) 0x466096 VXORPD %XMM13,%XMM13,%XMM13 |
(661) 0x46609b KMOVQ %K3,%K1 |
(661) 0x4660a0 VGATHERQPD (%R13,%ZMM5,8),%ZMM13{%K1} |
(661) 0x4660a8 VDIVPD %ZMM2,%ZMM25,%ZMM15 |
(661) 0x4660ae VDIVPD %ZMM3,%ZMM25,%ZMM19 |
(661) 0x4660b4 VSUBPD %ZMM14,%ZMM18,%ZMM5 |
(661) 0x4660ba VADDPD %ZMM2,%ZMM8,%ZMM8 |
(661) 0x4660c0 VDIVPD %ZMM8,%ZMM5,%ZMM5 |
(661) 0x4660c6 VPADDD 0x14cb0(%RIP){1to8},%YMM4,%YMM4 |
(661) 0x4660d0 VXORPD %XMM8,%XMM8,%XMM8 |
(661) 0x4660d5 KMOVQ %K3,%K1 |
(661) 0x4660da VGATHERDPD (%R12,%YMM4,8),%ZMM8{%K1} |
(661) 0x4660e1 VMULPD %ZMM15,%ZMM31,%ZMM4 |
(661) 0x4660e7 VSUBPD %ZMM13,%ZMM11,%ZMM11 |
(661) 0x4660ed VADDPD %ZMM3,%ZMM8,%ZMM8 |
(661) 0x4660f3 VDIVPD %ZMM8,%ZMM11,%ZMM8 |
(661) 0x4660f9 VMULPD %ZMM4,%ZMM10,%ZMM4 |
(661) 0x4660ff VFMADD231PD %ZMM9,%ZMM19,%ZMM4 |
(661) 0x466105 VMULPD %ZMM8,%ZMM8,%ZMM9 |
(661) 0x46610b VMULPD %ZMM4,%ZMM8,%ZMM4 |
(661) 0x466111 VMULPD %ZMM31,%ZMM6,%ZMM6 |
(661) 0x466117 VMULPD %ZMM15,%ZMM5,%ZMM10 |
(661) 0x46611d VFMADD213PD %ZMM4,%ZMM6,%ZMM10 |
(661) 0x466123 VMULPD %ZMM5,%ZMM10,%ZMM4 |
(661) 0x466129 VMULPD %ZMM31,%ZMM7,%ZMM6 |
(661) 0x46612f VMULPD %ZMM9,%ZMM19,%ZMM7 |
(661) 0x466135 VFMADD213PD %ZMM4,%ZMM6,%ZMM7 |
(661) 0x46613b VFMADD231PD %ZMM5,%ZMM5,%ZMM9 |
(661) 0x466141 VMAXPD %ZMM28,%ZMM9,%ZMM4 |
(661) 0x466147 VDIVPD %ZMM4,%ZMM7,%ZMM4 |
(661) 0x46614d VFPCLASSPD $0x56,%ZMM4,%K1 |
(661) 0x466154 VFPCLASSPD $0x50,%ZMM12,%K0{%K1} |
(661) 0x46615b KANDNB %K3,%K0,%K1 |
(661) 0x46615f VMOVDQU64 0x80(%RSP),%ZMM6 |
(661) 0x466167 VPMULLQ %ZMM24,%ZMM6,%ZMM6 |
(661) 0x46616d VPADDQ %ZMM1,%ZMM6,%ZMM6 |
(661) 0x466173 MOV 0x10(%RSP),%RAX |
(661) 0x466178 VXORPD %XMM7,%XMM7,%XMM7 |
(661) 0x46617c VSCATTERQPD %ZMM7,(%RAX,%ZMM6,8){%K1} |
(661) 0x466183 KANDB %K0,%K3,%K1 |
(661) 0x466187 KORTESTB %K1,%K1 |
(661) 0x46618b JE 465e00 |
(661) 0x466191 VANDPD %ZMM29,%ZMM5,%ZMM7 |
(661) 0x466197 VMAXPD %ZMM28,%ZMM7,%ZMM7 |
(661) 0x46619d VFPCLASSPD $0x50,%ZMM5,%K2 |
(661) 0x4661a4 VBROADCASTSD 0x13612(%RIP),%ZMM9 |
(661) 0x4661ae VXORPD %ZMM9,%ZMM7,%ZMM7{%K2} |
(661) 0x4661b4 VANDPD %ZMM29,%ZMM8,%ZMM5 |
(661) 0x4661ba VMAXPD %ZMM28,%ZMM5,%ZMM5 |
(661) 0x4661c0 VFPCLASSPD $0x50,%ZMM7,%K2 |
(661) 0x4661c7 VXORPD %ZMM9,%ZMM5,%ZMM5{%K2} |
(661) 0x4661cd VMULPD %ZMM7,%ZMM7,%ZMM8 |
(661) 0x4661d3 VFMADD231PD %ZMM5,%ZMM5,%ZMM8 |
(661) 0x4661d9 VSQRTPD %ZMM8,%ZMM8 |
(661) 0x4661df VPSLLQ $0x3,%ZMM6,%ZMM6 |
(661) 0x4661e6 VPADDQ 0x140(%RSP),%ZMM6,%ZMM6 |
(661) 0x4661ee VMULPD %ZMM2,%ZMM8,%ZMM2 |
(661) 0x4661f4 VDIVPD %ZMM7,%ZMM2,%ZMM2 |
(661) 0x4661fa VANDPD %ZMM29,%ZMM2,%ZMM2 |
(661) 0x466200 VMULPD %ZMM3,%ZMM8,%ZMM3 |
(661) 0x466206 VDIVPD %ZMM5,%ZMM3,%ZMM3 |
(661) 0x46620c VANDPD %ZMM29,%ZMM3,%ZMM3 |
(661) 0x466212 MOV 0x20(%RSP),%RCX |
(661) 0x466217 MOV 0x10(%RCX),%RAX |
(661) 0x46621b VPMULLQ (%RCX){1to8},%ZMM24,%ZMM0 |
(661) 0x466221 VPADDQ %ZMM1,%ZMM0,%ZMM0 |
(661) 0x466227 VPXOR %XMM1,%XMM1,%XMM1 |
(661) 0x46622b KMOVQ %K1,%K2 |
(661) 0x466230 VGATHERQPD (%RAX,%ZMM0,8),%ZMM1{%K2} |
(661) 0x466237 VMINPD %ZMM3,%ZMM2,%ZMM0 |
(661) 0x46623d VMULPD %ZMM4,%ZMM0,%ZMM0 |
(661) 0x466243 VMULPD %ZMM0,%ZMM0,%ZMM0 |
(661) 0x466249 VADDPD %ZMM1,%ZMM1,%ZMM1 |
(661) 0x46624f VMULPD %ZMM1,%ZMM0,%ZMM0 |
(661) 0x466255 VSCATTERQPD %ZMM0,(,%ZMM6,1){%K1} |
(661) 0x466260 JMP 465e00 |
0x466265 NOPW %CS:(%RAX,%RAX,1) |
0x46626f NOP |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►99.99+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | viscosity.cpp:36-66 |
Module | exec |
nb instructions | 162 |
nb uops | 164 |
loop length | 922 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 1 |
used zmm registers | 14 |
nb stack references | 23 |
micro-operation queue | 27.33 cycles |
front end | 27.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 6.20 | 17.67 | 17.67 | 14.50 | 14.00 | 6.30 | 14.50 | 14.50 | 14.50 | 6.20 | 17.67 |
cycles | 6.30 | 6.20 | 17.67 | 17.67 | 14.50 | 14.00 | 6.30 | 14.50 | 14.50 | 14.50 | 6.20 | 17.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 27.12-27.15 |
Stall cycles | 0.00 |
Front-end | 27.33 |
Dispatch | 17.67 |
Overall L1 | 27.33 |
all | 17% |
load | 11% |
store | 31% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 15% |
load | 8% |
store | 31% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 24% |
load | 21% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 22% |
load | 18% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 465c80 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x630> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x40(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x24(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x38(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x30(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x69a5c0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403090 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R9,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 465cd5 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x685> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R15,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %R13D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x80(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTD %EDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
AND %R9,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 465d00 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R8,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x25ade(%RIP),%ZMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPADDQ %ZMM26,%ZMM18,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x12f6a(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x12248(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11bb6(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x12f5c(%RIP),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x12242(%RIP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 465859 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x209> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xc0(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x100(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 465d40 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x69a5e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x18(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402ef0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 465d49 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6f9> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R8,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x25567(%RIP),%ZMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPADDQ %ZMM26,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %R14,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R9,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R8,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ 0x129e4(%RIP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV $0x4664a0,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x466620,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD 0x129bc(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11c9a(%RIP),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11608(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11c9e(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 465e23 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x7d3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | viscosity.cpp:36-66 |
Module | exec |
nb instructions | 162 |
nb uops | 164 |
loop length | 922 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 1 |
used zmm registers | 14 |
nb stack references | 23 |
micro-operation queue | 27.33 cycles |
front end | 27.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 6.20 | 17.67 | 17.67 | 14.50 | 14.00 | 6.30 | 14.50 | 14.50 | 14.50 | 6.20 | 17.67 |
cycles | 6.30 | 6.20 | 17.67 | 17.67 | 14.50 | 14.00 | 6.30 | 14.50 | 14.50 | 14.50 | 6.20 | 17.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 27.12-27.15 |
Stall cycles | 0.00 |
Front-end | 27.33 |
Dispatch | 17.67 |
Overall L1 | 27.33 |
all | 17% |
load | 11% |
store | 31% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 15% |
load | 8% |
store | 31% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 24% |
load | 21% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 22% |
load | 18% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 465c80 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x630> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x40(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x24(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x38(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x30(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x69a5c0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403090 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R9,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 465cd5 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x685> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R15,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %R13D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x80(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RCX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R8,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTD %EDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
AND %R9,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 465d00 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R8,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x25ade(%RIP),%ZMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPADDQ %ZMM26,%ZMM18,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x12f6a(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x12248(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11bb6(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x12f5c(%RIP),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x12242(%RIP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 465859 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x209> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xc0(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R14,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x100(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 465d40 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x69a5e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x18(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402ef0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 465d49 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6f9> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R8,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x25567(%RIP),%ZMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPADDQ %ZMM26,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %R14,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R9,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %R8,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ 0x129e4(%RIP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV $0x4664a0,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x466620,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VBROADCASTSD 0x129bc(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11c9a(%RIP),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11608(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11c9e(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 465e23 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x7d3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼viscosity_kernel(int, int, int, int, clover::Buffer1D | 3.36 | 2.22 |
○Loop 662 - viscosity.cpp:37-66 - exec | 3.36 | 2.21 |
○Loop 661 - viscosity.cpp:37-64 - exec | 0 | 0 |