Function: _Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted | Module: exec | Source: viscosity.cpp:36-64 [...] | Coverage: 2.66% |
---|
Function: _Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted | Module: exec | Source: viscosity.cpp:36-64 [...] | Coverage: 2.66% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/viscosity.cpp: 36 - 64 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
39: double ugrad = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) - (xvel0(i, j) + xvel0(i + 0, j + 1)); |
40: double vgrad = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) - (yvel0(i, j) + yvel0(i + 1, j + 0)); |
41: double div = (celldx[i] * (ugrad) + celldy[j] * (vgrad)); |
42: double strain2 = 0.5 * (xvel0(i + 0, j + 1) + xvel0(i + 1, j + 1) - xvel0(i, j) - xvel0(i + 1, j + 0)) / celldy[j] + |
43: 0.5 * (yvel0(i + 1, j + 0) + yvel0(i + 1, j + 1) - yvel0(i, j) - yvel0(i + 0, j + 1)) / celldx[i]; |
44: double pgradx = (pressure(i + 1, j + 0) - pressure(i - 1, j + 0)) / (celldx[i] + celldx[i + 1]); |
45: double pgrady = (pressure(i + 0, j + 1) - pressure(i + 0, j - 1)) / (celldy[j] + celldy[j + 2]); |
46: double pgradx2 = pgradx * pgradx; |
47: double pgrady2 = pgrady * pgrady; |
48: double limiter = ((0.5 * (ugrad) / celldx[i]) * pgradx2 + (0.5 * (vgrad) / celldy[j]) * pgrady2 + strain2 * pgradx * pgrady) / |
49: std::fmax(pgradx2 + pgrady2, g_small); |
50: if ((limiter > 0.0) || (div >= 0.0)) { |
51: viscosity(i, j) = 0.0; |
52: } else { |
53: double dirx = 1.0; |
54: if (pgradx < 0.0) dirx = -1.0; |
55: pgradx = dirx * std::fmax(g_small, std::fabs(pgradx)); |
56: double diry = 1.0; |
57: if (pgradx < 0.0) diry = -1.0; |
58: pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); |
59: double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); |
60: double xgrad = std::fabs(celldx[i] * pgrad / pgradx); |
61: double ygrad = std::fabs(celldy[j] * pgrad / pgrady); |
62: double grad = std::fmin(xgrad, ygrad); |
63: double grad2 = grad * grad; |
64: viscosity(i, j) = 2.0 * density0(i, j) * grad2 * limiter * limiter; |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x451b70 PUSH %RBP |
0x451b71 MOV %RSP,%RBP |
0x451b74 PUSH %R15 |
0x451b76 PUSH %R14 |
0x451b78 PUSH %R13 |
0x451b7a PUSH %R12 |
0x451b7c PUSH %RBX |
0x451b7d AND $-0x20,%RSP |
0x451b81 SUB $0x160,%RSP |
0x451b88 MOV %R8,0x20(%RSP) |
0x451b8d MOV %RCX,%RSI |
0x451b90 MOV 0x48(%RBP),%RAX |
0x451b94 MOV 0x38(%RBP),%R13 |
0x451b98 MOV 0x30(%RBP),%RCX |
0x451b9c MOV %RCX,0x18(%RSP) |
0x451ba1 MOV 0x20(%RBP),%RBX |
0x451ba5 MOV 0x18(%RBP),%R14 |
0x451ba9 MOV 0x10(%RBP),%R12 |
0x451bad MOV 0x28(%RBP),%ECX |
0x451bb0 MOV %RCX,0x38(%RSP) |
0x451bb5 MOVL $0,0x2c(%RSP) |
0x451bbd TEST %RAX,%RAX |
0x451bc0 JS 4521cd |
0x451bc6 MOV %R9,%R15 |
0x451bc9 MOV %RDX,0x10(%RSP) |
0x451bce MOV %RSI,0xc0(%RSP) |
0x451bd6 MOV (%RDI),%ESI |
0x451bd8 MOVQ $0,0x68(%RSP) |
0x451be1 MOV %RAX,0x60(%RSP) |
0x451be6 MOVQ $0x1,0xb8(%RSP) |
0x451bf2 SUB $0x8,%RSP |
0x451bf6 LEA 0xc0(%RSP),%RAX |
0x451bfe LEA 0x34(%RSP),%RCX |
0x451c03 LEA 0x70(%RSP),%R8 |
0x451c08 LEA 0x68(%RSP),%R9 |
0x451c0d MOV $0x4845c0,%EDI |
0x451c12 MOV %ESI,0x30(%RSP) |
0x451c16 MOV $0x22,%EDX |
0x451c1b PUSH $0x1 |
0x451c1d PUSH $0x1 |
0x451c1f PUSH %RAX |
0x451c20 CALL 4031e0 <__kmpc_for_static_init_8@plt> |
0x451c25 ADD $0x20,%RSP |
0x451c29 MOV 0x68(%RSP),%RSI |
0x451c2e MOV 0x60(%RSP),%RCX |
0x451c33 CMP %RCX,%RSI |
0x451c36 JA 4521e6 |
0x451c3c MOV %RCX,%RAX |
0x451c3f MOV %R15,%RDI |
0x451c42 MOV (%R14),%R15 |
0x451c45 MOV 0x10(%R14),%RCX |
0x451c49 MOV %RCX,0x58(%RSP) |
0x451c4e MOV (%RBX),%RDX |
0x451c51 MOV 0x10(%RBX),%RCX |
0x451c55 MOV %RCX,0x50(%RSP) |
0x451c5a MOV 0x10(%RSP),%RCX |
0x451c5f MOV 0x8(%RCX),%RCX |
0x451c63 MOV %RCX,0x10(%RSP) |
0x451c68 MOV 0xc0(%RSP),%RCX |
0x451c70 MOV 0x8(%RCX),%RCX |
0x451c74 MOV %RCX,0x48(%RSP) |
0x451c79 MOV (%RDI),%R8 |
0x451c7c MOV 0x10(%RDI),%RCX |
0x451c80 MOV %RCX,0x40(%RSP) |
0x451c85 MOV (%R12),%R14 |
0x451c89 MOV 0x10(%R12),%R12 |
0x451c8e LEA 0x1(%RSI),%RCX |
0x451c92 INC %RAX |
0x451c95 CMP %RAX,%RCX |
0x451c98 MOV %RCX,0x98(%RSP) |
0x451ca0 CMOVG %RCX,%RAX |
0x451ca4 MOV %RAX,0xb0(%RSP) |
0x451cac SUB %RSI,%RAX |
0x451caf MOV $-0x4,%EBX |
0x451cb4 AND %RAX,%RBX |
0x451cb7 MOV %RDX,0xa8(%RSP) |
0x451cbf MOV %R8,0xa0(%RSP) |
0x451cc7 JE 452205 |
0x451ccd MOV %RAX,0x90(%RSP) |
0x451cd5 VPBROADCASTQ %RSI,%YMM0 |
0x451cdb MOV 0x38(%RSP),%RAX |
0x451ce0 VPBROADCASTD %EAX,%XMM1 |
0x451ce6 VMOVDQU %XMM1,0x80(%RSP) |
0x451cef VPBROADCASTQ %R15,%YMM12 |
0x451cf5 MOV 0x18(%RSP),%RCX |
0x451cfa VPBROADCASTD %ECX,%XMM1 |
0x451d00 VMOVDQU %XMM1,0x70(%RSP) |
0x451d06 VPBROADCASTQ %RDX,%YMM14 |
0x451d0c VPBROADCASTQ %R8,%YMM15 |
0x451d12 VPBROADCASTQ %R14,%YMM1 |
0x451d18 VMOVDQU %YMM1,0x100(%RSP) |
0x451d21 VPBROADCASTQ %R12,%YMM1 |
0x451d27 VMOVDQU %YMM1,0xe0(%RSP) |
0x451d30 VMOVDQU64 0x127c6(%RIP),%YMM18 |
0x451d3a VMOVDQU %YMM0,0xc0(%RSP) |
0x451d43 VPADDQ %YMM18,%YMM0,%YMM8 |
0x451d49 MOV %R13D,%EAX |
0x451d4c SUB %ECX,%EAX |
0x451d4e VPBROADCASTQ %RAX,%YMM9 |
0x451d54 LEA 0x1(%RCX),%RAX |
0x451d58 VPBROADCASTQ %RAX,%YMM0 |
0x451d5e VMOVDQU %YMM0,0x120(%RSP) |
0x451d67 XOR %EDI,%EDI |
0x451d69 VXORPD %XMM20,%XMM20,%XMM20 |
0x451d6f VPCMPEQD %XMM11,%XMM11,%XMM11 |
0x451d74 VBROADCASTSD 0x1276a(%RIP),%YMM22 |
0x451d7e VBROADCASTSD 0x11d40(%RIP),%YMM23 |
0x451d88 VBROADCASTSD 0x1160e(%RIP),%YMM25 |
0x451d92 VBROADCASTSD 0x11d44(%RIP),%YMM26 |
0x451d9c VBROADCASTSD 0x1290a(%RIP),%YMM27 |
0x451da6 VPBROADCASTQ 0x12748(%RIP),%YMM28 |
0x451db0 JMP 451e97 |
0x451db5 NOPW %CS:(%RAX,%RAX,1) |
(710) 0x451dc0 VPSLLQ $0x3,%YMM30,%YMM10 |
(710) 0x451dc7 VPADDQ 0xe0(%RSP),%YMM10,%YMM10 |
(710) 0x451dd0 VANDPD %YMM26,%YMM6,%YMM13 |
(710) 0x451dd6 VMAXPD %YMM25,%YMM13,%YMM13 |
(710) 0x451ddc VCMPPD $0x1,%YMM20,%YMM6,%K2 |
(710) 0x451de3 VXORPD %YMM27,%YMM13,%YMM13{%K2} |
(710) 0x451de9 VANDPD %YMM26,%YMM5,%YMM5 |
(710) 0x451def VMAXPD %YMM25,%YMM5,%YMM5 |
(710) 0x451df5 VCMPPD $0x1,%YMM20,%YMM13,%K2 |
(710) 0x451dfc VXORPD %YMM27,%YMM5,%YMM5{%K2} |
(710) 0x451e02 VMULPD %YMM13,%YMM13,%YMM6 |
(710) 0x451e07 VFMADD231PD %YMM5,%YMM5,%YMM6 |
(710) 0x451e0c VSQRTPD %YMM6,%YMM6 |
(710) 0x451e10 VMULPD %YMM3,%YMM6,%YMM3 |
(710) 0x451e14 VDIVPD %YMM13,%YMM3,%YMM3 |
(710) 0x451e19 VANDPD %YMM26,%YMM3,%YMM3 |
(710) 0x451e1f VMULPD %YMM4,%YMM6,%YMM4 |
(710) 0x451e23 VPBROADCASTQ %XMM29,%YMM6 |
(710) 0x451e29 VPMULLQ %YMM0,%YMM6,%YMM0 |
(710) 0x451e2f VPBROADCASTQ %XMM7,%YMM6 |
(710) 0x451e34 VPADDQ %YMM1,%YMM0,%YMM0 |
(710) 0x451e38 VPSLLQ $0x3,%YMM0,%YMM0 |
(710) 0x451e3d VPADDQ %YMM0,%YMM6,%YMM0 |
(710) 0x451e41 VPXOR %XMM1,%XMM1,%XMM1 |
(710) 0x451e45 KMOVQ %K1,%K2 |
(710) 0x451e4a VGATHERQPD (,%YMM0,1),%YMM1{%K2} |
(710) 0x451e55 VDIVPD %YMM5,%YMM4,%YMM0 |
(710) 0x451e59 VANDPD %YMM26,%YMM0,%YMM0 |
(710) 0x451e5f VMINPD %YMM0,%YMM3,%YMM0 |
(710) 0x451e63 VMULPD %YMM2,%YMM0,%YMM0 |
(710) 0x451e67 VMULPD %YMM0,%YMM0,%YMM0 |
(710) 0x451e6b VADDPD %YMM1,%YMM1,%YMM1 |
(710) 0x451e6f VMULPD %YMM1,%YMM0,%YMM0 |
(710) 0x451e73 VSCATTERQPD %YMM0,(,%YMM10,1){%K1} |
(710) 0x451e7e VPADDQ %YMM28,%YMM8,%YMM8 |
(710) 0x451e84 VPADDQ %YMM28,%YMM18,%YMM18 |
(710) 0x451e8a ADD $0x4,%RDI |
(710) 0x451e8e CMP %RBX,%RDI |
(710) 0x451e91 JAE 4521dc |
(710) 0x451e97 VPADDQ 0xc0(%RSP),%YMM18,%YMM0 |
(710) 0x451e9f VMOVDQA %YMM9,%YMM1 |
(710) 0x451ea3 MOV $0x452870,%RAX |
(710) 0x451eaa CALL %RAX |
(710) 0x451eac VPMOVQD %YMM0,%XMM1 |
(710) 0x451eb2 VPADDD 0x70(%RSP),%XMM1,%XMM29 |
(710) 0x451eba VPADDQ 0x120(%RSP),%YMM0,%YMM0 |
(710) 0x451ec3 VPSLLQ $0x20,%YMM0,%YMM0 |
(710) 0x451ec8 VPSRAQ $0x20,%YMM0,%YMM30 |
(710) 0x451ecf VMOVDQA %YMM8,%YMM0 |
(710) 0x451ed3 VMOVDQA %YMM9,%YMM1 |
(710) 0x451ed7 CALL 452aa0 <__svml_u64div4_l9> |
(710) 0x451edd VPMOVQD %YMM0,%XMM0 |
(710) 0x451ee3 VPADDD 0x80(%RSP),%XMM0,%XMM2 |
(710) 0x451eec VPMOVSXDQ %XMM2,%YMM0 |
(710) 0x451ef1 VXORPS %XMM3,%XMM3,%XMM3 |
(710) 0x451ef5 VPMULLQ %YMM0,%YMM12,%YMM3 |
(710) 0x451efb VPADDQ %YMM3,%YMM30,%YMM1 |
(710) 0x451f01 KXNORW %K0,%K0,%K1 |
(710) 0x451f05 VXORPD %XMM4,%XMM4,%XMM4 |
(710) 0x451f09 MOV 0x58(%RSP),%RAX |
(710) 0x451f0e VGATHERQPD (%RAX,%YMM1,8),%YMM4{%K1} |
(710) 0x451f15 VPSUBD %XMM11,%XMM2,%XMM1 |
(710) 0x451f1a VPMOVSXDQ %XMM1,%YMM5 |
(710) 0x451f1f VXORPS %XMM6,%XMM6,%XMM6 |
(710) 0x451f23 VPMULLQ %YMM5,%YMM12,%YMM6 |
(710) 0x451f29 VPADDQ %YMM6,%YMM30,%YMM1 |
(710) 0x451f2f KXNORW %K0,%K0,%K1 |
(710) 0x451f33 VXORPD %XMM7,%XMM7,%XMM7 |
(710) 0x451f37 VGATHERQPD (%RAX,%YMM1,8),%YMM7{%K1} |
(710) 0x451f3e VPMOVSXDQ %XMM29,%YMM1 |
(710) 0x451f44 VPADDQ %YMM1,%YMM3,%YMM3 |
(710) 0x451f48 KXNORW %K0,%K0,%K1 |
(710) 0x451f4c VPXORD %XMM30,%XMM30,%XMM30 |
(710) 0x451f52 VGATHERQPD (%RAX,%YMM3,8),%YMM30{%K1} |
(710) 0x451f59 VPADDQ %YMM1,%YMM6,%YMM3 |
(710) 0x451f5d KXNORW %K0,%K0,%K1 |
(710) 0x451f61 VPXOR %XMM6,%XMM6,%XMM6 |
(710) 0x451f65 VGATHERQPD (%RAX,%YMM3,8),%YMM6{%K1} |
(710) 0x451f6c VXORPS %XMM3,%XMM3,%XMM3 |
(710) 0x451f70 VPMULLQ %YMM5,%YMM14,%YMM3 |
(710) 0x451f76 VPADDQ %YMM1,%YMM3,%YMM31 |
(710) 0x451f7c KXNORW %K0,%K0,%K1 |
(710) 0x451f80 VXORPD %XMM24,%XMM24,%XMM24 |
(710) 0x451f86 MOV 0x50(%RSP),%RAX |
(710) 0x451f8b VGATHERQPD (%RAX,%YMM31,8),%YMM24{%K1} |
(710) 0x451f92 VPSUBD %XMM11,%XMM29,%XMM31 |
(710) 0x451f98 VPMOVSXDQ %XMM31,%YMM21 |
(710) 0x451f9e VPADDQ %YMM21,%YMM3,%YMM3 |
(710) 0x451fa4 KXNORW %K0,%K0,%K1 |
(710) 0x451fa8 VXORPD %XMM13,%XMM13,%XMM13 |
(710) 0x451fad VGATHERQPD (%RAX,%YMM3,8),%YMM13{%K1} |
(710) 0x451fb4 VXORPS %XMM3,%XMM3,%XMM3 |
(710) 0x451fb8 VPMULLQ %YMM0,%YMM14,%YMM3 |
(710) 0x451fbe VPADDQ %YMM1,%YMM3,%YMM16 |
(710) 0x451fc4 KXNORW %K0,%K0,%K1 |
(710) 0x451fc8 VXORPD %XMM17,%XMM17,%XMM17 |
(710) 0x451fce VGATHERQPD (%RAX,%YMM16,8),%YMM17{%K1} |
(710) 0x451fd5 VPADDQ %YMM21,%YMM3,%YMM3 |
(710) 0x451fdb KXNORW %K0,%K0,%K1 |
(710) 0x451fdf VXORPD %XMM16,%XMM16,%XMM16 |
(710) 0x451fe5 VGATHERQPD (%RAX,%YMM3,8),%YMM16{%K1} |
(710) 0x451fec VPMULLQ %YMM0,%YMM15,%YMM10 |
(710) 0x451ff2 VPADDQ %YMM21,%YMM10,%YMM3 |
(710) 0x451ff8 KXNORW %K0,%K0,%K1 |
(710) 0x451ffc VPXORD %XMM21,%XMM21,%XMM21 |
(710) 0x452002 MOV 0x40(%RSP),%RDX |
(710) 0x452007 VGATHERQPD (%RDX,%YMM3,8),%YMM21{%K1} |
(710) 0x45200e KXNORW %K0,%K0,%K1 |
(710) 0x452012 VXORPD %XMM3,%XMM3,%XMM3 |
(710) 0x452016 MOV 0x10(%RSP),%RAX |
(710) 0x45201b VGATHERDPD (%RAX,%XMM29,8),%YMM3{%K1} |
(710) 0x452022 VSUBPD %YMM30,%YMM7,%YMM7 |
(710) 0x452028 VSUBPD %YMM4,%YMM6,%YMM6 |
(710) 0x45202c VSUBPD %YMM6,%YMM7,%YMM30 |
(710) 0x452032 KXNORW %K0,%K0,%K1 |
(710) 0x452036 VXORPD %XMM4,%XMM4,%XMM4 |
(710) 0x45203a MOV 0x48(%RSP),%RCX |
(710) 0x45203f VGATHERDPD (%RCX,%XMM2,8),%YMM4{%K1} |
(710) 0x452046 VPADDD %XMM11,%XMM29,%XMM29 |
(710) 0x45204c VPMOVSXDQ %XMM29,%YMM29 |
(710) 0x452052 VPADDQ %YMM29,%YMM10,%YMM10 |
(710) 0x452058 KXNORW %K0,%K0,%K1 |
(710) 0x45205c VPXORD %XMM29,%XMM29,%XMM29 |
(710) 0x452062 VGATHERQPD (%RDX,%YMM10,8),%YMM29{%K1} |
(710) 0x452069 VSUBPD %YMM17,%YMM13,%YMM10 |
(710) 0x45206f VSUBPD %YMM24,%YMM16,%YMM13 |
(710) 0x452075 VSUBPD %YMM13,%YMM10,%YMM16 |
(710) 0x45207b KXNORW %K0,%K0,%K1 |
(710) 0x45207f VXORPD %XMM17,%XMM17,%XMM17 |
(710) 0x452085 VGATHERDPD (%RAX,%XMM31,8),%YMM17{%K1} |
(710) 0x45208c VMULPD %YMM30,%YMM3,%YMM24 |
(710) 0x452092 VFMADD231PD %YMM16,%YMM4,%YMM24 |
(710) 0x452098 VADDPD %YMM7,%YMM6,%YMM6 |
(710) 0x45209c VPMULLQ %YMM5,%YMM15,%YMM5 |
(710) 0x4520a2 VMULPD %YMM22,%YMM6,%YMM6 |
(710) 0x4520a8 VADDPD %YMM10,%YMM13,%YMM7 |
(710) 0x4520ad VPADDQ %YMM1,%YMM5,%YMM5 |
(710) 0x4520b1 KXNORW %K0,%K0,%K1 |
(710) 0x4520b5 VXORPD %XMM10,%XMM10,%XMM10 |
(710) 0x4520ba VGATHERQPD (%RDX,%YMM5,8),%YMM10{%K1} |
(710) 0x4520c1 VDIVPD %YMM3,%YMM23,%YMM13 |
(710) 0x4520c7 VPADDD %XMM2,%XMM11,%XMM5 |
(710) 0x4520cb VPMOVSXDQ %XMM5,%YMM5 |
(710) 0x4520d0 VPMULLQ %YMM5,%YMM15,%YMM5 |
(710) 0x4520d6 VMULPD %YMM13,%YMM22,%YMM31 |
(710) 0x4520dc VMULPD %YMM31,%YMM7,%YMM7 |
(710) 0x4520e2 VPADDQ %YMM1,%YMM5,%YMM5 |
(710) 0x4520e6 KXNORW %K0,%K0,%K1 |
(710) 0x4520ea VXORPD %XMM31,%XMM31,%XMM31 |
(710) 0x4520f0 VGATHERQPD (%RDX,%YMM5,8),%YMM31{%K1} |
(710) 0x4520f7 VDIVPD %YMM4,%YMM23,%YMM19 |
(710) 0x4520fd VFMADD231PD %YMM6,%YMM19,%YMM7 |
(710) 0x452103 VPADDD 0x1440f(%RIP){1to4},%XMM2,%XMM2 |
(710) 0x45210d KXNORW %K0,%K0,%K1 |
(710) 0x452111 VXORPD %XMM5,%XMM5,%XMM5 |
(710) 0x452115 VGATHERDPD (%RCX,%XMM2,8),%YMM5{%K1} |
(710) 0x45211c VSUBPD %YMM29,%YMM21,%YMM2 |
(710) 0x452122 VADDPD %YMM3,%YMM17,%YMM6 |
(710) 0x452128 VDIVPD %YMM6,%YMM2,%YMM6 |
(710) 0x45212c VSUBPD %YMM31,%YMM10,%YMM2 |
(710) 0x452132 VADDPD %YMM4,%YMM5,%YMM5 |
(710) 0x452136 VDIVPD %YMM5,%YMM2,%YMM5 |
(710) 0x45213a VMULPD %YMM5,%YMM5,%YMM2 |
(710) 0x45213e VMULPD %YMM7,%YMM5,%YMM7 |
(710) 0x452142 VMULPD %YMM22,%YMM30,%YMM10 |
(710) 0x452148 VMULPD %YMM6,%YMM13,%YMM13 |
(710) 0x45214c VFMADD213PD %YMM7,%YMM10,%YMM13 |
(710) 0x452151 VMULPD %YMM6,%YMM13,%YMM7 |
(710) 0x452155 VMULPD %YMM22,%YMM16,%YMM10 |
(710) 0x45215b VMULPD %YMM2,%YMM19,%YMM13 |
(710) 0x452161 VFMADD213PD %YMM7,%YMM10,%YMM13 |
(710) 0x452166 VFMADD231PD %YMM6,%YMM6,%YMM2 |
(710) 0x45216b VMAXPD %YMM25,%YMM2,%YMM2 |
(710) 0x452171 VDIVPD %YMM2,%YMM13,%YMM2 |
(710) 0x452175 VCMPPD $0x2,%YMM20,%YMM2,%K1 |
(710) 0x45217c VCMPPD $0x1,%YMM20,%YMM24,%K1{%K1} |
(710) 0x452183 KNOTW %K1,%K2 |
(710) 0x452187 VMOVDQU 0x100(%RSP),%YMM7 |
(710) 0x452190 VPMULLQ %YMM0,%YMM7,%YMM7 |
(710) 0x452196 VPADDQ %YMM1,%YMM7,%YMM30 |
(710) 0x45219c VSCATTERQPD %YMM20,(%R12,%YMM30,8){%K2} |
(710) 0x4521a3 KMOVD %K1,%EAX |
(710) 0x4521a7 TEST $0xf,%AL |
(710) 0x4521a9 JE 4521b5 |
(710) 0x4521ab MOV 0x20(%RSP),%RCX |
(710) 0x4521b0 VMOVQ 0x10(%RCX),%XMM7 |
(710) 0x4521b5 TEST $0xf,%AL |
(710) 0x4521b7 JE 451dc0 |
(710) 0x4521bd MOV 0x20(%RSP),%RAX |
(710) 0x4521c2 VMOVQ (%RAX),%XMM29 |
(710) 0x4521c8 JMP 451dc0 |
0x4521cd LEA -0x28(%RBP),%RSP |
0x4521d1 POP %RBX |
0x4521d2 POP %R12 |
0x4521d4 POP %R13 |
0x4521d6 POP %R14 |
0x4521d8 POP %R15 |
0x4521da POP %RBP |
0x4521db RET |
0x4521dc CMP %RBX,0x90(%RSP) |
0x4521e4 JNE 45220c |
0x4521e6 MOV $0x4845e0,%EDI |
0x4521eb MOV 0x28(%RSP),%ESI |
0x4521ef LEA -0x28(%RBP),%RSP |
0x4521f3 POP %RBX |
0x4521f4 POP %R12 |
0x4521f6 POP %R13 |
0x4521f8 POP %R14 |
0x4521fa POP %R15 |
0x4521fc POP %RBP |
0x4521fd VZEROUPPER |
0x452200 JMP 403050 |
0x452205 XOR %EBX,%EBX |
0x452207 MOV %RSI,%RDI |
0x45220a JMP 452210 |
0x45220c LEA (%RSI,%RBX,1),%RDI |
0x452210 MOV 0x18(%RSP),%RAX |
0x452215 SUB %EAX,%R13D |
0x452218 INC %RAX |
0x45221b MOV %RAX,0x80(%RSP) |
0x452223 ADD %RBX,%RSI |
0x452226 VMOVSD 0x122ba(%RIP),%XMM0 |
0x45222e VMOVSD 0x11892(%RIP),%XMM1 |
0x452236 VMOVSD 0x11162(%RIP),%XMM2 |
0x45223e VXORPD %XMM3,%XMM3,%XMM3 |
0x452242 VMOVDDUP 0x11896(%RIP),%XMM4 |
0x45224a VMOVDDUP 0x1245e(%RIP),%XMM5 |
0x452252 MOV %R13,0x70(%RSP) |
0x452257 MOV %R14,0x100(%RSP) |
0x45225f MOV %R12,0xe0(%RSP) |
0x452267 MOV %R15,0x30(%RSP) |
0x45226c MOV 0x38(%RSP),%R14 |
0x452271 JMP 4522cf |
0x452273 NOPW %CS:(%RAX,%RAX,1) |
(709) 0x452280 MOV 0x70(%RSP),%R13 |
(709) 0x452285 MOV 0x30(%RSP),%R15 |
(709) 0x45228a MOV 0x100(%RSP),%R8 |
(709) 0x452292 MOV 0xe0(%RSP),%R9 |
(709) 0x45229a MOV 0xc0(%RSP),%RBX |
(709) 0x4522a2 IMUL %R8,%RDX |
(709) 0x4522a6 ADD %RDI,%RDX |
(709) 0x4522a9 VMOVSD %XMM12,(%R9,%RDX,8) |
(709) 0x4522af MOV 0x98(%RSP),%RAX |
(709) 0x4522b7 LEA (%RAX,%RBX,1),%RDI |
(709) 0x4522bb INC %RBX |
(709) 0x4522be INC %RSI |
(709) 0x4522c1 CMP %RSI,0xb0(%RSP) |
(709) 0x4522c9 JE 4521e6 |
(709) 0x4522cf MOV %RSI,%RAX |
(709) 0x4522d2 SHR $0x20,%RAX |
(709) 0x4522d6 MOV %RBX,0xc0(%RSP) |
(709) 0x4522de JE 4522f0 |
(709) 0x4522e0 MOV %RSI,%RAX |
(709) 0x4522e3 CQTO |
(709) 0x4522e5 IDIV %R13 |
(709) 0x4522e8 MOV %RDX,%RCX |
(709) 0x4522eb JMP 4522f9 |
0x4522ed NOPL (%RAX) |
(709) 0x4522f0 MOV %ESI,%EAX |
(709) 0x4522f2 XOR %EDX,%EDX |
(709) 0x4522f4 DIV %R13D |
(709) 0x4522f7 MOV %EDX,%ECX |
(709) 0x4522f9 MOV 0x80(%RSP),%RAX |
(709) 0x452301 ADD %ECX,%EAX |
(709) 0x452303 MOVSXD %EAX,%R10 |
(709) 0x452306 MOV %RDI,%RAX |
(709) 0x452309 SHR $0x20,%RAX |
(709) 0x45230d JE 452320 |
(709) 0x45230f MOV %RDI,%RAX |
(709) 0x452312 XOR %EDX,%EDX |
(709) 0x452314 DIV %R13 |
(709) 0x452317 JMP 452327 |
0x452319 NOPL (%RAX) |
(709) 0x452320 MOV %EDI,%EAX |
(709) 0x452322 XOR %EDX,%EDX |
(709) 0x452324 DIV %R13D |
(709) 0x452327 LEA (%R14,%RAX,1),%EDX |
(709) 0x45232b MOVSXD %EDX,%RDX |
(709) 0x45232e MOV %R15,%R11 |
(709) 0x452331 IMUL %RDX,%R11 |
(709) 0x452335 LEA (%R10,%R11,1),%R8 |
(709) 0x452339 LEA 0x1(%R14,%RAX,1),%EDI |
(709) 0x45233e MOVSXD %EDI,%R9 |
(709) 0x452341 MOV %R15,%R13 |
(709) 0x452344 IMUL %R9,%R13 |
(709) 0x452348 ADD %R13,%R10 |
(709) 0x45234b MOV 0x58(%RSP),%R12 |
(709) 0x452350 VMOVSD (%R12,%R10,8),%XMM6 |
(709) 0x452356 MOV 0x18(%RSP),%R15 |
(709) 0x45235b LEA (%R15,%RCX,1),%EDI |
(709) 0x45235f MOVSXD %EDI,%RDI |
(709) 0x452362 ADD %RDI,%R11 |
(709) 0x452365 ADD %RDI,%R13 |
(709) 0x452368 VMOVSD (%R12,%R13,8),%XMM7 |
(709) 0x45236e VSUBSD (%R12,%R11,8),%XMM6,%XMM6 |
(709) 0x452374 VSUBSD (%R12,%R8,8),%XMM7,%XMM7 |
(709) 0x45237a MOV 0xa8(%RSP),%R12 |
(709) 0x452382 MOV %R12,%R8 |
(709) 0x452385 IMUL %R9,%R8 |
(709) 0x452389 LEA (%R8,%RDI,1),%R11 |
(709) 0x45238d LEA 0x1(%R15,%RCX,1),%R10D |
(709) 0x452392 MOVSXD %R10D,%R10 |
(709) 0x452395 ADD %R10,%R8 |
(709) 0x452398 MOV 0x50(%RSP),%RBX |
(709) 0x45239d VMOVSD (%RBX,%R8,8),%XMM8 |
(709) 0x4523a3 MOV %R12,%R8 |
(709) 0x4523a6 IMUL %RDX,%R8 |
(709) 0x4523aa LEA (%R8,%RDI,1),%R13 |
(709) 0x4523ae ADD %R10,%R8 |
(709) 0x4523b1 VMOVSD (%RBX,%R8,8),%XMM9 |
(709) 0x4523b7 VSUBSD (%RBX,%R13,8),%XMM8,%XMM8 |
(709) 0x4523bd VSUBSD (%RBX,%R11,8),%XMM9,%XMM9 |
(709) 0x4523c3 MOV 0xa0(%RSP),%R11 |
(709) 0x4523cb IMUL %R11,%R9 |
(709) 0x4523cf ADD %RDI,%R9 |
(709) 0x4523d2 MOV 0x40(%RSP),%RBX |
(709) 0x4523d7 VMOVSD (%RBX,%R9,8),%XMM10 |
(709) 0x4523dd MOV %R11,%R8 |
(709) 0x4523e0 IMUL %RDX,%R8 |
(709) 0x4523e4 LEA -0x1(%R15,%RCX,1),%ECX |
(709) 0x4523e9 MOVSXD %ECX,%RCX |
(709) 0x4523ec ADD %R8,%RCX |
(709) 0x4523ef ADD %R10,%R8 |
(709) 0x4523f2 VMOVHPD (%RBX,%R8,8),%XMM10,%XMM10 |
(709) 0x4523f8 LEA -0x1(%R14,%RAX,1),%R8D |
(709) 0x4523fd MOVSXD %R8D,%R8 |
(709) 0x452400 IMUL %R11,%R8 |
(709) 0x452404 ADD %RDI,%R8 |
(709) 0x452407 VMOVSD (%RBX,%R8,8),%XMM11 |
(709) 0x45240d VMOVHPD (%RBX,%RCX,8),%XMM11,%XMM12 |
(709) 0x452412 LEA 0x2(%R14,%RAX,1),%EAX |
(709) 0x452417 CLTQ |
(709) 0x452419 MOV 0x48(%RSP),%RCX |
(709) 0x45241e VMOVSD (%RCX,%RAX,8),%XMM11 |
(709) 0x452423 MOV 0x10(%RSP),%RAX |
(709) 0x452428 VMOVHPD (%RAX,%R10,8),%XMM11,%XMM14 |
(709) 0x45242e VSUBSD %XMM7,%XMM6,%XMM13 |
(709) 0x452432 VSUBSD %XMM9,%XMM8,%XMM11 |
(709) 0x452437 VADDSD %XMM6,%XMM7,%XMM6 |
(709) 0x45243b VMULSD %XMM0,%XMM6,%XMM15 |
(709) 0x45243f VADDSD %XMM8,%XMM9,%XMM8 |
(709) 0x452444 VMOVSD (%RAX,%RDI,8),%XMM7 |
(709) 0x452449 VMOVSD (%RCX,%RDX,8),%XMM6 |
(709) 0x45244e VDIVSD %XMM7,%XMM1,%XMM16 |
(709) 0x452454 VMULSD %XMM16,%XMM0,%XMM9 |
(709) 0x45245a VMULSD %XMM9,%XMM8,%XMM8 |
(709) 0x45245f VDIVSD %XMM6,%XMM1,%XMM17 |
(709) 0x452465 VFMADD231SD %XMM15,%XMM17,%XMM8 |
(709) 0x45246b VSUBPD %XMM12,%XMM10,%XMM9 |
(709) 0x452470 VPUNPCKLQDQ %XMM7,%XMM6,%XMM10 |
(709) 0x452474 VADDPD %XMM10,%XMM14,%XMM10 |
(709) 0x452479 VDIVPD %XMM10,%XMM9,%XMM9 |
(709) 0x45247e VMULPD %XMM9,%XMM9,%XMM12 |
(709) 0x452483 VSHUFPD $0x1,%XMM12,%XMM12,%XMM14 |
(709) 0x452489 VSHUFPD $0x1,%XMM9,%XMM9,%XMM10 |
(709) 0x45248f VMULSD %XMM8,%XMM10,%XMM8 |
(709) 0x452494 VMULSD %XMM0,%XMM13,%XMM15 |
(709) 0x452498 VMULSD %XMM9,%XMM8,%XMM8 |
(709) 0x45249d VMULSD %XMM16,%XMM15,%XMM15 |
(709) 0x4524a3 VFMADD213SD %XMM8,%XMM14,%XMM15 |
(709) 0x4524a8 VMULSD %XMM0,%XMM11,%XMM8 |
(709) 0x4524ac VMULSD %XMM12,%XMM17,%XMM16 |
(709) 0x4524b2 VFMADD213SD %XMM15,%XMM8,%XMM16 |
(709) 0x4524b8 VADDSD %XMM14,%XMM12,%XMM8 |
(709) 0x4524bd VMAXSD %XMM2,%XMM8,%XMM8 |
(709) 0x4524c1 VDIVSD %XMM8,%XMM16,%XMM8 |
(709) 0x4524c7 VXORPD %XMM12,%XMM12,%XMM12 |
(709) 0x4524cc VUCOMISD %XMM12,%XMM8 |
(709) 0x4524d1 JA 452280 |
(709) 0x4524d7 VMULSD %XMM7,%XMM13,%XMM13 |
(709) 0x4524db VFMADD213SD %XMM13,%XMM6,%XMM11 |
(709) 0x4524e0 VUCOMISD %XMM3,%XMM11 |
(709) 0x4524e4 MOV 0x70(%RSP),%R13 |
(709) 0x4524e9 MOV 0x30(%RSP),%R15 |
(709) 0x4524ee MOV 0x100(%RSP),%R8 |
(709) 0x4524f6 MOV 0xe0(%RSP),%R9 |
(709) 0x4524fe MOV 0xc0(%RSP),%RBX |
(709) 0x452506 JAE 4522a2 |
(709) 0x45250c VANDPD %XMM4,%XMM10,%XMM11 |
(709) 0x452510 VMAXSD %XMM2,%XMM11,%XMM11 |
(709) 0x452514 VXORPD %XMM5,%XMM11,%XMM12 |
(709) 0x452518 VCMPSD $0x1,%XMM3,%XMM10,%K1 |
(709) 0x45251f VMOVSD %XMM12,%XMM11,%XMM11{%K1} |
(709) 0x452525 VANDPD %XMM4,%XMM9,%XMM9 |
(709) 0x452529 VMAXSD %XMM2,%XMM9,%XMM9 |
(709) 0x45252d VXORPD %XMM5,%XMM9,%XMM10 |
(709) 0x452531 VCMPSD $0x1,%XMM3,%XMM11,%K1 |
(709) 0x452538 VMOVSD %XMM10,%XMM9,%XMM9{%K1} |
(709) 0x45253e VMULSD %XMM11,%XMM11,%XMM10 |
(709) 0x452543 VFMADD231SD %XMM9,%XMM9,%XMM10 |
(709) 0x452548 VSQRTSD %XMM10,%XMM10,%XMM10 |
(709) 0x45254d VMULSD %XMM7,%XMM10,%XMM7 |
(709) 0x452551 VDIVSD %XMM11,%XMM7,%XMM7 |
(709) 0x452556 VANDPD %XMM4,%XMM7,%XMM7 |
(709) 0x45255a VMULSD %XMM6,%XMM10,%XMM6 |
(709) 0x45255e VDIVSD %XMM9,%XMM6,%XMM6 |
(709) 0x452563 VANDPD %XMM4,%XMM6,%XMM6 |
(709) 0x452567 VMINSD %XMM6,%XMM7,%XMM6 |
(709) 0x45256b MOV 0x20(%RSP),%RCX |
(709) 0x452570 MOV 0x10(%RCX),%RAX |
(709) 0x452574 VMULSD %XMM6,%XMM8,%XMM6 |
(709) 0x452578 VMULSD %XMM6,%XMM6,%XMM6 |
(709) 0x45257c MOV (%RCX),%RCX |
(709) 0x45257f IMUL %RDX,%RCX |
(709) 0x452583 ADD %RDI,%RCX |
(709) 0x452586 VMOVSD (%RAX,%RCX,8),%XMM7 |
(709) 0x45258b VADDSD %XMM7,%XMM7,%XMM7 |
(709) 0x45258f VMULSD %XMM7,%XMM6,%XMM12 |
(709) 0x452593 JMP 4522a2 |
0x452598 NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | viscosity.cpp:36-64 |
Module | exec |
nb instructions | 159 |
nb uops | 161 |
loop length | 789 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 34 |
micro-operation queue | 26.83 cycles |
front end | 26.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.80 | 5.80 | 17.67 | 17.67 | 20.50 | 10.00 | 5.80 | 20.50 | 20.50 | 20.50 | 5.60 | 17.67 |
cycles | 5.80 | 5.80 | 17.67 | 17.67 | 20.50 | 10.00 | 5.80 | 20.50 | 20.50 | 20.50 | 5.60 | 17.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.63 |
Stall cycles | 0.00 |
Front-end | 26.83 |
Dispatch | 20.50 |
Overall L1 | 26.83 |
all | 17% |
load | 14% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 16% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 17% |
load | 5% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 16% |
load | 17% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 16% |
load | 14% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x160,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 4521cd <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x65d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x34(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4845c0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 4521e6 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x676> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMOVG %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x4,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RAX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 452205 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x695> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM1,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R15,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x18(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %ECX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM1,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDX,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R14,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM1,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R12,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM1,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVDQU64 0x127c6(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPADDQ %YMM18,%YMM0,%YMM8 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %R13D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RAX,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA 0x1(%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM20,%XMM20,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VBROADCASTSD 0x1276a(%RIP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11d40(%RIP),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x1160e(%RIP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11d44(%RIP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x1290a(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x12748(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 451e97 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x327> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %RBX,0x90(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 45220c <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x69c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x4845e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 452210 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6a0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RSI,%RBX,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x122ba(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x11892(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x11162(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x11896(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x1245e(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4522cf <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x75f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | viscosity.cpp:36-64 |
Module | exec |
nb instructions | 159 |
nb uops | 161 |
loop length | 789 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 34 |
micro-operation queue | 26.83 cycles |
front end | 26.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.80 | 5.80 | 17.67 | 17.67 | 20.50 | 10.00 | 5.80 | 20.50 | 20.50 | 20.50 | 5.60 | 17.67 |
cycles | 5.80 | 5.80 | 17.67 | 17.67 | 20.50 | 10.00 | 5.80 | 20.50 | 20.50 | 20.50 | 5.60 | 17.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.63 |
Stall cycles | 0.00 |
Front-end | 26.83 |
Dispatch | 20.50 |
Overall L1 | 26.83 |
all | 17% |
load | 14% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 16% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 17% |
load | 5% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 16% |
load | 17% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 16% |
load | 14% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x160,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 4521cd <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x65d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x34(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4845c0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 4521e6 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x676> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMOVG %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x4,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RAX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 452205 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x695> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM1,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R15,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x18(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %ECX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM1,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDX,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R14,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM1,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R12,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM1,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVDQU64 0x127c6(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPADDQ %YMM18,%YMM0,%YMM8 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %R13D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RAX,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA 0x1(%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM20,%XMM20,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VBROADCASTSD 0x1276a(%RIP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11d40(%RIP),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x1160e(%RIP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11d44(%RIP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x1290a(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x12748(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 451e97 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x327> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %RBX,0x90(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 45220c <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x69c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x4845e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 452210 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6a0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RSI,%RBX,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RBX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x122ba(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x11892(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x11162(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x11896(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x1245e(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4522cf <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x75f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted– | 2.66 | 3.74 |
○Loop 710 - viscosity.cpp:36-64 - exec | 2.66 | 3.73 |
○Loop 709 - viscosity.cpp:36-64 - exec | 0 | 0 |