# 预测鲍鱼的年龄

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def loadDataSet(fileName):
    """Load a tab-separated numeric data file into feature rows and labels.

    Every non-blank line holds the feature values followed by the target
    value in the last column. The number of feature columns is taken from
    the first non-blank line.

    Args:
        fileName: Path of the tab-delimited text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)``: ``dataMat`` is a list of rows, each
        a list of floats; ``labelMat`` is the list of float targets.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    # Read the file once inside a context manager so the handle is always
    # closed; blank lines (e.g. a trailing empty line) are skipped.
    with open(fileName) as fr:
        rows = [line for line in fr if line.strip()]
    if not rows:
        return [], []

    numFeat = len(rows[0].split('\t')) - 1
    dataMat = []
    labelMat = []
    for line in rows:
        curLine = line.strip().split('\t')
        dataMat.append([float(curLine[i]) for i in range(numFeat)])
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

In [3]:
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict one query point with locally weighted linear regression.

    Each training row ``x_j`` receives the Gaussian-kernel weight
    ``exp(-||testPoint - x_j||^2 / (2 * k**2))`` and the weighted normal
    equations ``(X^T W X) ws = X^T W y`` are solved for this point only.

    Args:
        testPoint: Feature values of the query point (sequence or 1 x n
            matrix).
        xArr: Training features, one row per sample.
        yArr: Training targets, one value per sample.
        k: Kernel width; smaller values concentrate the weight on the
            training rows closest to ``testPoint``.

    Returns:
        A 1 x 1 ``np.matrix`` holding the prediction, or ``None`` (after
        printing a message) when the determinant of ``X^T W X`` is exactly
        zero.
    """
    xMat = np.atleast_2d(np.asarray(xArr, dtype=float))
    yCol = np.asarray(yArr, dtype=float).reshape(-1, 1)
    query = np.asarray(testPoint, dtype=float).reshape(1, -1)

    # Kernel weights for all rows at once. They are kept as a length-m
    # vector: the diagonal weight matrix is never materialised, so memory
    # stays O(m) instead of O(m^2).
    diffs = query - xMat
    weights = np.exp(np.sum(diffs * diffs, axis=1) / (-2.0 * k ** 2))
    weightedX = weights[:, np.newaxis] * xMat  # same as W @ X for diagonal W

    xTx = xMat.T @ weightedX
    # Only an exactly-zero determinant is rejected; a nearly singular
    # system is still inverted.
    if np.linalg.det(xTx) == 0.0:
        print("This Matrix is singular, cannot do inverse")
        return None
    # (W X)^T y equals X^T W y because W is diagonal.
    ws = np.linalg.inv(xTx) @ (weightedX.T @ yCol)
    # Wrap as a matrix so callers still receive a 1 x 1 np.matrix.
    return np.asmatrix(query @ ws)

In [4]:
def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run ``lwlr`` for every row of ``testArr`` and collect the predictions.

    Args:
        testArr: Query points, one row per point.
        xArr: Training features, one row per sample.
        yArr: Training targets, one value per sample.
        k: Kernel width forwarded to ``lwlr``.

    Returns:
        A 1-D float array with one prediction per row of ``testArr``;
        entries are NaN where ``lwlr`` returned ``None``.
    """
    m = np.shape(testArr)[0]
    yHat = np.zeros(m)
    for i in range(m):
        pred = lwlr(testArr[i], xArr, yArr, k)
        # lwlr hands back a 1 x 1 matrix (or None for a singular system).
        # Pull the scalar out explicitly rather than relying on implicit
        # array-to-scalar conversion when assigning into yHat.
        yHat[i] = np.nan if pred is None else np.asarray(pred).item()
    return yHat

In [5]:
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Both arguments are converted with ``np.asarray`` first, so plain lists
    are accepted on either side and ``** 2`` is always an element-wise
    square (never a matrix power for ``np.matrix`` inputs).

    Args:
        yArr: Observed target values.
        yHatArr: Predicted values, broadcast-compatible with ``yArr``.

    Returns:
        The sum of squared differences as a NumPy scalar.
    """
    residuals = np.asarray(yArr) - np.asarray(yHatArr)
    return (residuals ** 2).sum()

In [6]:
# Load the feature rows and target values from the tab-separated data file.
abX, abY = loadDataSet('abalone.txt')

In [7]:
# Fit and predict on the same rows (slice 0:99) with kernel width k = 0.1.
yHat01 = lwlrTest(abX[0:99], abX[0:99], abY[0:99], 0.1)

In [8]:
# Same rows, k = 1.
yHat1 = lwlrTest(abX[0:99], abX[0:99], abY[0:99], 1)

In [9]:
# Same rows, k = 10.
yHat10 = lwlrTest(abX[0:99], abX[0:99], abY[0:99], 10)

In [10]:
# Residual sum of squares on the rows the model was fitted to.
# lwlrTest returns a 1-D array, so the .T here is a no-op.
rssError(abY[0:99], yHat01.T)

56.78868743050092

In [11]:
rssError(abY[0:99], yHat1.T)

429.89056187038

In [12]:
rssError(abY[0:99], yHat10.T)

549.1181708827924

In [13]:
# Predict rows 100:199 from a model fitted on rows 0:99, k = 0.1.
# Row 99 falls in neither slice.
yHat01 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)

In [14]:
# Residual sum of squares on rows that were not used for fitting.
rssError(abY[100:199], yHat01.T)

57913.51550155911

In [15]:
# Held-out rows, k = 1.
yHat1 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)

In [16]:
rssError(abY[100:199], yHat1.T)

573.5261441895982

In [17]:
# Held-out rows, k = 10.
yHat10 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)

In [18]:
rssError(abY[100:199], yHat10.T)

517.5711905381903

### 和简单的线性回归做个比较

In [19]:
def standRegres(xArr, yArr):
    """Fit ordinary least squares via the normal equations.

    Solves ``ws = (X^T X)^-1 X^T y``.

    Args:
        xArr: Training features, one row per sample.
        yArr: Training targets, one value per sample.

    Returns:
        The regression weights as an n x 1 ``np.matrix``, or ``None`` (after
        printing a message) when the determinant of ``X^T X`` is exactly
        zero.
    """
    # np.asmatrix is used instead of np.mat, which NumPy 2.0 removed.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    xTx = xMat.T * xMat
    # A zero determinant means X^T X has no inverse.
    if np.linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    return xTx.I * (xMat.T * yMat)

In [20]:
ws = standRegres(abX[0:99], abY[0:99])

In [21]:
yHat = np.mat(abX[100:199]) * ws

In [22]:
rssError(abY[100:199], yHat.T.A)

518.6363153245542