## 多元线性回归模型

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

In [3]:
# Load the Boston dataset through sklearn's `datasets` module.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version still provides it.
boston = datasets.load_boston()

# Keep only the samples whose target is strictly below 50.0, applying the same
# row mask to the feature matrix and to the target vector.
X, y = boston.data[boston.target < 50.0], boston.target[boston.target < 50.0]

In [138]:
# Dimensions of the feature matrix after the target < 50.0 filter: (rows, columns).
X.shape

(490, 13)

### 使用sklearn的LinearRegression

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Split into train/test sets with test_size=0.2; the fixed random_state makes
# the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)

In [8]:
from sklearn.linear_model import LinearRegression

In [9]:
# sklearn's linear regression with its default settings (reference implementation).
reg = LinearRegression()

In [10]:
# Fit the sklearn model on the training split only.
reg.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [11]:
reg.coef_ # fitted coefficients, one per feature column

array([-1.20354261e-01,  3.64423279e-02, -3.61493155e-02,  5.12978140e-02,
       -1.15775825e+01,  3.42740062e+00, -2.32311760e-02, -1.19487594e+00,
        2.60101728e-01, -1.40219119e-02, -8.35430488e-01,  7.80472852e-03,
       -3.80923751e-01])

In [12]:
reg.intercept_  # fitted intercept term

34.117399723229596

In [21]:
# Score of the sklearn model on the held-out test split; compare with the
# hand-written my_r2_score result further down.
reg.score(x_test, y_test)

0.8129794056212807

### 自己的多元线性回归

In [119]:
class my_LinearRegnession:
    """Ordinary least-squares multiple linear regression.

    The model is ``y = intercept + X.dot(coefficients)``; the parameters are
    obtained in closed form by ``fit_normal``.

    Attributes
    ----------
    conf_ : numpy.ndarray or None
        Fitted coefficients, one per feature column (``None`` before fitting).
        The spelling is kept for backward compatibility; ``coef_`` is a
        read-only alias.
    itercept_ : float or None
        Fitted intercept (``None`` before fitting). The spelling is kept for
        backward compatibility; ``intercept_`` is a read-only alias.
    _theta : numpy.ndarray or None
        Full parameter vector ``[intercept, coefficients...]``.
    """

    def __init__(self):
        self.conf_ = None
        self.itercept_ = None
        self._theta = None

    @property
    def coef_(self):
        """Read-only alias of ``conf_`` using the sklearn-style attribute name."""
        return self.conf_

    @property
    def intercept_(self):
        """Read-only alias of ``itercept_`` using the sklearn-style attribute name."""
        return self.itercept_

    def fit_normal(self, x_train, y_train):
        """Fit the model by solving the least-squares problem in closed form.

        Parameters
        ----------
        x_train : numpy.ndarray
            2-D feature matrix, one row per sample.
        y_train : numpy.ndarray
            Targets; must have as many entries as ``x_train`` has rows.

        Returns
        -------
        my_LinearRegnession
            ``self``, so calls can be chained.

        Raises
        ------
        AssertionError
            If ``x_train`` and ``y_train`` disagree on the number of samples.
        """
        assert x_train.shape[0] == y_train.shape[0], \
            "the size"
        # Prepend a column of ones so the intercept is learned as theta[0].
        X_b = np.hstack([np.ones((len(x_train), 1)), x_train])
        # pinv(X_b).dot(y) equals the normal-equation solution
        # inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y) whenever X_b has full column
        # rank, but it also works (minimum-norm solution) when features are
        # collinear, where the explicit inverse raises LinAlgError or loses
        # precision.
        self._theta = np.linalg.pinv(X_b).dot(y_train)
        self.itercept_ = self._theta[0]
        self.conf_ = self._theta[1:]

        return self

    def predict(self, x_predict):
        """Predict targets for the rows of ``x_predict``.

        Parameters
        ----------
        x_predict : numpy.ndarray
            2-D feature matrix with the same number of columns used for fitting.

        Returns
        -------
        numpy.ndarray
            One prediction per row of ``x_predict``.

        Raises
        ------
        AssertionError
            If the model has not been fitted, or the column count differs from
            the number of fitted coefficients.
        """
        assert self.itercept_ is not None and self.conf_ is not None, \
            "fit before"
        assert x_predict.shape[1] == len(self.conf_), \
            "number "
        # Same bias column as in fit_normal so theta[0] acts as the intercept.
        X_b = np.hstack([np.ones((len(x_predict), 1)), x_predict])

        return X_b.dot(self._theta)

    def my_mean_squared_error(self, y_true, y_predict):
        """Return the mean of the squared differences between prediction and truth."""
        return np.sum((y_predict - y_true) ** 2) / len(y_true)

    def my_r2_score(self, y_true, y_predict):
        """Return R^2 = 1 - MSE(y_true, y_predict) / Var(y_true)."""
        return 1 - self.my_mean_squared_error(y_true, y_predict) / np.var(y_true)

    def score(self, x_test, y_test):
        """Return the R^2 score of the model's predictions on ``x_test`` against ``y_test``."""
        y_predict = self.predict(x_test)
        return self.my_r2_score(y_test, y_predict)

    def __repr__(self):
        return "my_LinearRegnession()"

In [120]:
# Instantiate the hand-written regressor defined above.
m_reg = my_LinearRegnession()

In [121]:
# Fit on the same training split used for the sklearn model so results are comparable.
m_reg.fit_normal(x_train, y_train)

my_LinearRegnession()

In [122]:
# Fitted coefficients of the hand-written model (compare with reg.coef_).
m_reg.conf_

array([-1.20354261e-01,  3.64423279e-02, -3.61493155e-02,  5.12978140e-02,
       -1.15775825e+01,  3.42740062e+00, -2.32311760e-02, -1.19487594e+00,
        2.60101728e-01, -1.40219119e-02, -8.35430488e-01,  7.80472852e-03,
       -3.80923751e-01])

In [133]:
# Fitted intercept of the hand-written model (compare with reg.intercept_).
m_reg.itercept_

34.117399723224324

In [134]:
# Predictions of the hand-written model on the test split, reused by the metric cells below.
y_predict = m_reg.predict(x_test)

In [135]:
# Mean squared error of the test-split predictions. Uses `y_predict` from the
# cell above; the previous `y_presict` is never defined in this notebook and
# raises NameError on a fresh kernel.
m_reg.my_mean_squared_error(y_test, y_predict)

11.671795030755737

In [136]:
# R^2 of the test-split predictions. Uses `y_predict` from the predict cell
# above; the previous `y_presict` is never defined in this notebook and raises
# NameError on a fresh kernel.
m_reg.my_r2_score(y_test, y_predict)

0.8129794056212793

In [137]:
# Convenience wrapper: predicts on x_test and returns my_r2_score against y_test.
m_reg.score(x_test, y_test)

0.8129794056212793

### KNN Regression

In [139]:
from sklearn.neighbors import KNeighborsRegressor

In [140]:
# k-nearest-neighbours regressor with sklearn's default hyperparameters.
knn_reg = KNeighborsRegressor()

In [141]:
# Fit on the same training split as the linear models.
knn_reg.fit(x_train, y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=5, p=2,
          weights='uniform')

In [142]:
# Score on the held-out test split, for comparison with the linear-regression scores above.
knn_reg.score(x_test, y_test)

0.5865412198300899

In [143]:
# Predicted targets for every row of the test split.
knn_reg.predict(x_test)

array([18.36, 31.62, 10.86, 33.14, 22.48,  8.9 , 28.18, 32.5 , 16.4 ,
       32.42, 16.24, 18.14, 23.04, 15.4 , 21.86, 28.5 , 15.6 , 22.  ,
       27.08, 28.52, 10.26, 24.34, 24.22, 32.32, 21.88, 31.26, 19.24,
       23.48, 10.86, 24.54, 35.98, 11.86, 11.66, 34.1 , 19.12, 16.5 ,
       10.06, 23.62, 19.06, 19.22, 12.82, 25.66, 16.5 , 24.64, 24.56,
       15.76, 24.8 , 37.04, 20.  , 21.96, 18.48, 10.26, 26.02, 19.22,
       24.8 , 28.5 , 24.16, 17.68, 23.5 , 35.88, 19.04, 26.2 , 22.3 ,
       10.88, 12.12, 24.76, 20.12, 21.46, 27.08, 11.2 , 16.8 , 19.06,
       19.28, 20.42, 12.04, 23.48, 21.82, 17.14, 15.92, 30.34, 19.9 ,
       21.28, 20.42, 19.36, 32.32, 20.56, 23.42, 21.84, 27.42, 17.86,
       31.62, 29.94, 32.44, 24.24, 36.42, 31.82, 35.22, 23.5 ])