### 1.什么是线性回归

In [3]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error
from scipy.optimize import minimize
X,y=make_regression(n_features=6,n_samples=200)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)

  return f(*args, **kwds)
  return f(*args, **kwds)


In [4]:
from sklearn.linear_model import LinearRegression
model=LinearRegression().fit(X_train,y_train)
print(model.coef_)
print(model.intercept_)

[91.54190864 17.45131164 99.46381765 70.78883293 30.16254091 65.28260962]
3.552713678800501e-15


In [5]:
model.predict(X_test)[:10]

array([ -26.84819569,  -36.94440604,  201.13314808,  -16.05525907,
          8.56466317, -222.30040597,   78.1154559 ,  136.43125401,
        -71.60576969,   27.41663916])

### 2.使用Scipy optimize求解线性回归

In [6]:
from scipy.optimize import lsq_linear
class MyLinearRegression_scipy:
    def fit(self,X,y):
        X_=np.full((X.shape[0],1),fill_value=1)    #新增一列为1的列向量
        X=np.concatenate([X,X_],axis=1)     #将X新增一列为1的列向量
        A = X
        b = y
        res = lsq_linear(A, b, lsmr_tol='auto', verbose=1)
        self.coef_=res.x[:-1]
        self.intercept_=res.x[-1]
        self.res=res
        return self.res.status
    def predict(self,X):
        return np.array(np.mat(X)*np.mat(self.res.x[:-1].reshape(-1,1))+self.res.x[-1]).flatten()
           
model=MyLinearRegression_scipy()
model.fit(X_train,y_train)

The unconstrained solution is optimal.
Final cost 1.4593e-24, first-order optimality 1.69e-11


3

In [7]:
model.predict(X_test)[:10]

array([ -26.84819569,  -36.94440604,  201.13314808,  -16.05525907,
          8.56466317, -222.30040597,   78.1154559 ,  136.43125401,
        -71.60576969,   27.41663916])

### 3.使用Numpy实现利用梯度为0求解线性回归

In [8]:
class MyLinearRegression:
    def __init__(self):
        pass
    def fit(self,X,y):
        X_=np.full((X.shape[0],1),fill_value=1)   
        X=np.concatenate([X,X_],axis=1)
        X=np.mat(X)
        #将W与b放在一起,所以需要将X新增一列全为1的列向量
        y=np.mat(y.reshape(-1,1))
        #将y重新转成矩阵，以便运算
        w=np.array((X.T*X).I*X.T*y)  #直接通过公式计算结果，注意这种解法要求X为满秩矩阵
        #计算w
        self.__W=w[:-1]    
        self.__b=w[-1].item()
        #将W与b进行拆分
        self.coef_=self.__W.flatten()  #将coefficient进行展开，允许外部访问
        self.intercept_=self.__b       #构造斜率参数，允许外部访问
    def predict(self,X):
        return np.array(np.mat(X)*np.mat(self.__W)+self.__b).flatten()

In [9]:
myModel=MyLinearRegression()
myModel.fit(X_train,y_train)
print(myModel.coef_)
print(myModel.intercept_)

[91.54190864 17.45131164 99.46381765 70.78883293 30.16254091 65.28260962]
-1.7763568394002505e-15


In [10]:
myModel.predict(X_test)[:10]

array([ -26.84819569,  -36.94440604,  201.13314808,  -16.05525907,
          8.56466317, -222.30040597,   78.1154559 ,  136.43125401,
        -71.60576969,   27.41663916])

### 3.使用Scipy求解带L2正则化的线性回归

In [24]:
def loss(w,X,y,c=0.1):
    X_=np.full((X_train.shape[0],1),fill_value=1)   
    X=np.concatenate([X_train,X_],axis=1)
    m,n=X.shape
    j1=np.sum(np.square(np.dot(X,w)-y))
    j2=np.sum(c*np.square(w*w))
    j=(j1+j2)/(2*m)
    return j

In [25]:
class LinearRegression_r2:
    def __init__(self,c=0.1):
        self.c=c
    def fit(self,X,y):
        init_w=np.random.randn(X.shape[1]+1)
        res=minimize(loss,init_w,args=(X_train,y_train))
        self.res=res
        self.coef_=res.x[:-1]
        self.intercept_=res.x[-1]
        
    def predict(self,X):
        r1=np.dot(X,self.coef_.T)+self.intercept_
        r2=self.c*np.dot(self.coef_.T,self.coef_)
        return r1+r2
model=LinearRegression_r2()
model.fit(X_train,y_train)
pre=model.predict(X_test)
print(model.coef_,model.intercept_)

[32.61492653 19.2530531  36.27788125 30.7830539  21.20446244 29.3134558 ] -5.220856052552169


In [26]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
print(pre[:10])
mean_absolute_error(y_test,pre)

[506.84255734 489.37243148 589.65729493 465.00616387 477.18827607
 401.44270935 557.04631281 552.39763066 451.27030993 497.07363173]


492.72553373169796

In [27]:
from sklearn.linear_model import Ridge
model=Ridge(alpha=0.1).fit(X_train,y_train)
pre2=model.predict(X_test)
print(model.coef_,model.intercept_)
mean_absolute_error(y_test,pre2)

[91.46519005 17.45532101 99.39750405 70.73918785 30.14818985 65.23657054] -0.007591986687160102


0.08524374633289238

### 4.使用梯度下降法求解线性回归

In [18]:
class LinearRegression_grad:
    def __init__(self,alpha=0.001,n_cycle=2000):
        self.alpha=alpha
        self.n_cycle=n_cycle
    def fit(self,X,y):
        X_=np.full(shape=(X.shape[0],1),fill_value=1)
        X=np.concatenate([X,X_],axis=1)
        W=np.random.randn(X.shape[1])
        for i in range(self.n_cycle):
            f1=2*X.T
            f2=np.dot(X,W.T)-y
            g=np.dot(f1,f2)    #计算梯度方向
            W=W-self.alpha*g   #更新权重
        self.W=W
        self.coef_=W[:-1]
        self.intercept=W[:-1]
    def predict(self,X):
        X_=np.full(shape=(X.shape[0],1),fill_value=1)
        X=np.concatenate([X,X_],axis=1)
        pre=np.dot(X,self.W.T)
        return pre

In [19]:
model=LinearRegression_grad()
model.fit(X_train,y_train)
pre=model.predict(X_test)
mean_absolute_error(y_test,pre)

2.711904774817716e-14