In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
np.random.seed(666)

In [None]:
x = 3 * np.random.random(size=100)
x

In [None]:
y = x * 3. + 4. + np.random.normal(0, 1, size=100)
y

In [None]:
plt.scatter(x, y)
plt.show()

In [None]:
X = x.reshape(-1, 1)

In [None]:
X.shape

## 使用梯度下降法训练

In [None]:
def J(theta, x_b, y):
    try:
        return np.sum((y - x_b.dot(theta)) ** 2) / len(y)
    except:
        return float('inf')

In [None]:
def DJ(theta, x_b, y):
    res = np.empty(len(theta))
    
    res[0] = np.sum(x_b.dot(theta) - y)
    
    for i in range(1, len(theta)):
        res[i] = (x_b.dot(theta) - y).dot(x_b[:, i])
    
    return res * 2 / len(x_b)

In [None]:
def gradient_descent(x_b, y, int_theta, eta, n_iters=1e4, epsilon=1e-8):
    theta = int_theta
    i_ters = 0
    
    while i_ters < n_iters:
        gradient = DJ(theta, x_b, y)
        last_theta = theta
        theta = theta - eta * gradient
        
        if (abs(J(theta, x_b, y) - J(last_theta, x_b, y)) < epsilon):
            break
        i_ters += 1
    
    return theta

In [None]:
X_b = np.hstack([np.ones((len(X), 1)), X]) # 为x添加一列

In [None]:
X_b.shape

In [None]:
ini_theta = np.zeros(X_b.shape[1])

In [None]:
eta = 0.01

In [None]:
theta = gradient_descent(X_b, y,ini_theta, eta)
theta

In [None]:
class my_LinearRegnession:
    
    def __init__(self):
        self.conf_ = None
        self.itercept_ = None
        self._theta = None
        
    def fit_normal(self, x_train, y_train):
        assert x_train.shape[0] == y_train.shape[0], \
            "the size"
        X_b = np.hstack([np.ones((len(x_train), 1)),x_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)  # np.linalg.inv 求逆
        self.itercept_ = self._theta[0]
        self.conf_ = self._theta[1:]
        
        return self
    
    def fit_gb(self, x_train, y_train):
        assert x_train.shape[0] == y_train.shape[0], \
            "the size"
        def J(theta, x_b, y):
            try:
                return np.sum((y - x_b.dot(theta)) ** 2) / len(y)
            except:
                return float('inf')
        
        def DJ(theta, x_b, y):
            res = np.empty(len(theta))

            res[0] = np.sum(x_b.dot(theta) - y)

            for i in range(1, len(theta)):
                res[i] = (x_b.dot(theta) - y).dot(x_b[:, i])

            return res * 2 / len(x_b)
        
        def gradient_descent(x_b, y, int_theta, eta, n_iters=1e4, epsilon=1e-8):
            theta = int_theta
            i_ters = 0

            while i_ters < n_iters:
                gradient = DJ(theta, x_b, y)
                last_theta = theta
                theta = theta - eta * gradient

                if (abs(J(theta, x_b, y) - J(last_theta, x_b, y)) < epsilon):
                    break
                i_ters += 1

            return theta
        
        X_b = np.hstack([np.ones((len(X), 1)), X]) # 为x添加一列
        ini_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y,ini_theta, eta)
        self.itercept_ = self._theta[0]
        self.conf_ = self._theta[1:]
        
        return self
        
    def predict(self, x_predict):
        assert self.itercept_ is not None and self.conf_ is not None, \
            "fit before"
        assert x_predict.shape[1] == len(self.conf_), \
            "number "
        X_b = np.hstack([np.ones((len(x_predict), 1)), x_predict])
        
        return X_b.dot(self._theta)
    
    def my_mean_squared_error(self, y_true, y_predict):
        return np.sum((y_predict - y_true) ** 2) / len(y_true)
    
    def my_r2_score(self, y_true, y_predict):
        return 1 - self.my_mean_squared_error(y_true, y_predict) / np.var(y_true)
 
    def score(self, x_test, y_test):
        y_redict = self.predict(x_test)
        return self.my_r2_score(y_test, y_redict)
    
    def __repr__(self):
        return "my_LinearRegnession()"

In [None]:
lin_reg = my_LinearRegnession()

In [None]:
lin_reg.fit_gb(X,y)

In [None]:
lin_reg.conf_

In [None]:
lin_reg.itercept_

### 梯度下降法的向量化

In [66]:
class my_LinearRegnession:
    
    def __init__(self):
        self.conf_ = None
        self.itercept_ = None
        self._theta = None
        
    def fit_normal(self, x_train, y_train):
        assert x_train.shape[0] == y_train.shape[0], \
            "the size"
        X_b = np.hstack([np.ones((len(x_train), 1)),x_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)  # np.linalg.inv 求逆
        self.itercept_ = self._theta[0]
        self.conf_ = self._theta[1:]
        
        return self
    
    def fit_gb(self, x_train, y_train, eta=0.01, n_iters=1e4):
        assert x_train.shape[0] == y_train.shape[0], \
            "the size"
        def J(theta, x_b, y):
            try:
                return np.sum((y - x_b.dot(theta)) ** 2) / len(y)
            except:
                return float('inf')
        
        def DJ(theta, x_b, y):
#             res = np.empty(len(theta))

#             res[0] = np.sum(x_b.dot(theta) - y)

#             for i in range(1, len(theta)):
#                 res[i] = (x_b.dot(theta) - y).dot(x_b[:, i])

#             return res * 2 / len(x_b)
            return x_b.T.dot(x_b.dot(theta) - y) * 2 / len(y)
        
        def gradient_descent(x_b, y, int_theta, eta, n_iters=1e4, epsilon=1e-8):
            theta = int_theta
            i_ters = 0

            while i_ters < n_iters:
                gradient = DJ(theta, x_b, y)
                last_theta = theta
                theta = theta - eta * gradient

                if (abs(J(theta, x_b, y) - J(last_theta, x_b, y)) < epsilon):
                    break
                i_ters += 1

            return theta
        
        X_b = np.hstack([np.ones((len(x_train), 1)), x_train]) # 为x添加一列
        ini_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train,ini_theta, eta,n_iters)
        self.itercept_ = self._theta[0]
        self.conf_ = self._theta[1:]
        
        return self
        
    def predict(self, x_predict):
        assert self.itercept_ is not None and self.conf_ is not None, \
            "fit before"
        assert x_predict.shape[1] == len(self.conf_), \
            "number "
        X_b = np.hstack([np.ones((len(x_predict), 1)), x_predict])
        
        return X_b.dot(self._theta)
    
    def my_mean_squared_error(self, y_true, y_predict):
        return np.sum((y_predict - y_true) ** 2) / len(y_true)
    
    def my_r2_score(self, y_true, y_predict):
        return 1 - self.my_mean_squared_error(y_true, y_predict) / np.var(y_true)
 
    def score(self, x_test, y_test):
        y_redict = self.predict(x_test)
        return self.my_r2_score(y_test, y_redict)
    
    def __repr__(self):
        return "my_LinearRegnession()"

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

In [2]:
boston = datasets.load_boston()

X = boston.data
y = boston.target

X = X[y < 50.0]
y = y[y < 50.0]

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)

#### 使用最小二乘法

In [6]:
lin_req1 = my_LinearRegnession()

In [7]:
%time lin_req1.fit_normal(x_train, y_train)

Wall time: 92.9 ms


my_LinearRegnession()

In [8]:
lin_req1.score(x_test, y_test)

0.8129794056212793

#### 使用梯度下降法

In [67]:
lin_req2 = my_LinearRegnession()

In [68]:
%time lin_req2.fit_gb(x_train, y_train)



Wall time: 1.17 s


my_LinearRegnession()

In [69]:
lin_req2.conf_

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

In [70]:
x_train[:10, :]

array([[1.42362e+01, 0.00000e+00, 1.81000e+01, 0.00000e+00, 6.93000e-01,
        6.34300e+00, 1.00000e+02, 1.57410e+00, 2.40000e+01, 6.66000e+02,
        2.02000e+01, 3.96900e+02, 2.03200e+01],
       [3.67822e+00, 0.00000e+00, 1.81000e+01, 0.00000e+00, 7.70000e-01,
        5.36200e+00, 9.62000e+01, 2.10360e+00, 2.40000e+01, 6.66000e+02,
        2.02000e+01, 3.80790e+02, 1.01900e+01],
       [1.04690e-01, 4.00000e+01, 6.41000e+00, 1.00000e+00, 4.47000e-01,
        7.26700e+00, 4.90000e+01, 4.78720e+00, 4.00000e+00, 2.54000e+02,
        1.76000e+01, 3.89250e+02, 6.05000e+00],
       [1.15172e+00, 0.00000e+00, 8.14000e+00, 0.00000e+00, 5.38000e-01,
        5.70100e+00, 9.50000e+01, 3.78720e+00, 4.00000e+00, 3.07000e+02,
        2.10000e+01, 3.58770e+02, 1.83500e+01],
       [6.58800e-02, 0.00000e+00, 2.46000e+00, 0.00000e+00, 4.88000e-01,
        7.76500e+00, 8.33000e+01, 2.74100e+00, 3.00000e+00, 1.93000e+02,
        1.78000e+01, 3.95560e+02, 7.56000e+00],
       [2.49800e-02, 0.00000e+

In [13]:
%time lin_req2.fit_gb(x_train, y_train, eta=0.000001)

Wall time: 1.21 s


my_LinearRegnession()

In [14]:
lin_req2.conf_

array([-0.09221871,  0.11615068, -0.07233536,  0.00294593,  0.00443085,
        0.12400232,  0.05400738,  0.0408687 , -0.00472105,  0.00279527,
        0.13037331,  0.04429628, -0.23062717])

In [15]:
lin_req2.score(x_test, y_test)

0.2860060019209706

In [16]:
%time lin_req2.fit_gb(x_train, y_train, eta=0.000001, n_iters=1e6)

Wall time: 1min 54s


my_LinearRegnession()

In [17]:
lin_req2.score(x_test, y_test)

0.761600988173621

In [18]:
lin_req2.conf_

array([-9.19626758e-02,  5.43718225e-02, -7.17672640e-02,  1.94538001e-01,
        2.01314205e-01,  3.99553907e+00,  3.08389538e-03, -4.99889297e-01,
        1.14340764e-01, -9.74944922e-03,  1.85122690e-02,  1.57661792e-02,
       -3.73563407e-01])

### 数据归一化处理

In [71]:
from sklearn.preprocessing import StandardScaler

In [72]:
standarScaler = StandardScaler()

In [73]:
standarScaler.fit(x_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [74]:
x_train_stand = standarScaler.transform(x_train)

In [75]:
lin_reg3 = my_LinearRegnession()

In [76]:
%time lin_reg3.fit_gb(x_train_stand, y_train)

Wall time: 636 ms


my_LinearRegnession()

In [77]:
lin_reg3.conf_

array([-1.04042202,  0.83093351, -0.24794356,  0.01179456, -1.35034756,
        2.25074   , -0.66384353, -2.53568774,  2.25572406, -2.34011572,
       -1.76565394,  0.70923397, -2.72677064])

In [78]:
x_train_stand[:2, :]

array([[ 1.22372711, -0.47293831,  1.0044968 , -0.23791548,  1.17672219,
         0.1746197 ,  1.12041788, -1.0461857 ,  1.66456428,  1.53040651,
         0.77931907,  0.43942982,  1.01121088],
       [ 0.00285336, -0.47293831,  1.0044968 , -0.23791548,  1.83672499,
        -1.31943651,  0.98737038, -0.79665733,  1.66456428,  1.53040651,
         0.77931907,  0.26215642, -0.40385575]])

In [79]:
x_train[:2, :]

array([[ 14.2362 ,   0.     ,  18.1    ,   0.     ,   0.693  ,   6.343  ,
        100.     ,   1.5741 ,  24.     , 666.     ,  20.2    , 396.9    ,
         20.32   ],
       [  3.67822,   0.     ,  18.1    ,   0.     ,   0.77   ,   5.362  ,
         96.2    ,   2.1036 ,  24.     , 666.     ,  20.2    , 380.79   ,
         10.19   ]])

In [80]:
x_test_stand = standarScaler.transform(x_test)

In [81]:
lin_reg3.score(x_test_stand, y_test)

0.8129873310487505