## 梯度下降法的向量化

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Load the Boston housing features and targets.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell
# needs an older scikit-learn release to run -- confirm the pinned version.
boston = datasets.load_boston()
X, y = boston.data, boston.target

# Keep only the samples whose target is below 50.0. The mask is computed once
# so that X and y are filtered with exactly the same rows.
keep = y < 50.0
X, y = X[keep], y[keep]

In [3]:
from playML.model_selection import train_test_split

# Hold out a test set. The explicit seed is presumably what makes the split
# reproducible across runs (confirm against playML's train_test_split).
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

In [4]:
from playML.LinearRegression import LinearRegression

# Baseline: fit with fit_normal (presumably the closed-form normal-equation
# solver -- confirm in playML), timing the fit with %time, then score the
# model on the held-out test set.
lin_reg1 = LinearRegression()
%time lin_reg1.fit_normal(X_train, y_train)
lin_reg1.score(X_test, y_test)

Wall time: 156 ms


0.8129794056212895

### 使用梯度下降法

In [5]:
# Fit by gradient descent on the raw (unscaled) training features, using
# fit_gd's default arguments.
lin_reg2 = LinearRegression()
lin_reg2.fit_gd(X_train, y_train)

LinearRegression()

In [6]:
# Every coefficient comes back as nan: the features have very different
# magnitudes, so the learning rate used for this fit is still too large.
lin_reg2.coef_

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

In [7]:
# Refit with a much smaller learning rate (eta).
lin_reg2.fit_gd(X_train, y_train, eta=0.000001)

LinearRegression()

In [8]:
# The R^2 score is very low: with a learning rate this small, the number of
# iterations performed is still not enough.
lin_reg2.score(X_test, y_test)

0.2758681872447726

In [9]:
# Keep the small eta but raise n_iters to 1e6; %time reports how long the fit takes.
%time lin_reg2.fit_gd(X_train, y_train, eta=0.000001, n_iters=1e6)

Wall time: 44.3 s


LinearRegression()

In [10]:
# Score again on the test set after the longer gradient-descent run.
lin_reg2.score(X_test, y_test)

0.7542932581943915

### 使用梯度下降法前进行数据归一化

In [11]:
from sklearn.preprocessing import StandardScaler

standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train_standard = standardScaler.transform(X_train)

lin_reg3 = LinearRegression()
%time lin_reg3.fit_gd(X_train_standard, y_train)

Wall time: 229 ms


LinearRegression()

In [12]:
# Transform the test data with the scaler that was fitted on the training
# data (no refit), then score the model on the transformed test set.
X_test_standard = standardScaler.transform(X_test)
lin_reg3.score(X_test_standard, y_test)

0.8129873310487505

### 梯度下降法的优势

In [21]:
# Build a synthetic linear-regression problem with many features.
# Seed NumPy's global generator first so the generated data (and therefore
# the timings and fits below) are reproducible on a fresh kernel.
np.random.seed(666)

# 5000 features may look like a lot, but that cannot even hold a 100x100
# image (10000 pixels), so n = 10000 is used here (n = 5000 was the earlier
# setting).
m = 1000
n = 10000

# m samples with n features each, drawn from a standard normal distribution.
big_X = np.random.normal(size=(m, n))

# Generate the linear model directly: a vector of n + 1 parameters drawn
# uniformly from [0, 100). Element 0 is used as the intercept and the
# remaining n elements as the feature coefficients.
true_theta = np.random.uniform(0.0, 100.0, size=n+1)

# Targets from the linear model plus Gaussian noise (mean 0, std 10).
big_y = big_X.dot(true_theta[1:]) + true_theta[0] + np.random.normal(0., 10., size=m)

In [22]:
# Time fit_normal on the large synthetic data set.
# NOTE(review): no timing output was recorded for this cell. With m < n the
# normal-equation system is underdetermined -- confirm how playML's
# fit_normal behaves in that case.
big_reg1 = LinearRegression()
%time big_reg1.fit_normal(big_X, big_y)

In [20]:
# Time gradient descent (fit_gd with its default arguments) on the same
# large synthetic data set.
big_reg2 = LinearRegression()
%time big_reg2.fit_gd(big_X, big_y)

Wall time: 2.58 s


LinearRegression()