### 随机梯度下降法

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [4]:
m = 100000  # number of samples

# One feature drawn from a standard normal, kept both as a 1-D vector and as
# an (m, 1) matrix for simple linear regression.
x = np.random.normal(size=m)
X = x.reshape(m, 1)
# Targets follow y = 4x + 3 plus Gaussian noise with standard deviation 3.
y = 3. + 4. * x + np.random.normal(loc=0, scale=3, size=m)

In [6]:
y.shape

(100000,)

### 批量梯度下降

In [7]:
def J(theta,X_b,y):
    """Return the mean squared error of the linear model ``X_b.dot(theta)``.

    Args:
        theta: Parameter vector applied to the columns of ``X_b``.
        X_b: Design matrix, one sample per row.
        y: Target values, one per row of ``X_b``.

    Returns:
        ``sum((y - X_b.dot(theta)) ** 2) / len(y)``, or ``float("inf")`` when
        evaluating that expression raises an ordinary exception.
    """
    try:
        return np.sum((y - X_b.dot(theta)) ** 2) / len(y)  # cost function
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit
        # must propagate so a long-running descent loop can be interrupted.
        return float("inf")

def dJ(theta,X_b,y):
    """Return the gradient of the mean-squared-error cost with respect to theta."""
    residual = X_b.dot(theta) - y
    return X_b.T.dot(residual) * 2. / len(y)

def gradient_descent(X_b,y,initial_theta,eta,n_iters=1e4, epsilon = 1e-8):
    """Minimise the cost J by full-batch gradient descent.

    Args:
        X_b: Design matrix, passed through to ``J`` and ``dJ``.
        y: Target values.
        initial_theta: Starting parameter vector; it is not modified in place.
        eta: Learning rate (step size).
        n_iters: Maximum number of update steps.
        epsilon: Stop as soon as the cost changes by less than this amount
            between two consecutive steps.

    Returns:
        The parameter vector after the last update that was performed.
    """
    theta = initial_theta
    cur_iter = 0
    # Carry the previous cost across iterations instead of re-evaluating
    # J on the full data set for the old theta at every step.
    last_cost = J(theta, X_b, y)

    while cur_iter < n_iters:
        gradient = dJ(theta, X_b, y)
        theta = theta - eta * gradient

        cost = J(theta, X_b, y)
        if abs(cost - last_cost) < epsilon:
            break
        last_cost = cost
        cur_iter += 1
    return theta

In [14]:
%%time
# Prepend a column of ones so that theta[0] acts as the intercept term.
X_b = np.concatenate((np.ones((len(x), 1)), X), axis=1)
initial_theta = np.zeros(X_b.shape[1])  # one parameter per column of X_b
eta = 0.01  # learning rate
theta = gradient_descent(X_b, y, initial_theta, eta)

Wall time: 839 ms


In [16]:
X_b

array([[1.        , 0.1824612 ],
       [1.        , 1.13527395],
       [1.        , 0.72792115],
       ...,
       [1.        , 0.64820843],
       [1.        , 0.89868786],
       [1.        , 0.23116828]])

In [13]:
theta  # 1-D array of shape (2,): [intercept, slope]

array([3.00097176, 3.99805167])

### 随机梯度下降

In [None]:
def J(theta,X_b,y):
    """Return the mean squared error of the linear model ``X_b.dot(theta)``.

    Args:
        theta: Parameter vector applied to the columns of ``X_b``.
        X_b: Design matrix, one sample per row.
        y: Target values, one per row of ``X_b``.

    Returns:
        ``sum((y - X_b.dot(theta)) ** 2) / len(y)``, or ``float("inf")`` when
        evaluating that expression raises an ordinary exception.
    """
    try:
        return np.sum((y - X_b.dot(theta)) ** 2) / len(y)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit
        # must propagate so a long-running training loop can be interrupted.
        return float("inf")

In [26]:
def dJ_sgd(theta,X_b_i,y_i):
    """Return the squared-error gradient estimate for the given sample(s).

    The result is not divided by a sample count.
    """
    error = X_b_i.dot(theta) - y_i
    return X_b_i.T.dot(error) * 2.  # stochastic "gradient"

In [27]:
def sgd(X_b,y,initial_theta,n_iters,t0=5,t1=50):
    """Fit theta by stochastic gradient descent with a decaying learning rate.

    Each step picks one row of ``X_b`` uniformly at random (with replacement)
    and moves theta against ``dJ_sgd`` evaluated on that single sample.

    Args:
        X_b: Design matrix, one sample per row.
        y: Target values, indexed like the rows of ``X_b``.
        initial_theta: Starting parameter vector; it is not modified in place.
        n_iters: Number of single-sample updates to perform.
        t0: Numerator of the learning-rate schedule ``t0 / (t + t1)``.
        t1: Offset in the denominator of the learning-rate schedule.
            The defaults reproduce the previously hard-coded values.

    Returns:
        The parameter vector after ``n_iters`` updates.
    """
    def learning_rate(t):
        # Step size shrinks as the iteration count t grows.
        return t0 / (t + t1)

    theta = initial_theta
    for cur_iter in range(n_iters):
        rand_i = np.random.randint(len(X_b))  # pick one sample index at random
        gradient = dJ_sgd(theta, X_b[rand_i], y[rand_i])
        theta = theta - learning_rate(cur_iter) * gradient

    return theta
    

In [28]:
%%time
# Prepend a column of ones so that theta[0] acts as the intercept term.
X_b = np.concatenate((np.ones((len(X), 1)), X), axis=1)
initial_theta = np.zeros(X_b.shape[1])  # one parameter per column of X_b
# Run a number of single-sample updates equal to one third of the sample count.
theta = sgd(X_b, y, initial_theta, n_iters=len(X_b) // 3)

Wall time: 261 ms


In [29]:
theta

array([2.99020262, 3.97118301])