# 简单梯度下降实现

### 对 y=x^2+2*x+1进行梯度下降 
#### 1.用Python


In [1]:
x =5 # x初始值  
learning_rate =0.2 #学习率
epoches =10
y =lambda x:x**2 +2*x+1 #简单曲线
for epoch in range(epoches):
    dx =2*x+2
    x = x-learning_rate*dx;
print(x)

-0.9637202944000001


#### 2. 使用Pytorch

In [2]:
import torch
x=torch.randn(1,1,requires_grad=True) # x取随机值
print('grad:',x.grad," data",x.data)

grad: None  data tensor([[0.0982]])


设置学习率及学习周期

In [3]:
learning_rate =0.1
epoches =10

In [4]:
for epoch in range(epoches):
     y = x**2+2*x+1
     y.backward()
     print("grad",x.grad.data) #x的梯度值
     x.data = x.data - learning_rate*x.grad.data #更新x
     x.grad.data.zero_()
print(x.data)

grad tensor([[2.1964]])
grad tensor([[1.7571]])
grad tensor([[1.4057]])
grad tensor([[1.1246]])
grad tensor([[0.8996]])
grad tensor([[0.7197]])
grad tensor([[0.5758]])
grad tensor([[0.4606]])
grad tensor([[0.3685]])
grad tensor([[0.2948]])
tensor([[-0.8821]])


#### 3.  Python+Numpy拟合简单曲线

In [5]:
import numpy as np

x_data = np.array([1.,2.,3])
y_data = np.array([2,4,6])

epoches = 10

lr =0.1
w = 0 
cost = []

for epoch in range(epoches):
    y_pred = x_data*w
    loss = (y_pred - y_data)**2/2*len(x_data)
    cost.append(sum(loss))
    #print(cost)
    dw = -2 *(y_data-y_pred)@x_data.T/x_data.shape[0]
    w = w - lr*dw
    
print(w)
print(cost)

1.9999999999965319
[84.0, 0.3733333333333322, 0.0016592592592593162, 7.374485596702598e-06, 3.27754915409057e-08, 1.4566885129490257e-10, 6.474171173496756e-13, 2.877409334547812e-15, 1.2788488946338218e-17, 5.683796199670706e-20]


#### 4. Pytorch拟合简单曲线 

In [6]:
import torch

x_data = torch.Tensor([[1.0],[2.0],[3.0]])
y_data = torch.Tensor([[2.0],[4.0],[6.0]])
epoches = 10

lr =0.1
w =torch.zeros(1,1,requires_grad =True)
#print(w.data)
cost = []
for epoch in range(epoches):
    y_pred = x_data*w
   # print(y_pred)
    #Loss
    loss = torch.mean((y_pred-y_data)**2)
    #print(loss.data)
   # print(loss.data.numpy())
    cost.append(loss.data.item())
    #print(cost)
    #print(cost)
    loss.backward()
    #print(w.grad.data)
    #参数更新
    w.data = w.data - lr*w.grad.data
    w.grad.data.zero_()

print(w)

tensor([[2.]], requires_grad=True)


##  numpy和pytorch实现线性回归

#### 1. Numpy实现

In [7]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from matplotlib import style

#创建数据
N = 100
x1 = np.linspace(-10, 10, N)
x2 = np.linspace(-15, 5, N)

x = np.concatenate(([x1], [x2]), axis=0).T
w = np.array([2, -4])
y = np.dot(x, w)
fig = plt.figure()
ax1 = fig.add_subplot(111, projection='3d')

ax1.plot_wireframe(np.array([x1]),np.array([x2]),np.array([y]), rstride=5, cstride=5)
ax1.set_xlabel("x1")
ax1.set_ylabel("x2")
ax1.set_zlabel("y")

#梯度下降
EPOCHS = 50 #迭代总次数
LOSS_MIN = 0.0001 #loss的目标最小值，当loss小于此值时停止迭代
lr = 0.01
# w_GD = np.random.rand(2) #梯度下降(GD)过程中存储w的值
w_GD = np.zeros(2)

cost = [] #梯度下降(GD)过程中存储loss的值
w_all = []
for i in range(EPOCHS):
    w_all.append(w_GD.copy())
    y_predict = np.dot(x, w_GD) #使用当前w_GD的y预测值
    loss = np.mean((y_predict-y)**2) #计算loss
    cost.append(loss)
    dw = np.mean(2*(y_predict-y) * x.T, axis=1) #计算梯度
    w_GD -= lr*dw #梯度下降
    
print("loss:",loss)
print("w1:",w_GD[0],"w2",w_GD[1])

#画出梯度下降曲线
w_all = np.array(w_all)
fig = plt.figure()
ax2 = fig.add_subplot(111, projection='3d')
ax2.plot_wireframe(np.array([w_all[:,0]]),np.array([w_all[:,1]]),np.array([cost]))
ax2.set_xlabel("w1")
ax2.set_ylabel("w2")
ax2.set_zlabel("loss")
fig = plt.figure()

#画出loss-iteration曲线
plt.plot(range(len(cost)),cost)
plt.title('loss')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()

loss: 2.565443781623136e-08
w1: 1.9999674457769208 w2 -3.999977280651687


<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

#### 2. Pytorch实现

In [8]:
import torch 
from torch.autograd import Variable
import numpy as np
N = 100
x = Variable(torch.randn(N,2))
w = Variable(torch.FloatTensor([2, -4]))
y = x*w

EPOCHS = 5000

lr = 0.01
w_GD = Variable(torch.FloatTensor([0, 0]), requires_grad=True)
cost = []
w_all = []
for i in range(EPOCHS):
    w_all.append(w_GD.data)
    y_predict = x*w_GD
    loss = torch.mean((y_predict-y)**2)

    cost.append(loss.data.numpy())
    loss.backward()
     #参数更新
    w_GD.data -= lr*w_GD.grad.data
    w_GD.grad.data.zero_()    
print("loss:",loss)
print("w_GD:",w_GD)

loss: tensor(9.4182e-11, grad_fn=<MeanBackward0>)
w_GD: tensor([ 2.0000, -4.0000], requires_grad=True)
