In [30]:
import torch as t
import numpy as np

%matplotlib inline

In [18]:
# Scratch check of integer floor division: 35 // 2 evaluates to 17.
35//2

17

## tensor的拼接

比较torch和Numpy


| 操作                               | numpy                     | torch                              |
| ---------------------------------- | :------------------------ | ---------------------------------- |
| 垂直方向拼接(行增)                 | np.vstack((A, B))         | torch.cat((A, B), 0)               |
| 水平方向拼接(列增)                 | np.hstack((A, B))         | torch.cat((A, B), 1)               |


In [59]:
import torch as t
import numpy as np

# Three random matrices chosen so that `a` and `b` share a row count (3) and
# `a` and `c` share a column count (2): a|b can be joined side by side and
# a/c can be joined top to bottom.
a = np.random.rand(3, 2)
b = np.random.rand(3, 4)
c = np.random.rand(4, 2)

print('-'*30+'numpy'+'-'*30)

print('a = \n{}\n'.format(a))
print('b = \n{}\n'.format(b))
print('c = \n{}\n'.format(c))

# NumPy expects the arrays wrapped in ONE sequence argument.
ab = np.hstack((a, b))   # (3, 2) + (3, 4) -> (3, 6)
ac = np.vstack((a, c))   # (3, 2) + (4, 2) -> (7, 2)

print('ab = \n{}\n'.format(ab))
print('ac = \n{}\n'.format(ac))

print('-'*30+'torch cat'+'-'*30)

# PyTorch is imported under the alias `t` in this notebook; the bare name
# `torch` is never bound, so every call below goes through `t`.
# From here on a, b, c refer to tensors built from the NumPy arrays above.
a = t.from_numpy(a)
b = t.from_numpy(b)
c = t.from_numpy(c)

ab = t.cat((a, b), 1)    # grow dim 1 (columns)
ac = t.cat((a, c), 0)    # grow dim 0 (rows)

print('ab = \n{}\n'.format(ab))
print('ac = \n{}\n'.format(ac))

print('-'*30+'torch stack'+'-'*30)
# cat vs. stack: cat extends an EXISTING dimension (the inputs are appended
# along it), whereas stack inserts a NEW dimension at the given position
# (the inputs are layered along it).

aa_0 = t.stack((a, a), 0)   # new leading dim:  (2, 3, 2)
aa_1 = t.stack((a, a), 1)   # new middle dim:   (3, 2, 2)

print('aa_0 = \n{}\n'.format(aa_0.size()))
print('aa_1 = \n{}\n'.format(aa_1.size()))

------------------------------numpy------------------------------
a = 
[[0.20742526 0.68117048]
 [0.06107496 0.05566335]
 [0.91610351 0.6989592 ]]

b = 
[[0.90011517 0.55089449 0.85735844 0.05841693]
 [0.49165399 0.19501844 0.96031008 0.25732553]
 [0.03068124 0.44602039 0.88419661 0.31726   ]]

c = 
[[0.73625492 0.07091267]
 [0.57339809 0.01795358]
 [0.25299596 0.85770227]
 [0.1586333  0.37721679]]

ab = 
[[0.20742526 0.68117048 0.90011517 0.55089449 0.85735844 0.05841693]
 [0.06107496 0.05566335 0.49165399 0.19501844 0.96031008 0.25732553]
 [0.91610351 0.6989592  0.03068124 0.44602039 0.88419661 0.31726   ]]

ac = 
[[0.20742526 0.68117048]
 [0.06107496 0.05566335]
 [0.91610351 0.6989592 ]
 [0.73625492 0.07091267]
 [0.57339809 0.01795358]
 [0.25299596 0.85770227]
 [0.1586333  0.37721679]]

------------------------------torch cat------------------------------
ab = 
tensor([[0.2074, 0.6812, 0.9001, 0.5509, 0.8574, 0.0584],
        [0.0611, 0.0557, 0.4917, 0.1950, 0.9603, 0.2573],
       

### 自动梯度


下面是线性回归的求解，分别用四种方式。
- 公式计算
  $$
  \mathbf w = \mathbf {(X^{T}X)^{-1}X^{T}y}
  $$
- 手工梯度下降 
- 使用Autograd 
- 使用pytorch优化

可以看到所有方式所计算的结果基本相同（梯度下降经过足够多次迭代后收敛到公式解）。后面三种方式从同一个初始值出发，每一步的Loss和梯度也是完全相同的。


In [1]:
import torch as t
import numpy as np

# Fix the RNG so the random starting weights -- and therefore every
# loss/gradient trace printed by the training runs -- are identical on each
# Restart & Run All.
t.manual_seed(0)

# Four samples (rows) with three features (columns) each.
X = t.tensor([[5., 3., 2.],
              [4., 1., 5.],
              [7., 6., 8.],
              [1., 4., 2.]])
m, n = X.size()
y = t.tensor([1., 0., 2., 3.]).view(m, 1)
# Prepend a column of ones so the first weight plays the role of the intercept.
X = t.cat((t.ones(m, 1), X), 1)
# One shared starting point (n features + intercept) for all iterative solvers.
w_init = t.randn(n+1, 1)

print('X = \n{}\n'.format(X))
print('y = \n{}\n'.format(y))

print('-'*30+'equation'+'-'*30)
# Closed-form least squares (normal equation): w = (X^T X)^{-1} X^T y.
w = t.inverse(X.t() @ X) @ X.t() @ y

print('w = \n{}\n'.format(w))

print('-'*30+'manual grad'+'-'*30)
def manual_train(X, y, w_init, epochs=2000, learning_rate=0.01):
    """Fit linear-regression weights by gradient descent with a hand-derived gradient.

    Each step evaluates the mean squared error ``(Xw - y)^T (Xw - y) / m`` and
    its gradient ``2 X^T (Xw - y) / m``, then moves ``w`` against the gradient.
    Progress is printed roughly ten times over the run.

    Args:
        X: design matrix with one row per sample.
        y: column of targets with one row per row of ``X``.
        w_init: starting weights (a column with one row per column of ``X``).
            It is never modified; each update builds a new tensor.
        epochs: number of gradient steps.
        learning_rate: step size applied to the gradient.

    Returns:
        The weights after ``epochs`` updates (``w_init`` itself if ``epochs``
        is 0).
    """
    # Take the sample count from the data actually passed in rather than from
    # a notebook-level global, so the function works on any X.
    m = X.size(0)
    # epochs // 10 is 0 for epochs < 10, which would make the modulo below
    # divide by zero; fall back to logging every epoch in that case.
    log_every = max(1, epochs // 10)
    w = w_init
    for epoch in range(epochs):
        delta = X @ w - y                  # residuals
        loss = delta.t() @ delta/m         # 1x1 tensor holding the MSE
        grad = 2 * X.t() @ delta / m       # d(loss)/dw
        if epoch % log_every == 0:
            print('epoch {}, loss: {:7f}, grad: {}'.format(epoch, loss.item(), grad.t()))
        w = w - learning_rate*grad
    return w

# Run the hand-written gradient descent from the shared starting weights and show the result.
w = manual_train(X, y, w_init)
print('w = \n{}\n'.format(w))    

print('-'*30+'autograd'+'-'*30)
def autograd_train(X, y, w_init, epochs=2000, learning_rate=0.01):
    """Fit linear-regression weights by gradient descent, with gradients from autograd.

    The loss is the mean squared error ``(Xw - y)^T (Xw - y) / m``;
    ``loss.backward()`` supplies the gradient and the update is applied by
    hand under ``no_grad``. Progress is printed roughly ten times over the run.

    Args:
        X: design matrix with one row per sample.
        y: column of targets with one row per row of ``X``.
        w_init: starting weights. It is left untouched: training runs on a
            private copy, so the caller's tensor is neither flagged as
            requiring grad nor given a ``.grad``.
        epochs: number of gradient steps.
        learning_rate: step size applied to the gradient.

    Returns:
        A tensor with the trained weights, detached from the autograd graph.
    """
    m = X.size(0)
    # epochs // 10 is 0 for epochs < 10; avoid a modulo-by-zero when logging.
    log_every = max(1, epochs // 10)
    # Train a private leaf copy. Turning on requires_grad on w_init itself
    # would leak into the caller: a stale .grad would stay on it and be added
    # to the first gradient of any later run that starts from the same tensor.
    w = w_init.detach().clone().requires_grad_(True)
    for epoch in range(epochs):
        delta = X @ w - y
        loss = delta.t() @ delta/m
        loss.backward()
        if epoch % log_every == 0:
            print('epoch {}, loss: {:7f}, grad: {}'.format(epoch, loss.item(), w.grad.t()))
        with t.no_grad():
            # In-place step keeps `w` the same leaf tensor across iterations.
            w -= learning_rate*w.grad
        # backward() accumulates into .grad, so clear it before the next step.
        w.grad.zero_()
    return w.detach()

w = autograd_train(X, y, w_init)
print('w = \n{}\n'.format(w))

print('-'*30+'pytorch optimize'+'-'*30)
import torch.optim as optim
import torch.nn as nn

def pytorch_train(X, y, w_init, epochs=2000, learning_rate=0.01):
    """Fit linear-regression weights with ``optim.SGD`` and ``nn.MSELoss``.

    Progress is printed roughly ten times over the run.

    Args:
        X: design matrix with one row per sample.
        y: targets, same shape as ``X @ w``.
        w_init: starting weights. It is left untouched: the optimizer updates
            a private copy instead of overwriting the caller's tensor.
        epochs: number of optimizer steps.
        learning_rate: learning rate handed to ``optim.SGD``.

    Returns:
        A tensor with the trained weights, detached from the autograd graph.
    """
    # epochs // 10 is 0 for epochs < 10; avoid a modulo-by-zero when logging.
    log_every = max(1, epochs // 10)
    # The optimised tensor must require grad for loss.backward() to work.
    # Create that leaf here instead of depending on some earlier call having
    # switched requires_grad on for w_init, and keep optimizer.step() from
    # modifying w_init in place.
    w = w_init.detach().clone().requires_grad_(True)
    optimizer = optim.SGD([w], lr=learning_rate)
    criterion = nn.MSELoss()
    for epoch in range(epochs):
        optimizer.zero_grad()
        loss = criterion(X @ w, y)
        loss.backward()
        if epoch % log_every == 0:
            print('epoch {}, loss: {:7f}, grad: {}'.format(epoch, loss.item(), w.grad.t()))
        optimizer.step()
    return w.detach()

# Run the optimizer-based training from the shared starting weights and show the result.
w = pytorch_train(X, y, w_init)
print('w = \n{}\n'.format(w))   


X = 
tensor([[1., 5., 3., 2.],
        [1., 4., 1., 5.],
        [1., 7., 6., 8.],
        [1., 1., 4., 2.]])

y = 
tensor([[1.],
        [0.],
        [2.],
        [3.]])

------------------------------equation------------------------------
w = 
tensor([[ 0.9063],
        [-0.3437],
        [ 0.6250],
        [-0.0312]])

------------------------------manual grad------------------------------
epoch 0, loss: 3.241280, grad: tensor([[ 0.9334,  8.8125,  6.7585, 12.4407]])
epoch 200, loss: 0.307015, grad: tensor([[-0.4201,  0.0384,  0.0510,  0.0038]])
epoch 400, loss: 0.094523, grad: tensor([[-2.3291e-01,  2.3480e-02,  2.8202e-02,  3.8028e-05]])
epoch 600, loss: 0.029102, grad: tensor([[-1.2923e-01,  1.3036e-02,  1.5650e-02,  1.9133e-05]])
epoch 800, loss: 0.008960, grad: tensor([[-7.1709e-02,  7.2310e-03,  8.6819e-03,  8.1062e-06]])
epoch 1000, loss: 0.002759, grad: tensor([[-3.9789e-02,  4.0133e-03,  4.8188e-03,  5.7966e-06]])
epoch 1200, loss: 0.000849, grad: tensor([[-2.2078e-02,  2.