In [1]:
import torch as t
import numpy as np

%matplotlib inline

In [133]:
import torch

def compute():
    """Build a small autograd graph and return each node under its name.

    The graph is x -> y = 2x + 1 -> z = y*y + 1 -> out = mean(z), where x is a
    2x2 float64 leaf tensor that requires grad.

    Returns:
        dict: the tensors keyed 'x', 'y', 'z', 'out' (in that order).
    """
    leaf = torch.tensor([[1, 2],[3, 4]], dtype=torch.double, requires_grad=True)
    affine = 2 * leaf + 1
    squared = affine * affine + 1
    return dict(x=leaf, y=affine, z=squared, out=squared.mean())

def print_tensor(key, tensor):
    """Print one tensor together with its autograd bookkeeping.

    Args:
        key: label printed in front of the tensor value.
        tensor: the tensor whose value, grad_fn, grad, requires_grad, is_leaf
            and dtype are printed, followed by a 50-dash separator line.
    """
    print("{}={}".format(key, tensor))
    details = (
        tensor.grad_fn,
        tensor.grad,
        tensor.requires_grad,
        tensor.is_leaf,
        tensor.dtype,
    )
    template = "  grad_fn={} \n  grad={} \n  requires_grad={}\n  is_leaf={} \n  dtype={}"
    print(template.format(*details))
    print("-"*50)

def print_tensors(tensors):
    """Print every tensor of a name -> tensor mapping via print_tensor, in mapping order."""
    for name in tensors:
        print_tensor(name, tensors[name])

# Build the graph, then replace the dict entry 'x' with a brand-new leaf tensor.
# The replacement was never used to compute 'out', so the backward() call below
# cannot reach it and its .grad stays None; only the x created inside compute()
# takes part in the graph.
tensors = compute()
tensors['x'] = torch.tensor([[1, 1],[1, 1]], dtype=torch.double, requires_grad=True)
tensors['out'].backward()
print_tensors(tensors)

x=tensor([[1., 1.],
        [1., 1.]], dtype=torch.float64, requires_grad=True)
  grad_fn=None 
  grad=None 
  requires_grad=True
  is_leaf=True 
  dtype=torch.float64
--------------------------------------------------
y=tensor([[3., 5.],
        [7., 9.]], dtype=torch.float64, grad_fn=<AddBackward0>)
  grad_fn=<AddBackward0 object at 0x7feb540dd048> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------
z=tensor([[10., 26.],
        [50., 82.]], dtype=torch.float64, grad_fn=<AddBackward0>)
  grad_fn=<AddBackward0 object at 0x7feb540dd048> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------
out=42.0
  grad_fn=<MeanBackward0 object at 0x7feb540dd048> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------


In [129]:
# Ask every tensor in the graph to keep its gradient after backward(), so the
# intermediate tensors y and z report a .grad as well as the leaf x.
tensors = compute()
# Iterate the values directly: binding the throwaway name `_` at notebook top
# level would shadow IPython's last-output variable for the rest of the session.
for tensor in tensors.values():
    tensor.retain_grad()
tensors['out'].backward()
print_tensors(tensors)

x=tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64, requires_grad=True)
  grad_fn=None 
  grad=tensor([[3., 5.],
        [7., 9.]], dtype=torch.float64) 
  requires_grad=True
  is_leaf=True 
  dtype=torch.float64
--------------------------------------------------
y=tensor([[3., 5.],
        [7., 9.]], dtype=torch.float64, grad_fn=<AddBackward0>)
  grad_fn=<AddBackward0 object at 0x7feb540dd358> 
  grad=tensor([[1.5000, 2.5000],
        [3.5000, 4.5000]], dtype=torch.float64) 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------
z=tensor([[10., 26.],
        [50., 82.]], dtype=torch.float64, grad_fn=<AddBackward0>)
  grad_fn=<AddBackward0 object at 0x7feb540dd358> 
  grad=tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]], dtype=torch.float64) 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------
out=42.0
  grad_fn=<MeanBackward0 object at 0x7feb540dd358> 
  grad=

In [130]:
# z is not a scalar, so backward() needs an explicit gradient of the same shape
# (the vector of the vector-Jacobian product) to start from.
tensors = compute()
# Use float64 so the gradient matches z's dtype instead of relying on an
# implicit float32 -> float64 conversion inside autograd.
gradients = torch.tensor([[0.5, 1], [2, 4]], dtype=torch.double)
tensors['z'].backward(gradients)
print_tensors(tensors)

x=tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64, requires_grad=True)
  grad_fn=None 
  grad=tensor([[  6.,  20.],
        [ 56., 144.]], dtype=torch.float64) 
  requires_grad=True
  is_leaf=True 
  dtype=torch.float64
--------------------------------------------------
y=tensor([[3., 5.],
        [7., 9.]], dtype=torch.float64, grad_fn=<AddBackward0>)
  grad_fn=<AddBackward0 object at 0x7feab6067f28> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------
z=tensor([[10., 26.],
        [50., 82.]], dtype=torch.float64, grad_fn=<AddBackward0>)
  grad_fn=<AddBackward0 object at 0x7feab6067588> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------
out=42.0
  grad_fn=<MeanBackward0 object at 0x7feab6067588> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------


In [134]:
# Run backward with create_graph=True so the backward computation is itself
# recorded by autograd; retain_graph=True keeps the forward graph alive afterwards.
# In the recorded output, x.grad consequently shows a grad_fn of its own.
tensors = compute()
tensors['out'].backward(retain_graph=True, create_graph=True)
print_tensors(tensors)

x=tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64, requires_grad=True)
  grad_fn=None 
  grad=tensor([[3., 5.],
        [7., 9.]], dtype=torch.float64, grad_fn=<CloneBackward>) 
  requires_grad=True
  is_leaf=True 
  dtype=torch.float64
--------------------------------------------------
y=tensor([[3., 5.],
        [7., 9.]], dtype=torch.float64, grad_fn=<AddBackward0>)
  grad_fn=<AddBackward0 object at 0x7feabe4c3748> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------
z=tensor([[10., 26.],
        [50., 82.]], dtype=torch.float64, grad_fn=<AddBackward0>)
  grad_fn=<AddBackward0 object at 0x7feabe4c3748> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
--------------------------------------------------
out=42.0
  grad_fn=<MeanBackward0 object at 0x7feabe4c3748> 
  grad=None 
  requires_grad=True
  is_leaf=False 
  dtype=torch.float64
-----------------------------------------------

## tensor的拼接

下表比较 NumPy 与 PyTorch 中对应的拼接操作：


| 操作                               | numpy                     | torch                              |
| ---------------------------------- | :------------------------ | ---------------------------------- |
| 垂直方向拼接(行增)                 | np.vstack((A, B))         | torch.cat((A, B), 0)               |
| 水平方向拼接(列增)                 | np.hstack((A, B))         | torch.cat((A, B), 1)               |


In [51]:
import torch as t
import numpy as np

# Three random matrices: a and b share the row count (3), a and c share the
# column count (2), so a|b can be joined horizontally and a/c vertically.
a = np.random.rand(3, 2)
b = np.random.rand(3, 4)
c = np.random.rand(4, 2)

print('-'*30+'numpy'+'-'*30)

print('a = \n{}\n'.format(a))
print('b = \n{}\n'.format(b))
print('c = \n{}\n'.format(c))

ab = np.hstack((a, b))
ac = np.vstack((a, c))

print('ab = \n{}\n'.format(ab))
print('ac = \n{}\n'.format(ac))

print('-'*30+'torch cat'+'-'*30)

# This cell imports torch under the alias `t`; use that alias so the cell does
# not depend on a bare `torch` name leaked from an earlier cell.
a = t.from_numpy(a)
b = t.from_numpy(b)
c = t.from_numpy(c)

ab = t.cat((a, b), 1)
ac = t.cat((a, c), 0)

print('ab = \n{}\n'.format(ab))
print('ac = \n{}\n'.format(ac))

print('-'*30+'torch stack'+'-'*30)
# Difference between cat and stack: cat grows the size of an existing dimension
# (the inputs are appended), while stack inserts a new dimension (the inputs
# are layered on top of each other).

aa_0 = t.stack((a, a), 0)
aa_1 = t.stack((a, a), 1)

print('aa_0 = \n{}\n'.format(aa_0.size()))
print('aa_1 = \n{}\n'.format(aa_1.size()))

------------------------------numpy------------------------------
a = 
[[0.57981256 0.32386837]
 [0.07167079 0.55758686]
 [0.87771614 0.61672263]]

b = 
[[0.70158077 0.3792917  0.94872452 0.92641252]
 [0.9133379  0.99182234 0.72041977 0.67072277]
 [0.90090687 0.18414355 0.93752312 0.44986942]]

c = 
[[0.13479513 0.03405707]
 [0.89925418 0.87138145]
 [0.20890133 0.73610866]
 [0.7030512  0.35181984]]

ab = 
[[0.57981256 0.32386837 0.70158077 0.3792917  0.94872452 0.92641252]
 [0.07167079 0.55758686 0.9133379  0.99182234 0.72041977 0.67072277]
 [0.87771614 0.61672263 0.90090687 0.18414355 0.93752312 0.44986942]]

ac = 
[[0.57981256 0.32386837]
 [0.07167079 0.55758686]
 [0.87771614 0.61672263]
 [0.13479513 0.03405707]
 [0.89925418 0.87138145]
 [0.20890133 0.73610866]
 [0.7030512  0.35181984]]

------------------------------torch cat------------------------------
ab = 
tensor([[0.5798, 0.3239, 0.7016, 0.3793, 0.9487, 0.9264],
        [0.0717, 0.5576, 0.9133, 0.9918, 0.7204, 0.6707],
       

### 自动梯度


下面是线性回归的求解，分别用四种方式。
- 公式计算
  $$
  \mathbf w = \mathbf {(X^{T}X)^{-1}X^{T}y}
  $$
- 手工梯度下降 
- 使用Autograd 
- 使用pytorch优化

可以看到，四种方式得到的结果在数值误差范围内是一致的；后三种方式都从同一个初始权重出发，每一步的 Loss 和梯度也完全相同。


In [1]:
import torch as t
import numpy as np

# Raw feature matrix: one row per sample, one column per feature.
X = t.Tensor([
    [5, 3, 2],
    [4, 1, 5],
    [7, 6, 8],
    [1, 4, 2],
])
m, n = X.shape
# Targets as a column vector with one entry per sample.
y = t.Tensor([1, 0, 2, 3]).view(m, 1)
# Prepend a column of ones so the first weight plays the role of the intercept.
X = t.cat([t.ones(m, 1), X], dim=1)
# Random starting weights shared by the iterative solvers below.
w_init = t.randn(n+1, 1)

print('X = \n{}\n'.format(X))
print('y = \n{}\n'.format(y))

print('-'*30+'equation'+'-'*30)
# Closed-form least-squares solution: w = (X^T X)^{-1} X^T y.
w = (X.t() @ X).inverse() @ X.t() @ y

print('w = \n{}\n'.format(w))

print('-'*30+'manual grad'+'-'*30)
def manual_train(X, y, w_init, epochs=2000, learning_rate=0.01):
    """Fit linear-regression weights by gradient descent with a hand-derived gradient.

    Minimises the mean squared error ``(Xw - y)^T (Xw - y) / m`` where ``m`` is
    the number of rows of ``X``; its gradient is ``2 X^T (Xw - y) / m``.

    Args:
        X: design matrix with one row per sample.
        y: column vector of targets, one row per sample.
        w_init: starting weights as a column vector; not modified.
        epochs: number of gradient steps.
        learning_rate: step size.

    Returns:
        The weights after ``epochs`` steps (``w_init`` itself when epochs is 0).
    """
    # Take the sample count from X rather than from a notebook-level global.
    m = X.size(0)
    # Log roughly ten times; clamp to 1 so epochs < 10 does not cause a modulo by zero.
    log_every = max(epochs // 10, 1)
    w = w_init
    for epoch in range(epochs):
        delta = X @ w - y
        loss = delta.t() @ delta / m
        grad = 2 * X.t() @ delta / m
        if epoch % log_every == 0:
            print('epoch {}, loss: {:7f}, grad: {}'.format(epoch, loss.item(), grad.t()))
        # Rebind instead of updating in place so the caller's w_init is untouched.
        w = w - learning_rate*grad
    return w

# Run gradient descent with the hand-derived gradient, starting from w_init,
# and show the fitted weights.
w = manual_train(X, y, w_init)
print('w = \n{}\n'.format(w))    

print('-'*30+'autograd'+'-'*30)
def autograd_train(X, y, w_init, epochs=2000, learning_rate=0.01):
    """Fit linear-regression weights by gradient descent, with gradients from autograd.

    Minimises the mean squared error ``(Xw - y)^T (Xw - y) / m`` where ``m`` is
    the number of rows of ``X``.

    Args:
        X: design matrix with one row per sample.
        y: column vector of targets, one row per sample.
        w_init: starting weights as a column vector; not modified.
        epochs: number of gradient steps.
        learning_rate: step size.

    Returns:
        The weights after ``epochs`` steps, as a tensor that does not require grad.
    """
    # Take the sample count from X rather than from a notebook-level global.
    m = X.size(0)
    # Log roughly ten times; clamp to 1 so epochs < 10 does not cause a modulo by zero.
    log_every = max(epochs // 10, 1)
    # Work on a private copy: requires_grad_() below mutates its tensor in place
    # and must not flip the flag (or leave a .grad) on the caller's w_init.
    w = w_init.detach().clone()
    for epoch in range(epochs):
        w.requires_grad_(True)
        delta = X @ w - y
        loss = delta.t() @ delta / m
        loss.backward()
        if epoch % log_every == 0:
            print('epoch {}, loss: {:7f}, grad: {}'.format(epoch, loss.item(), w.grad.t()))
        # The update is not part of the model, so keep it out of the graph. The
        # result is a fresh tensor without grad, which also discards the old .grad.
        with t.no_grad():
            w = w - learning_rate*w.grad
    return w

w = autograd_train(X, y, w_init)
print('w = \n{}\n'.format(w))

print('-'*30+'pytorch optimize'+'-'*30)
import torch.optim as optim
import torch.nn as nn

def pytorch_train(X, y, w_init, epochs=2000, learning_rate=0.01):
    """Fit linear-regression weights with torch.optim.SGD and nn.MSELoss.

    Args:
        X: design matrix with one row per sample.
        y: column vector of targets, one row per sample.
        w_init: starting weights as a column vector; not modified.
        epochs: number of optimizer steps.
        learning_rate: SGD learning rate.

    Returns:
        The trained weight tensor (the leaf tensor handed to the optimizer).
    """
    # Log roughly ten times; clamp to 1 so epochs < 10 does not cause a modulo by zero.
    log_every = max(epochs // 10, 1)
    # The optimizer updates its parameters in place and backward() needs a leaf
    # that requires grad, so train a private copy instead of the caller's w_init.
    w = w_init.detach().clone().requires_grad_(True)
    optimizer = optim.SGD([w], lr=learning_rate)
    criterion = nn.MSELoss()
    for epoch in range(epochs):
        optimizer.zero_grad()
        loss = criterion(X @ w, y)
        loss.backward()
        if epoch % log_every == 0:
            print('epoch {}, loss: {:7f}, grad: {}'.format(epoch, loss.item(), w.grad.t()))
        optimizer.step()
    return w

w = pytorch_train(X, y, w_init)
print('w = \n{}\n'.format(w))


X = 
tensor([[1., 5., 3., 2.],
        [1., 4., 1., 5.],
        [1., 7., 6., 8.],
        [1., 1., 4., 2.]])

y = 
tensor([[1.],
        [0.],
        [2.],
        [3.]])

------------------------------equation------------------------------
w = 
tensor([[ 0.9063],
        [-0.3437],
        [ 0.6250],
        [-0.0312]])

------------------------------manual grad------------------------------
epoch 0, loss: 3.241280, grad: tensor([[ 0.9334,  8.8125,  6.7585, 12.4407]])
epoch 200, loss: 0.307015, grad: tensor([[-0.4201,  0.0384,  0.0510,  0.0038]])
epoch 400, loss: 0.094523, grad: tensor([[-2.3291e-01,  2.3480e-02,  2.8202e-02,  3.8028e-05]])
epoch 600, loss: 0.029102, grad: tensor([[-1.2923e-01,  1.3036e-02,  1.5650e-02,  1.9133e-05]])
epoch 800, loss: 0.008960, grad: tensor([[-7.1709e-02,  7.2310e-03,  8.6819e-03,  8.1062e-06]])
epoch 1000, loss: 0.002759, grad: tensor([[-3.9789e-02,  4.0133e-03,  4.8188e-03,  5.7966e-06]])
epoch 1200, loss: 0.000849, grad: tensor([[-2.2078e-02,  2.