# PyTorch Notebook

In [1]:
import torch
import torchvision
import numpy as np

## Pytorch basics
If you are using JupyterLab, enable "Show Contextual Help" to look up the definitions of functions as you go.

In [16]:
# Create a 3x4 tensor of ones and show it under two other shapes.
# view() keeps the same 12 elements; -1 asks PyTorch to infer that dimension.
x = torch.ones((3, 4))

print(x, x.view(1, 12), x.view(2, -1), sep="\n")

# Element-wise sum with a tensor of random values of the same shape.
y = torch.rand(3, 4)
z = torch.add(x, y)

print(z.shape)
print(z)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
tensor([[1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.]])
torch.Size([3, 4])
tensor([[1.7880, 1.6233, 1.7106, 1.1526],
        [1.6345, 1.1149, 1.2167, 1.6470],
        [1.1962, 1.2656, 1.3082, 1.1896]])


In [6]:
# Autograd basics: a tensor created with requires_grad=True has its
# downstream operations recorded.
x = torch.ones((1, 2), requires_grad=True)
print(x)

# y is the result of an operation on x, so it carries a grad_fn.
y = x * 2 + 2
print(y)
print(y.grad_fn)

# Further operations extend the graph; the mean reduces z to a scalar.
z = (y * y) * 3
out = torch.mean(z)
print(z, out)

# (detach() is demonstrated in its own section further down.)

tensor([[1., 1.]], requires_grad=True)
tensor([[4., 4.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x12cb13710>
tensor([[48., 48.]], grad_fn=<MulBackward0>) tensor(48., grad_fn=<MeanBackward0>)


In [10]:
# Tensors built without requires_grad (and anything computed only from them)
# do not track gradients: requires_grad starts out False.
x = torch.randn(2, 2)
x = x * 3 / (x - 1)
print(x.requires_grad)

# requires_grad_() flips the flag in place on the existing tensor.
x.requires_grad_(True)
print(x.requires_grad)

# From here on, operations on x are recorded and the result gets a grad_fn.
y = x.pow(2).sum()
print(y.grad_fn)

False
True
<SumBackward0 object at 0x12c7df5f8>


### backward

In [21]:
# A user-created tensor with gradient tracking enabled; it has no grad_fn
# because no operation produced it.
x = torch.ones((2, 2), requires_grad=True)
print(x, x.requires_grad, x.grad_fn, "---", sep="\n")

# The result of an operation on x requires grad and records its grad_fn.
y = x + 2
print(y, y.requires_grad, y.grad_fn, "---", sep="\n")

z = (y * y) * 3
out = torch.mean(z)
print(z, out)
print(z.requires_grad, out.requires_grad)
print(z.grad_fn, out.grad_fn)
print("---")

# Run backprop from the scalar `out`; the gradient w.r.t. x lands in x.grad.
out.backward()
print(x.grad)

# Calling backward() a second time on the same graph raises an error:
#out.backward() # <-- run backprop
#print(x.grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True
None
---
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
True
<AddBackward0 object at 0x12cde6a20>
---
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)
True True
<MulBackward0 object at 0x12cde6a20> <MeanBackward0 object at 0x12cde6ac8>
---
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


### vector-Jacobian product
This characteristic of vector-Jacobian product makes it very convenient to feed external gradients into a model that has non-scalar output.

Now in this case `y` is no longer a scalar. `torch.autograd` cannot compute the full Jacobian directly, but if we just want the vector-Jacobian product, we simply pass the vector to `backward` as an argument. Here $J$ is the Jacobian of $y$ with respect to $x$, and ${\rm x.grad} = J^\top v$. If the backward target is a scalar function $l$ of $y$, then $v = \left(\frac{\partial l}{\partial y_1}, \cdots, \frac{\partial l}{\partial y_m}\right)^\top$.

In [35]:
# Vector-Jacobian product demo: y is a vector-valued function of x, so
# backward() needs an explicit vector v and stores J^T v in x.grad.
x = torch.ones(3, requires_grad=True)
y = x * 2

# The norm is only used for printing and the loop condition, so it is taken on
# a detached tensor. detach() replaces the legacy `.data` attribute, which
# bypasses autograd's correctness checks.
print(f"norm : {y.detach().norm()}")
print(y)
# Keep doubling y until its norm reaches 1000; each doubling is recorded in the graph.
while y.detach().norm() < 1000:
    y = y * 2
    print(f"norm : {y.detach().norm()}")
    print(y)

# v weights each component of y; x.grad becomes v scaled by the accumulated factor.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)
print(x.grad)

norm : 3.464101552963257
tensor([2., 2., 2.], grad_fn=<MulBackward0>)
norm : 6.928203105926514
tensor([4., 4., 4.], grad_fn=<MulBackward0>)
norm : 13.856406211853027
tensor([8., 8., 8.], grad_fn=<MulBackward0>)
norm : 27.712812423706055
tensor([16., 16., 16.], grad_fn=<MulBackward0>)
norm : 55.42562484741211
tensor([32., 32., 32.], grad_fn=<MulBackward0>)
norm : 110.85124969482422
tensor([64., 64., 64.], grad_fn=<MulBackward0>)
norm : 221.70249938964844
tensor([128., 128., 128.], grad_fn=<MulBackward0>)
norm : 443.4049987792969
tensor([256., 256., 256.], grad_fn=<MulBackward0>)
norm : 886.8099975585938
tensor([512., 512., 512.], grad_fn=<MulBackward0>)
norm : 1773.6199951171875
tensor([1024., 1024., 1024.], grad_fn=<MulBackward0>)
tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])


### Stop autograd by wrapping with no_grad

In [38]:
x = torch.ones((2, 2), requires_grad=True)
print(x)
# Outside no_grad, a result computed from x tracks gradients.
print(x.pow(2).requires_grad)

# Inside the no_grad block, results are computed without gradient tracking.
with torch.no_grad():
    print(x.pow(2).requires_grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True
False


### Use `detach()` to get a new Tensor with the same content that does not require gradients

In [59]:
# Two single-element leaf tensors that track gradients.
x = torch.tensor([2], requires_grad=True, dtype=torch.float)
y = torch.tensor([3], requires_grad=True, dtype=torch.float)

out = x.pow(2) * y.pow(2)
print(out.requires_grad)

# detach() returns a tensor with the same values that does not require grad.
z = out.detach()
print(z.requires_grad)

# z.backward() # <-- raises an error

True
False


## Autograd and some specific topics

In [None]:
# Utilities


### Define Fully Connected Feed Forward NN $f_{\theta}$ 

In [4]:
# Sample a 5-element vector from the standard normal distribution.
x = torch.randn((5,))

print(x)

tensor([ 0.6864, -0.6671, -1.8042, -1.0444,  0.7582])


### Calculate Loss $L(\theta)$

### Calculate $\nabla_\theta L(\theta, x)|_{x=x_0}$

### Calculate $\nabla_x L(\theta, x)|_{x=x_0}$

### Update $\theta \leftarrow \theta + \alpha \nabla_\theta L(\theta, x)|_{x=x_0}$

### Update weight by minibatch $\frac{1}{N} \sum_{n=1}^N \nabla_\theta L(\theta, x)|_{x=x_n}$

### Update $x \leftarrow x + \alpha \nabla_x L(\theta, x)|_x$

### Copy $\theta_1$ to $\theta_2$

### Copy $\theta_1$ to $\theta_2$ and update only $\theta_1$ by $\theta \leftarrow \theta + \alpha \nabla_\theta L(\theta, x)|_{x=x_0}$

### Stochastic sampling from $f_\theta(x)$

### Inject noise (reparametrization trick) and calculate the gradient $\nabla_\theta f_\theta(x, \epsilon)$, $\epsilon\sim {\cal N}(\mu, \sigma^2)$

### Use weighted update $\theta \leftarrow \theta + \alpha \delta \nabla_\theta L(\theta, x)|_{x=x_0}$, $\delta  = f_\theta(x) - \theta_0$

### Use noisy weight $f(x, \theta + \epsilon)$, $\epsilon \sim {\cal N}(0, 1)$

### Update $\theta$ by the approximated variational method

# Convolutional Neural Networks

### Define CNN $f_\theta(x)$

### Update weight by minibatch for CNN

### Update $x \leftarrow x + \alpha \nabla_x L(\theta, x)|_x$ for CNN

# Recurrent Neural Networks

### Define RNN $f_\theta(x_0, x_1, \dots, x_T)$

### Update weight by minibatch for RNN

### Update $x \leftarrow x + \alpha \nabla_x L(\theta, x)|_x$ for RNN