A notebook to test Pytorch.

In [53]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import numpy as np
import os
import torch

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [54]:
import matplotlib
import matplotlib.pyplot as plt

# font options
font = {
    #'family' : 'normal',
    #'weight' : 'bold',
    'size'   : 24
}

plt.rc('font', **font)
plt.rc('lines', linewidth=2)
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

In [55]:
torch.zeros(3, device=torch.device('cpu'))

tensor([0., 0., 0.])

In [56]:
w = torch.tensor([3,5,4.0])
w.dot(torch.ones(3))

tensor(12.)

## Torch 2d to 1d and vice versa

In [57]:
torch.eye(3).matmul(torch.tensor([[2., 3, 1]]).T)

tensor([[2.],
        [3.],
        [1.]])

In [58]:
torch.tensor([[2., 3, 1]]).T

tensor([[2.],
        [3.],
        [1.]])

In [59]:
V = torch.randn(3, 1)
V

tensor([[ 0.9358],
        [-0.7846],
        [ 2.3494]])

In [60]:
V.reshape(-1)

tensor([ 0.9358, -0.7846,  2.3494])

In [61]:
v = torch.rand(4)
v

tensor([0.6966, 0.9078, 0.7852, 0.7538])

In [62]:
v - 1

tensor([-0.3034, -0.0922, -0.2148, -0.2462])

In [63]:
v.reshape(4, 1)

tensor([[0.6966],
        [0.9078],
        [0.7852],
        [0.7538]])

## Torch distributions
https://pytorch.org/docs/stable/distributions.html

In [64]:
import torch.distributions as dists

In [65]:
dis = dists.Normal(0, 1)
dis.log_prob(torch.ones(5))

tensor([-1.4189, -1.4189, -1.4189, -1.4189, -1.4189])

### Grad of log (Normal density)

$$ \nabla_x \log p(x) = -x$$ where $p(x) = \mathcal{N}(0,1)$.

Input is a scalar

In [66]:
x = torch.tensor([3.0], requires_grad=True)
y = 2*x
lp = dis.log_prob(y)

In [67]:
lp.backward(retain_graph=True) 
x.grad

tensor([-12.])

In [68]:
torch.autograd.grad([lp], [x, y], retain_graph=True, only_inputs=True)

(tensor([-12.]), tensor([-6.]))

Input is n x 1

In [69]:
X = torch.tensor([[-3, 1.0, 2]], requires_grad=True).T
Lp = dis.log_prob(X)
sum_lp = torch.sum(Lp)

In [70]:
torch.autograd.grad(sum_lp, X, retain_graph=True, only_inputs=True)

(tensor([[ 3.],
         [-1.],
         [-2.]]),)

## Multivariate normal

Assume $p(x) = \mathcal{N}(x \mid \mu, I)$. Then

$$ \nabla_x \log p(x) = -(x-\mu) $$

In [71]:
mdist = dists.MultivariateNormal(
    loc=torch.tensor([1, 2]), covariance_matrix=torch.eye(2))

X = torch.tensor([
    [0, 0.0],
    [0, 1.0],
    [1.0, 4],
])
X.requires_grad = True
logp = mdist.log_prob(X)
torch.autograd.grad(torch.sum(logp), X, retain_graph=True, only_inputs=True)[0]

tensor([[ 1.,  2.],
        [ 1.,  1.],
        [ 0., -2.]])

## Univariate normal

In [72]:
x = torch.randn(5, 1)
stdnorm = dists.Normal(0,1)
stdnorm.log_prob(x)

tensor([[-1.0063],
        [-0.9370],
        [-0.9294],
        [-1.1526],
        [-1.1304]])

In [73]:
var = 2.8
stdev = var**0.5
norm = dists.Normal(0, scale=stdev)
norm.log_prob(x)

tensor([[-1.4649],
        [-1.4402],
        [-1.4375],
        [-1.5172],
        [-1.5093]])

In [74]:
stdnorm.log_prob(x/stdev) - torch.log(torch.tensor(stdev))

tensor([[-1.4649],
        [-1.4402],
        [-1.4375],
        [-1.5172],
        [-1.5093]])

In [75]:
x.shape == [5]

False

In [76]:
torch.all(x <= 0).item()

False

## Uniform distribution

In [77]:
U = dists.Uniform(low=-3.0, high=3.0)


## Broadcasting

In [78]:
a = torch.arange(1, 4).reshape(3, 1)
a

tensor([[1],
        [2],
        [3]])

In [79]:
b = torch.arange(3, 7).reshape(1, 4)
b

tensor([[3, 4, 5, 6]])

In [80]:
a+b

tensor([[4, 5, 6, 7],
        [5, 6, 7, 8],
        [6, 7, 8, 9]])

In [81]:
a + b.reshape(-1)

tensor([[4, 5, 6, 7],
        [5, 6, 7, 8],
        [6, 7, 8, 9]])

In [82]:
torch.exp

<function _VariableFunctions.exp>

In [83]:
torch.zeros((3,4))

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

Subtract each row

In [84]:
A = torch.arange(18).reshape(6, 3)
A

tensor([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]])

In [85]:
m = torch.tensor([1, 2, 3.0])
A -m 

tensor([[-1., -1., -1.],
        [ 2.,  2.,  2.],
        [ 5.,  5.,  5.],
        [ 8.,  8.,  8.],
        [11., 11., 11.],
        [14., 14., 14.]])

## Remove diagonal

In [86]:
A = torch.arange(16).reshape(4,4)
A

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]])

In [87]:
diagA = torch.diagflat(torch.diag(A))
diagA

tensor([[ 0,  0,  0,  0],
        [ 0,  5,  0,  0],
        [ 0,  0, 10,  0],
        [ 0,  0,  0, 15]])

In [88]:
A - diagA

tensor([[ 0,  1,  2,  3],
        [ 4,  0,  6,  7],
        [ 8,  9,  0, 11],
        [12, 13, 14,  0]])

## Multinomial

In [89]:
d = 5
m = 20
mult = dists.multinomial.Multinomial(total_count=d, probs=torch.ones(d)/d)
mult.sample()

tensor([3., 1., 0., 0., 1.])

## Categorical distribution

In [132]:
cat = dists.Categorical(probs=torch.tensor([0.2, 0.3, 0.4, 0.1]))
cat.sample((10, 1))

tensor([[3],
        [3],
        [2],
        [0],
        [1],
        [0],
        [0],
        [0],
        [1],
        [0]])

In [140]:
s = cat.sample((1,1))
s

tensor([[1]])

In [141]:
s.item()

1

In [150]:
A = torch.arange(20).reshape(1, 4, 5)
A

tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]]])

In [153]:
A[:, 1, :]

tensor([[5, 6, 7, 8, 9]])

## Quadratic form
   

In [90]:
w = torch.rand(5)
A = torch.randn(5,5)

In [91]:
A.matmul(w).matmul(w)

tensor(-0.2663)

## Bool to float

In [92]:
v = torch.randn(5)
v

tensor([ 0.1912, -0.3390,  0.2140,  0.1513,  0.6651])

In [93]:
I = v >  0
I

tensor([ True, False,  True,  True,  True])

In [94]:
I.to(torch.float)

tensor([1., 0., 1., 1., 1.])

## Torch scalar to normal float

In [95]:
torch_num = torch.tensor(3.78)
torch_num.item()

3.7799999713897705

## Elementwise product

In [96]:
A = torch.randn(5,2)
B = torch.rand(5, 2)
torch.sum(A*B, 1)

tensor([-0.3653,  0.2048,  0.4840,  1.2905, -0.8223])

## Meshgrid


In [97]:
a = torch.arange(5.0)
print(a)
b = torch.rand(4)
print(b)

tensor([0., 1., 2., 3., 4.])
tensor([0.4774, 0.6320, 0.9676, 0.7630])


In [98]:
torch.meshgrid(a,b)

(tensor([[0., 0., 0., 0.],
         [1., 1., 1., 1.],
         [2., 2., 2., 2.],
         [3., 3., 3., 3.],
         [4., 4., 4., 4.]]), tensor([[0.4774, 0.6320, 0.9676, 0.7630],
         [0.4774, 0.6320, 0.9676, 0.7630],
         [0.4774, 0.6320, 0.9676, 0.7630],
         [0.4774, 0.6320, 0.9676, 0.7630],
         [0.4774, 0.6320, 0.9676, 0.7630]]))

## Clamp values in a tensor


In [99]:
a = torch.tensor([4.0, -2.3, 2])
a

tensor([ 4.0000, -2.3000,  2.0000])

In [100]:
a.clamp_(min=0)

tensor([4., 0., 2.])

## Indexing

In [101]:
A = torch.randn(5,3)
A

tensor([[ 0.2089,  0.2179,  1.4288],
        [-0.6123, -0.8670,  1.1052],
        [-0.8837, -0.5589,  0.2848],
        [-0.8398, -0.6359, -1.3771],
        [-0.1890,  0.0214,  0.3488]])

## Mean and std of data

In [102]:
X = torch.randn(8, 3)
X

tensor([[-2.0473,  0.1458,  0.9283],
        [-0.7038, -0.1357,  1.9979],
        [-0.3363,  2.2553,  1.3484],
        [ 0.0141, -1.0226,  0.4755],
        [-0.6421, -0.9785, -1.9373],
        [-0.8793,  0.4317, -0.6543],
        [-0.8823, -0.6060,  0.0304],
        [ 1.2862, -0.9702,  0.1976]])

In [103]:
torch.min(X)

tensor(-2.0473)

In [104]:
torch.max(torch.std(X, dim=0))

tensor(1.2193)

In [107]:
X

tensor([[-2.0473,  0.1458,  0.9283],
        [-0.7038, -0.1357,  1.9979],
        [-0.3363,  2.2553,  1.3484],
        [ 0.0141, -1.0226,  0.4755],
        [-0.6421, -0.9785, -1.9373],
        [-0.8793,  0.4317, -0.6543],
        [-0.8823, -0.6060,  0.0304],
        [ 1.2862, -0.9702,  0.1976]])

In [112]:
(X <= 2) & (X >= 0)

tensor([[False,  True,  True],
        [False, False,  True],
        [False, False,  True],
        [ True, False,  True],
        [False, False, False],
        [False,  True, False],
        [False, False,  True],
        [ True, False,  True]])

In [114]:
torch.max(X, dim=1)

torch.return_types.max(
values=tensor([ 0.9283,  1.9979,  2.2553,  0.4755, -0.6421,  0.4317,  0.0304,  1.2862]),
indices=tensor([2, 2, 1, 2, 0, 1, 2, 0]))

In [117]:
X.size

<function Tensor.size>

In [156]:
dists.Normal(0, 1)

Normal(loc: 0.0, scale: 1.0)

In [165]:
X.prod(dim=-1)

tensor([-0.2771,  0.1908, -1.0227, -0.0069, -1.2173,  0.2483,  0.0163, -0.2466])

In [171]:
torch.tensor([1,3.0]).reshape(1, 2).repeat(5, 1)

tensor([[1., 3.],
        [1., 3.],
        [1., 3.],
        [1., 3.],
        [1., 3.]])

In [172]:
X

tensor([[-2.0473,  0.1458,  0.9283],
        [-0.7038, -0.1357,  1.9979],
        [-0.3363,  2.2553,  1.3484],
        [ 0.0141, -1.0226,  0.4755],
        [-0.6421, -0.9785, -1.9373],
        [-0.8793,  0.4317, -0.6543],
        [-0.8823, -0.6060,  0.0304],
        [ 1.2862, -0.9702,  0.1976]])