In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Numpy/pandas array to torch tensor

Array (matrix)

$$\left[\begin{array}{lll}
1 & 0 & 1\\
2 & 1 & 1
\end{array}\right]
$$

In `numpy`:

In [2]:
# The 2x3 matrix from the formula above, built from a nested list of rows.
a = np.asarray([[1, 0, 1], [2, 1, 1]])
a

array([[1, 0, 1],
       [2, 1, 1]])

We can transform it into a `pytorch` __tensor__.

In [3]:
# from_numpy wraps the existing array instead of copying it, so `b` and `a`
# share memory (demonstrated in the "They share memory!" section below).
b = torch.from_numpy(a)
b

tensor([[1, 0, 1],
        [2, 1, 1]])

### BTW
You can do something similar with `pandas.DataFrame`:

In [4]:
# Same idea for pandas: take the frame's values as a NumPy array and wrap it.
df = pd.DataFrame({"a": [0, 1, 2], "b": [2, 3, 4]})
df_torch = torch.from_numpy(df.to_numpy())
df_torch

tensor([[0, 2],
        [1, 3],
        [2, 4]])

### Exercise

Create the array below and transform it into a tensor:

$$\left[\begin{array}{llll}
1 & 0 & 1 & 1\\
2 & 1 & 1 & 1
\end{array}\right]
$$

In [5]:
# Build the 2x4 exercise matrix and wrap it as a tensor in a single step.
sample = torch.from_numpy(np.array([[1, 0, 1, 1], [2, 1, 1, 1]]))
sample

tensor([[1, 0, 1, 1],
        [2, 1, 1, 1]])

### They share memory!

In [9]:
# Write through the NumPy array, then display the tensor created from it:
# the change shows up in `b` as well.
a[0, 0] = 666
b

tensor([[666,   0,   1],
        [  2,   1,   1]])

In [10]:
# Same experiment in the other direction: write through the tensor,
# then display the DataFrame it was created from.
df_torch[0, 0] = 666
df

Unnamed: 0,a,b
0,666,2
1,1,3
2,2,4


### Other ways

#### From python list

In [11]:
# Build a 2x3 tensor directly from a nested Python list, one row per line.
c = torch.tensor(
    [
        [0, 1, 0],
        [1, 1, 1],
    ]
)
c

tensor([[0, 1, 0],
        [1, 1, 1]])

#### tensor of ones and zeros

In [9]:
# A 2x3 tensor filled with ones (floating point by default).
d = torch.ones(2, 3)
d

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [10]:
torch.zeros(size=[2, 3])

tensor([[0., 0., 0.],
        [0., 0., 0.]])

#### Eye tensor

In [11]:
e = torch.eye(n=3)
e

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

#### Sequence

In [12]:
# The end value is exclusive, so this yields the integers 1 through 10.
torch.arange(1, 11)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

#### normal sampling

In [13]:
# Draw a 2x3 sample; `mean` and `std` are supplied elementwise as tensors.
f = torch.normal(
    mean=torch.zeros(2, 3),
    std=torch.ones(2, 3),
)
f

tensor([[-0.3164, -0.7827,  0.5904],
        [-0.7115,  0.4695, -0.6747]])

In [75]:
torch.randn(2, 3)

tensor([[0.4742, 0.8311, 2.0675],
        [0.6166, 0.9869, 0.5459]])

## Exercise

Create a few matrices.

In [14]:
# Sample a 2x3 tensor elementwise from a normal distribution with mean 1 and
# standard deviation 10.
# A standard deviation must be non-negative: current PyTorch releases raise a
# RuntimeError for a negative `std`, so the scale is +10 rather than -10.
f = torch.normal(mean=torch.ones(size=[2, 3]), std=10 * torch.ones(size=[2, 3]))
f

tensor([[-11.0713,  -5.0598,  -2.1445],
        [ -0.3915,  12.0287,   7.0188]])

## More than two dimensions 

In [15]:
# A 3-D tensor: two 2x3 matrices stacked along the first axis.
f = torch.tensor([
    [[0, 1, 0], [1, 1, 1]],
    [[1, 2, 3], [4, 5, 6]],
])
f

tensor([[[0, 1, 0],
         [1, 1, 1]],

        [[1, 2, 3],
         [4, 5, 6]]])

In [16]:
f.shape

torch.Size([2, 2, 3])

## Operations

In [17]:
# Restore the entry that was overwritten with 666 earlier, so the examples
# below operate on the original matrix.
b[0, 0] = 1
print(b)
print(c)

tensor([[1, 0, 1],
        [2, 1, 1]])
tensor([[0, 1, 0],
        [1, 1, 1]])


### sums

#### pointwise

In [18]:
b + c

tensor([[1, 1, 1],
        [3, 2, 2]])

#### total sum

In [19]:
b.sum()

tensor(6)

#### sums of rows (columns...)

In [20]:
# dim=1 sums along the columns, giving one total per row.
b.sum(dim=1)

tensor([2, 4])

### scalar multiplication

In [21]:
b

tensor([[1, 0, 1],
        [2, 1, 1]])

In [22]:
2 * b

tensor([[2, 0, 2],
        [4, 2, 2]])

### pointwise multiplication

In [23]:
b * c

tensor([[0, 0, 0],
        [2, 1, 1]])

### dot product

In [24]:
print(b[1])
print(c[1])

tensor([2, 1, 1])
tensor([1, 1, 1])


In [25]:
torch.dot(b[1], c[1])

tensor(4)

### transpose

In [26]:
c.t()

tensor([[0, 1],
        [1, 1],
        [0, 1]])

In [27]:
c.permute([1, 0])

tensor([[0, 1],
        [1, 1],
        [0, 1]])

In [53]:
# A 2x3x4 tensor used to illustrate permuting more than two axes.
f = torch.tensor([
    [[0, 1, 0, 1], [1, 1, 1, 1], [2, 2, 3, 3]],
    [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
])
f

tensor([[[ 0,  1,  0,  1],
         [ 1,  1,  1,  1],
         [ 2,  2,  3,  3]],

        [[ 1,  2,  3,  4],
         [ 5,  6,  7,  8],
         [ 9, 10, 11, 12]]])

In [54]:
# Reorder the axes: new axis 0 is old axis 1, new axis 1 is old axis 2 and
# new axis 2 is old axis 0, so the shape goes from (2, 3, 4) to (3, 4, 2).
f.permute([1, 2, 0])

tensor([[[ 0,  1],
         [ 1,  2],
         [ 0,  3],
         [ 1,  4]],

        [[ 1,  5],
         [ 1,  6],
         [ 1,  7],
         [ 1,  8]],

        [[ 2,  9],
         [ 2, 10],
         [ 3, 11],
         [ 3, 12]]])

### Matrix multiplication

In [55]:
torch.matmul(b, c.t())

tensor([[0, 2],
        [1, 4]])

In [56]:
c.t().unsqueeze(0).shape

torch.Size([1, 3, 2])

### unsqueeze

In [57]:
b.unsqueeze(1)

tensor([[[1, 0, 1]],

        [[2, 1, 1]]])

In [58]:
c.t().unsqueeze(0)

tensor([[[0, 1],
         [1, 1],
         [0, 1]]])

In [59]:
# `b` is (2, 3) and the second operand is (1, 3, 2): matmul treats the extra
# leading dimension as a batch dimension, so the result has shape (1, 2, 2).
torch.matmul(b, c.t().unsqueeze(0)).shape

torch.Size([1, 2, 2])

#### concatenation

In [69]:
print(b)
print(c)

tensor([[1, 0, 1],
        [2, 1, 1]])
tensor([[0, 1, 0],
        [1, 1, 1]])


In [67]:
# dim 0: the rows of `c` are appended underneath the rows of `b`.
torch.cat([b, c], 0)

tensor([[1, 0, 1],
        [2, 1, 1],
        [0, 1, 0],
        [1, 1, 1]])

In [68]:
# dim 1: the columns of `c` are appended to the right of `b`.
torch.cat([b, c], 1)

tensor([[1, 0, 1, 0, 1, 0],
        [2, 1, 1, 1, 1, 1]])

#### view

In [73]:
# Twelve consecutive integers, 1 through 12.
g = torch.arange(1, 13)
# -1 lets view infer that dimension from the element count: 12 / 2 = 6 rows.
g.view(-1, 2)

tensor([[ 1,  2],
        [ 3,  4],
        [ 5,  6],
        [ 7,  8],
        [ 9, 10],
        [11, 12]])

In [74]:
# 12 elements as 2 blocks of 3 rows; the trailing -1 is inferred as 2 columns.
g.view(2, 3, -1)

tensor([[[ 1,  2],
         [ 3,  4],
         [ 5,  6]],

        [[ 7,  8],
         [ 9, 10],
         [11, 12]]])

### Exercises

1. Create a 5x5 zero tensor with elements on the main diagonal equal to 1, 2, 3, 4, 5.

In [52]:
# Place the values 1..5 on the main diagonal of a 5x5 zero tensor.
# torch.diag builds the matrix directly from a 1-D tensor, which replaces the
# identity matrix plus Python loop; float32 keeps the dtype torch.eye produced.
mtx = torch.diag(torch.arange(1, 6, dtype=torch.float32))
# End the cell with the tensor itself so the result is actually displayed.
mtx

tensor([[1., 0., 0., 0., 0.],
        [0., 2., 0., 0., 0.],
        [0., 0., 3., 0., 0.],
        [0., 0., 0., 4., 0.],
        [0., 0., 0., 0., 5.]])

## Derivatives: How autograd works

$$x_1 = \left[\begin{array}{l}
x_1^1\\
x_1^2
\end{array}\right]=
\left[\begin{array}{l}
0.1\\
0.2 
\end{array}\right]
$$

$$ x_2 = \left[\begin{array}{l}
x_2^1\\
x_2^2
\end{array}\right]=
\left[\begin{array}{l}
0.1\\
0.1 
\end{array}\right]
$$

$$y = 3\cdot x_1 + x_2 +  
\left[\begin{array}{l}
    1\\
    1
\end{array}\right] = 
\left[\begin{array}{l}
    3x_1^1 + x_2^1 + 1\\
    3x_1^2 + x_2^2 + 1
\end{array}\right]$$

$$f(x_1, x_2) = \textrm{mean}(y) = \frac{1}{2}(3x_1^1 + x_2^1 + 1 + 3x_1^2 + x_2^2 + 1)$$

$$(\nabla f)_{x_1} = \left[\begin{array}{l}
    \frac{\partial f}{\partial x_1^1}\\
    \frac{\partial f}{\partial x_1^2}
\end{array}\right] = 
\left[\begin{array}{l}
    1.5\\
    1.5
\end{array}\right] 
$$

### Now in pytorch

In [60]:
# requires_grad=True asks autograd to track the operations on these tensors.
x1 = torch.tensor([0.1, 0.2], requires_grad=True)
x2 = torch.tensor([0.1, 0.1], requires_grad=True)
# y = 3*x1 + x2 + 1, computed elementwise; the result carries a grad_fn.
y = 3 * x1 + x2 + 1
y

tensor([1.4000, 1.7000], grad_fn=<AddBackward0>)

In [61]:
# f(x1, x2) = mean(y): a single scalar to differentiate.
output = y.mean()

# Fills x1.grad and x2.grad with the gradients of `output` w.r.t. x1 and x2.
output.backward()
print(x1.grad, x2.grad)

tensor([1.5000, 1.5000]) tensor([0.5000, 0.5000])


The method `backward` calculates gradients and adds them to whatever is already stored in `.grad`. __It does not put them to zero!__ But there is a convenient method for that: `x1.grad.zero_()`.

In [62]:
# Rebuild the same computation and call backward() again without clearing the
# gradients: the new values are added to the ones already stored in .grad,
# so both printed gradients come out doubled.
y = 3 * x1 + x2 + 1
output = y.mean()
output.backward()
print(x1.grad, x2.grad)

tensor([3., 3.]) tensor([1., 1.])


## Let's check if more complicated derivatives are calculated correctly

We start with $x = 2$.

$y = f(x) = x^2 - 3$, $f(2) = 2^2 - 3 = 1$.

$z = g(y) = y^3 - 2y$, $g(f(2)) = g(1) = 1^3 - 2\cdot 1 = -1$

In [63]:
# Build the composition z = g(f(x)) at x = 2, printing each intermediate value.
x = torch.tensor([2.0], requires_grad=True)
print("x = ", x)
y = x**2 - 3  # y = f(x)
print("y = ", y)
z = y**3 - 2*y  # z = g(y)
print("z = ", z)


x =  tensor([2.], requires_grad=True)
y =  tensor([1.], grad_fn=<SubBackward0>)
z =  tensor([-1.], grad_fn=<SubBackward0>)



#### Chain rule
$(g(f(x)))' = g'(f(x))\cdot f'(x)$

#### Derivatives
$f'(x) = 2x$, $f'(2) = 2\cdot 2 = 4$

$g'(y) = 3y^2 - 2$, $g'(f(2)) = g'(1) = 3 - 2 = 1$

$(g(f(2)))' = g'(f(2))\cdot f'(2) = 1 \cdot 4 = 4$

In [64]:
# Backpropagate from z to x; x.grad then holds dz/dx at x = 2, which should
# match the value 4 obtained from the chain rule by hand.
z.backward()
x.grad

tensor([4.])