In [None]:
import torch

# Solving a simple linear system with PyTorch and gradient descent

$$w+3x+2y-z=9\\
5w+2x+y-2z=4\\
x+2y+4z=24\\
w+x-y-3z=-12$$

Above is a system of linear equations that can be written in matrix form as `Ax = y`. We will first represent each matrix and vector as a torch tensor.

$$
A=
\begin{bmatrix}
1 & 3 & 2 & -1\\
5 & 2 & 1 & -2\\
0 & 1 & 2 & 4\\
1 & 1 & -1 & -3
\end{bmatrix}
$$

In [None]:
# Coefficient matrix of the system: one row per equation, with the
# columns holding the coefficients of w, x, y and z. Created as float32.
A = torch.tensor(
    [
        [1, 3, 2, -1],
        [5, 2, 1, -2],
        [0, 1, 2, 4],
        [1, 1, -1, -3],
    ],
    dtype=torch.float32,
)
A

tensor([[ 1.,  3.,  2., -1.],
        [ 5.,  2.,  1., -2.],
        [ 0.,  1.,  2.,  4.],
        [ 1.,  1., -1., -3.]])

$$
\mathbf{y}=
\begin{bmatrix}
9\\
4\\
24\\
-12
\end{bmatrix}
$$


In [None]:
# Right-hand side of the system, one entry per equation, as float32.
y = torch.tensor([9, 4, 24, -12], dtype=torch.float32)
y

tensor([  9.,   4.,  24., -12.])

$$
\mathbf{x}=
\begin{bmatrix}
w\\
x\\
y\\
z
\end{bmatrix}
$$

We do not yet know the values in x, so we first initialise it with random numbers. Setting `requires_grad=True` tells PyTorch to track gradients with respect to x.

In [None]:
# Initial guess for the four unknowns, drawn from a standard normal
# distribution. requires_grad=True makes autograd track gradients for it.
x = torch.randn(4, requires_grad=True)
x

tensor([-0.2719, -0.1371,  0.5924, -0.0146], requires_grad=True)

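$$L = \frac{1}{n}||A\mathbf{x}-\mathbf{y}||^2_2$$

For the loss, we use the Mean Squared Error (MSE), i.e. the squared L2 norm of the residual divided by the number of equations $n$ (here $n=4$). We take the difference between our predicted y, `Ax`, and our actual `y`, and square it so that every term is positive. The absolute value is not used because it is harder to differentiate (it is not differentiable at zero).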

In [None]:
# MSELoss with its default settings averages the squared element-wise
# differences between the prediction and the target.
loss = torch.nn.MSELoss()

# Loss of the current guess: compare the prediction A @ x against y.
L = loss(A @ x, y)
L

tensor(187.1334, grad_fn=<MseLossBackward0>)

In [None]:
# Stochastic gradient descent with momentum; x is the only tensor it updates.
optimizer = torch.optim.SGD(params=[x], lr=0.01, momentum=0.9)

In [None]:
# Run 1000 gradient-descent updates on x.
for i in range(1000):
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()
    # Forward pass: loss of the current guess against the target y.
    L = loss(A @ x, y)
    # Backward pass: populate x.grad with dL/dx.
    L.backward()
    # Update x in place using the gradient and the momentum buffer.
    optimizer.step()

In [None]:
# Display the values of x (the unknowns w, x, y, z) after the training loop.
x

tensor([1.0000, 2.0000, 3.0000, 4.0000], requires_grad=True)

Now that we know how gradient descent works, let us do something harder: creating a neural network (NN).

# Create a Neural Network

In [None]:
import os
from torch import nn
from torch.utils.data import DataLoader

We define our neural network by subclassing `nn.Module`, and
initialize the neural network layers in `__init__`. `forward` defines how the data is passed through the neural network.

In [None]:
# DO NOT RUN THIS AT ALL
class Ne(nn.Module):
    """Empty ``nn.Module`` skeleton: no layers and no forward logic yet."""

    def __init__(self):
        """Initialise the base ``nn.Module``; no layers are registered."""
        super().__init__()

    def forward(self, x):
        """Placeholder forward pass; ignores ``x`` and returns ``None``."""
        return None

We are making a model to classify MNIST, a dataset of images of handwritten digits. First, we need to turn each image into a list of numbers. We can do this by flattening a 28 by 28 image into an array of 784 values, each being a grayscale intensity from 0 to 255. Let us add that in!

In [None]:
# DO NOT RUN THIS AT ALL
class Neu(nn.Module):
    """Skeleton network that only registers a flatten layer so far."""

    def __init__(self):
        """Initialise the base ``nn.Module`` and register ``self.flatten``."""
        super().__init__()
        # nn.Flatten collapses every dimension after the first into one.
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Placeholder forward pass; ignores ``x`` and returns ``None``."""
        return None

We added `nn.Flatten`, which turns each image into a single flat array.

Now we need some layers of nodes. Luckily, torch gives us the `nn.Linear` module to create a fully connected layer of nodes.

```
nn.Linear(in_features=(number of input nodes), out_features=(number of output nodes))
```

We also need to pass the output of each layer through a non-linear function, so that our neural network is not limited to modelling linear functions. We can use the `ReLU` activation function for this.

```
nn.ReLU()(input)
```


However, at the output, we cannot just give raw numbers: what would they mean? We need to scale them to lie between 0 and 1 and sum to 1, so that they represent the probability of each class. This is what softmax does. `dim` is the dimension along which the softmax is computed.

```
nn.Softmax(dim=n)(input)
```

Finally, how do we make sure that the data goes through all the layers sequentially? We can use `nn.Sequential`, which is a container of layers that the data passes through in order.

In [None]:
# DO NOT RUN THIS AT ALL
class Neural(nn.Module):
    """Draft classifier: layers are defined, the forward pass is not yet written.

    The layer stack maps 784 input features through two hidden layers
    (512 and 256 units, each followed by ReLU) to 10 outputs, and ends
    with a softmax over dimension 1.
    """

    def __init__(self):
        """Register the flatten layer and the fully connected layer stack."""
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
            # nn.Sequential only accepts module *instances*; passing the
            # bare class raises TypeError. dim=1 applies softmax across
            # the 10 outputs of each row.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Placeholder forward pass; ignores ``x`` and returns ``None``."""
        return None

Now let us finish it by coding the forward function

In [None]:
class Neural(nn.Module):
    """Fully connected classifier with a softmax output.

    The input is flattened, passed through 784 -> 512 -> 256 -> 10
    linear layers with ReLU between them, and finished with a softmax
    over dimension 1.
    """

    def __init__(self):
        """Register the flatten layer and the layer stack."""
        super().__init__()
        self.flatten = nn.Flatten()
        layers = [
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
            nn.Softmax(dim=1),
        ]
        # Sequential applies the layers in the order they are listed.
        self.stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the softmax output of the layer stack.

        ``x`` must flatten to 784 features per sample, e.g. a batch of
        28 by 28 images.
        """
        return self.stack(self.flatten(x))