## MLP


In [0]:
import torch
from torch import nn
from tqdm.notebook import tqdm # easily showing epochsd
import numpy as np

from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
from torch import optim

In [0]:
class MLP(nn.Module):
  def __init__(self, input_size, hidden_sizes, num_classes):
    super(MLP, self).__init__()

    hidden_sizes = [input_size] + hidden_sizes
    layers = []
    for i in range(1, len(hidden_sizes)):
      in_s = hidden_sizes[i-1]
      out_s = hidden_sizes[i]
      layers += [nn.Linear(in_s, out_s)]
      layers += [nn.ReLU()]
    layers += [nn.Linear(out_s, num_classes)]
    layers += [nn.Softmax(dim=-1)]

    self.net = nn.Sequential(*layers)

  def forward(self, x):
      return self.net(x)

In [3]:
mlp = MLP(784, [100, 20], 10)
print(mlp)

MLP(
  (net): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=10, bias=True)
    (5): Softmax(dim=-1)
  )
)


In [0]:
mnist = MNIST('', train=False, transform=ToTensor(), download=True)

In [5]:
mnist

Dataset MNIST
    Number of datapoints: 10000
    Root location: 
    Split: Test
    StandardTransform
Transform: ToTensor()

In [6]:
train_data, test_data = random_split(mnist, [6000, 4000])
len(train_data), len(test_data)

(6000, 4000)

In [0]:
train_loader = DataLoader(train_data, batch_size=64, pin_memory=True, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, pin_memory=True, shuffle=True)

In [8]:
for batch in train_loader:
  x, y = batch
  print(x.shape, y.shape)
  break

torch.Size([64, 1, 28, 28]) torch.Size([64])


In [18]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp.parameters(), lr=1e-4)
mlp.cuda()

MLP(
  (net): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=10, bias=True)
    (5): Softmax(dim=-1)
  )
)

In [22]:
mlp.train()
for epoch in tqdm(range(10)):
  losses = []
  mlp.train()
  for batch in train_loader:
    optimizer.zero_grad()
    x, y = batch
    X = x.view(x.shape[0], -1)
    o = mlp(X.float().cuda())
    loss = criterion(o.float(), y.cuda().long()) # both has to be on cpu or gpu (one device)
    loss.backward()
    optimizer.step()

    losses += [loss.item()]
  print(epoch+1, 'loss', np.mean(losses))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

1 loss 2.2359932534238127
2 loss 2.1148212032115206
3 loss 1.9904633709724913
4 loss 1.8877426677561822
5 loss 1.8191057380209579
6 loss 1.774235334802181
7 loss 1.7442710399627686
8 loss 1.7227711804369663
9 loss 1.7064895921565117
10 loss 1.6934078120170755



## CNN

In [0]:
class CNN(nn.Module):
    def __init__(self, input_size, num_classes, in_channel, out_channels, kernels, strides, dropouts):
        super(CNN, self).__init__()

        layers = []
        for i in range(len(out_channels)):
            in_c = in_channel if i == 0 else out_channels[i-1]
            out_c = out_channels[i]
            k = kernels[i]
            s = strides[i]
            d = dropouts[i]
            input_size = input_size // s  # 	Floor division
            layers += [nn.Conv2d(in_c, out_c, k, s, padding=(k-s+1)//2), 
                       nn.BatchNorm2d(out_c),
                       nn.ReLU(),
                       nn.Dropout(d)]
        layers += [nn.Flatten(), nn.Linear(out_c*input_size*input_size, num_classes), nn.BatchNorm1d(num_classes), nn.Softmax()]
        self.net = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.net(x)