In [1]:
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from PIL import Image

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

try:
    from torchinfo import summary
except ModuleNotFoundError:
    !pip install torchinfo
    from torchinfo import summary

import os
import pathlib
import shutil
import sys

Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.1
[0m

In [2]:
"""
Same dir structure as on Kaggle
input/
    lfw-dataset/
        csv files
        lfw-deepfunneled/
working/
    notebook
    data/
        train/
        val/
        test/
"""

'\nSame dir structure as on Kaggle\ninput/\n    lfw-dataset/\n        csv files\n        lfw-deepfunneled/\nworking/\n    notebook\n    data/\n        train/\n        val/\n        test/\n'

In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# https://towardsdatascience.com/pytorch-switching-to-the-gpu-a7c0b21e8a99
# for modifications to use GPU

# Also this: https://github.com/pytorch/examples/blob/main/imagenet/main.py

cuda


In [4]:
data_folder = '../input/lfw-dataset/'

In [5]:
lfw_allnames = pd.read_csv(data_folder+"lfw_allnames.csv")

image_paths = lfw_allnames.loc[lfw_allnames.index.repeat(lfw_allnames['images'])]
image_paths['image_path'] = 1 + image_paths.groupby('name').cumcount()
image_paths['image_path'] = image_paths.image_path.apply(lambda x: str(x).zfill(4))
image_paths['image_path'] = image_paths.name + "/" + image_paths.name + "_" + image_paths.image_path + ".jpg"
image_paths = image_paths.drop("images", axis=1)

In [6]:
num_ppl = 25

print(image_paths['name'].value_counts()[:num_ppl])
list_people = list(image_paths['name'].value_counts()[:num_ppl].keys())
list_num_images = list(image_paths['name'].value_counts()[:num_ppl])
print(list_people, list_num_images)

George_W_Bush                530
Colin_Powell                 236
Tony_Blair                   144
Donald_Rumsfeld              121
Gerhard_Schroeder            109
Ariel_Sharon                  77
Hugo_Chavez                   71
Junichiro_Koizumi             60
Jean_Chretien                 55
John_Ashcroft                 53
Jacques_Chirac                52
Serena_Williams               52
Vladimir_Putin                49
Luiz_Inacio_Lula_da_Silva     48
Gloria_Macapagal_Arroyo       44
Jennifer_Capriati             42
Arnold_Schwarzenegger         42
Laura_Bush                    41
Lleyton_Hewitt                41
Hans_Blix                     39
Alejandro_Toledo              39
Nestor_Kirchner               37
Andre_Agassi                  36
Alvaro_Uribe                  35
Silvio_Berlusconi             33
Name: name, dtype: int64
['George_W_Bush', 'Colin_Powell', 'Tony_Blair', 'Donald_Rumsfeld', 'Gerhard_Schroeder', 'Ariel_Sharon', 'Hugo_Chavez', 'Junichiro_Koizumi', 'Jean_Chre

In [7]:
num_for_each = image_paths['name'].value_counts()[num_ppl-1]
tmp_l = []
for name in list(image_paths['name'].value_counts()[:num_ppl].keys()):
    tmp_l.append(image_paths[image_paths.name==name].sample(num_for_each))
data = pd.concat(tmp_l)
print(data)

                   name                                    image_path
1871      George_W_Bush          George_W_Bush/George_W_Bush_0297.jpg
1871      George_W_Bush          George_W_Bush/George_W_Bush_0227.jpg
1871      George_W_Bush          George_W_Bush/George_W_Bush_0186.jpg
1871      George_W_Bush          George_W_Bush/George_W_Bush_0336.jpg
1871      George_W_Bush          George_W_Bush/George_W_Bush_0045.jpg
...                 ...                                           ...
5039  Silvio_Berlusconi  Silvio_Berlusconi/Silvio_Berlusconi_0016.jpg
5039  Silvio_Berlusconi  Silvio_Berlusconi/Silvio_Berlusconi_0022.jpg
5039  Silvio_Berlusconi  Silvio_Berlusconi/Silvio_Berlusconi_0001.jpg
5039  Silvio_Berlusconi  Silvio_Berlusconi/Silvio_Berlusconi_0024.jpg
5039  Silvio_Berlusconi  Silvio_Berlusconi/Silvio_Berlusconi_0021.jpg

[825 rows x 2 columns]


In [8]:
data_train, data_test = train_test_split(data, test_size=0.2)
data_train, data_val = train_test_split(data_train, test_size=0.2)

In [9]:
print(data_train.shape, data_val.shape, data_test.shape)        # before augmentation

(528, 2) (132, 2) (165, 2)


In [10]:
data_root = './data/'

data_list = [data_train, data_val, data_test]
dirs = ['train', 'val', 'test']

# """             # (un)comment this line (only) and run, to copy

# # remove data directory if it exists
if os.path.exists(data_root) and os.path.isdir(data_root):
    shutil.rmtree(data_root)

transform_augment = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1)
])

for i in range(len(dirs)):
    pathlib.Path(os.path.join(data_root, dirs[i])).mkdir(parents=True, exist_ok=True)
    
    for person in list_people:
        if len(data_train[data_train['name']==person])>0:
            pathlib.Path(os.path.join(data_root, dirs[i], person)).mkdir(parents=True, exist_ok=True)

    for im_path in data_list[i].image_path:
        name = data[data['image_path']==im_path]['name'].iloc[0]
        path_from = os.path.join(data_folder+'/lfw-deepfunneled/lfw-deepfunneled/', im_path)
        filename, file_extension = os.path.splitext(path_from.split('/')[-1])
        path_to = os.path.join(data_root, dirs[i], name)

        if not os.path.isfile(os.path.join(path_to, im_path)):
            shutil.copy(path_from, path_to)         # earlier (just copies image)
            
            # if dirs[i]!='test':                   # test-time augmentation too?
            img = Image.open(path_from)
            img = transform_augment(img)            # transformed image
            img.save(path_to+'/'+filename+'_transformed'+file_extension)

# """

In [11]:
train_path = os.path.join(data_root, dirs[0])
val_path = os.path.join(data_root, dirs[1])
test_path = os.path.join(data_root, dirs[2])

train_transform = transforms.Compose(transforms=[
    # transforms.RandomHorizontalFlip(),
    # transforms.Grayscale(num_output_channels=1),         # convert to grayscale
    transforms.ToTensor(),
    # transforms.Normalize(mean=0, std=255),      # output = (input-mean)/std
])
test_transform = transforms.Compose(transforms=[
    # transforms.Grayscale(num_output_channels=1),         # convert to grayscale
    transforms.ToTensor(),
    # transforms.Normalize(mean=0, std=255)
])

dataloader_kwargs = {
    'pin_memory': True,
    'num_workers': 1,
    'batch_size': 1,
    'shuffle': True
}
non_blocking = dataloader_kwargs['pin_memory']  # https://stackoverflow.com/questions/55563376/

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, train_transform), **dataloader_kwargs
)
val_loader = DataLoader(
    torchvision.datasets.ImageFolder(val_path, test_transform), **dataloader_kwargs
)
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, test_transform), **dataloader_kwargs
)

In [12]:
for data in train_loader:
    print(data[0].shape, data[1].shape)
    # print(data[0], data[1])
    print(torch.mean(data[0]))
    break
# Total train data is of shape (128, 3, 250, 250)

torch.Size([1, 3, 250, 250]) torch.Size([1])
tensor(0.3906)


In [13]:
class FaceCNN_initial(nn.Module):
    def __init__(self, num_input_channels, num_classes, stride=1, padding=1):
        super().__init__()

        self.network = nn.Sequential(

        nn.Conv2d(in_channels=num_input_channels, out_channels=50, kernel_size=3, stride=stride, padding=padding),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),

        nn.Conv2d(in_channels=50, out_channels=20, kernel_size=3, stride=stride, padding=padding),
        nn.ReLU(),

        nn.Flatten(),
        nn.Linear(in_features=20*125*125, out_features=num_classes)

        )

    def forward(self, input):
        output = self.network(input)
        return output

In [14]:
class FaceCNN(nn.Module):
    def __init__(self, num_input_channels, num_classes, stride=1, padding=1):
        super().__init__()

        self.network = nn.Sequential(

            # (250, 250, 3)

            nn.Conv2d(in_channels=num_input_channels, out_channels=64, kernel_size=7, stride=2, padding=padding),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=padding),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.2),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=stride, padding=padding),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.2),

            nn.Conv2d(in_channels=256, out_channels=64, kernel_size=3, stride=stride, padding=padding),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.2),

            nn.Flatten(),
            nn.Linear(in_features=14400, out_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),    # https://stats.stackexchange.com/questions/240305/
            nn.Linear(in_features=1024, out_features=64),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=64, out_features=num_classes),

        )

    def forward(self, input):
        output = self.network(input)
        return output

In [20]:
num_input_channels = 3
model = FaceCNN(num_input_channels=num_input_channels, num_classes=len(list_people)).to(device)
# for e in model.parameters():
#     print(e)
print(summary(model, input_size=(dataloader_kwargs['batch_size'], num_input_channels, 250, 250)))

# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-3)
loss_fn = nn.CrossEntropyLoss()
num_epochs = 30

Layer (type:depth-idx)                   Output Shape              Param #
FaceCNN                                  [1, 25]                   --
├─Sequential: 1-1                        [1, 25]                   --
│    └─Conv2d: 2-1                       [1, 64, 123, 123]         9,472
│    └─BatchNorm2d: 2-2                  [1, 64, 123, 123]         128
│    └─ReLU: 2-3                         [1, 64, 123, 123]         --
│    └─MaxPool2d: 2-4                    [1, 64, 61, 61]           --
│    └─Dropout: 2-5                      [1, 64, 61, 61]           --
│    └─Conv2d: 2-6                       [1, 128, 61, 61]          73,856
│    └─BatchNorm2d: 2-7                  [1, 128, 61, 61]          256
│    └─ReLU: 2-8                         [1, 128, 61, 61]          --
│    └─MaxPool2d: 2-9                    [1, 128, 30, 30]          --
│    └─Dropout: 2-10                     [1, 128, 30, 30]          --
│    └─Conv2d: 2-11                      [1, 256, 30, 30]          295,168
│

In [21]:
def evaluate(loader, model):

    model.eval()

    score = 0
    cnt = 0

    with torch.no_grad():       # not training, so no need to calculate gradients
        for inputs, labels in loader:
            inputs, labels = inputs.to(device, non_blocking=non_blocking), labels.to(device, non_blocking=non_blocking)
            output = model(inputs)
            _, pred = torch.max(output.data, 1)
            score += float(torch.sum(pred==labels.data))
            cnt += data[0].shape[0]

    return score/cnt

In [22]:
def train():
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        train_score = 0
        cnt = 0
        train_loss = 0

        model.train()
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device, non_blocking=non_blocking), labels.to(device, non_blocking=non_blocking)

            optimizer.zero_grad()
            
            outputs = model(inputs)
            
            # print(outputs, labels)

            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()

            _, preds = torch.max(outputs.data, 1)
            train_score += float(torch.sum(preds==labels.data))
            cnt += inputs.shape[0]

            # print(preds, labels)

        train_acc = train_score/cnt
        val_acc = evaluate(val_loader, model)
        
        print("Epoch:", epoch, "\tLoss:", train_loss, "\tTraining Acc:", train_acc, "\tVal Acc:", val_acc)

        if val_acc > best_acc:
            torch.save(model.state_dict(),'best.model')
            best_acc = val_acc

In [23]:
train()

Epoch: 0 	Loss: 3413.37361741066 	Training Acc: 0.041666666666666664 	Val Acc: 0.022727272727272728
Epoch: 1 	Loss: 3346.7614665031433 	Training Acc: 0.07196969696969698 	Val Acc: 0.05303030303030303
Epoch: 2 	Loss: 3201.3032276034355 	Training Acc: 0.09753787878787878 	Val Acc: 0.10606060606060606
Epoch: 3 	Loss: 3079.741751715541 	Training Acc: 0.12973484848484848 	Val Acc: 0.125
Epoch: 4 	Loss: 2865.667134359479 	Training Acc: 0.17518939393939395 	Val Acc: 0.18181818181818182
Epoch: 5 	Loss: 2718.3486230820417 	Training Acc: 0.19412878787878787 	Val Acc: 0.19696969696969696
Epoch: 6 	Loss: 2579.4593008980155 	Training Acc: 0.22443181818181818 	Val Acc: 0.25
Epoch: 7 	Loss: 2391.6380325537175 	Training Acc: 0.29829545454545453 	Val Acc: 0.30303030303030304
Epoch: 8 	Loss: 2201.1669663584325 	Training Acc: 0.3484848484848485 	Val Acc: 0.23863636363636365
Epoch: 9 	Loss: 2032.1635219482705 	Training Acc: 0.38920454545454547 	Val Acc: 0.3143939393939394
Epoch: 10 	Loss: 1865.40173449966

In [24]:
best_model = torch.load('best.model')
score = 0
cnt = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device, non_blocking=non_blocking), labels.to(device, non_blocking=non_blocking)
        output = model(inputs)
        _, pred = torch.max(output.data, 1)
        score += float(torch.sum(pred==labels.data))
        cnt += data[0].shape[0]

print(score/cnt)

0.5757575757575758


### Train Stats

```
num_ppl=4, num_for_each=100, num_input_channels=3, SGD

Epoch: 0 	Loss: 358.62260937690735 	Training Acc: 0.3046875 	Val Acc: 0.390625
Epoch: 1 	Loss: 333.57504665851593 	Training Acc: 0.4453125 	Val Acc: 0.5
Epoch: 2 	Loss: 287.8054849989712 	Training Acc: 0.53515625 	Val Acc: 0.484375
Epoch: 3 	Loss: 214.57957464270294 	Training Acc: 0.66796875 	Val Acc: 0.546875
Epoch: 4 	Loss: 128.76155146129895 	Training Acc: 0.8515625 	Val Acc: 0.6875
Epoch: 5 	Loss: 66.8646472900873 	    Training Acc: 0.9375 	    Val Acc: 0.609375
Epoch: 6 	Loss: 38.552940751942515 	Training Acc: 0.96484375 	Val Acc: 0.65625
Epoch: 7 	Loss: 14.400232573081496 	Training Acc: 0.99609375 	Val Acc: 0.640625
Epoch: 8 	Loss: 6.6097561110264 	    Training Acc: 1.0 	        Val Acc: 0.71875
Epoch: 9 	Loss: 3.2570621859687208 	Training Acc: 1.0 	        Val Acc: 0.6875
Epoch: 10 	Loss: 2.2477716574980775 	Training Acc: 1.0 	        Val Acc: 0.6875
Epoch: 11 	Loss: 1.6625592309815147 	Training Acc: 1.0 	        Val Acc: 0.734375
Epoch: 12 	Loss: 1.2643641760919024 	Training Acc: 1.0 	        Val Acc: 0.6875
Epoch: 13 	Loss: 1.0747595029670762 	Training Acc: 1.0 	        Val Acc: 0.71875
Epoch: 14 	Loss: 0.8993665239690074 	Training Acc: 1.0 	        Val Acc: 0.6875

Test Acc: 0.85
```
<hr>

```
Larger network: num_ppl=25, num_for_each=33, num_input_channels=3, SGD, dropout=0.5, no dropout after conv layers

Epoch: 0 	Loss: 1701.3236393928528 	Training Acc: 0.05113636363636364 	Val Acc: 0.08333333333333333
Epoch: 1 	Loss: 1648.1168529987335 	Training Acc: 0.08143939393939394 	Val Acc: 0.13636363636363635
Epoch: 2 	Loss: 1570.683046221733 	Training Acc: 0.11742424242424243 	Val Acc: 0.16666666666666666
Epoch: 3 	Loss: 1488.0452314019203 	Training Acc: 0.18181818181818182 	Val Acc: 0.15151515151515152
Epoch: 4 	Loss: 1359.583057552576 	Training Acc: 0.22348484848484848 	Val Acc: 0.17424242424242425
Epoch: 5 	Loss: 1259.4556982889771 	Training Acc: 0.3068181818181818 	Val Acc: 0.25757575757575757
Epoch: 6 	Loss: 1127.6610003113747 	Training Acc: 0.3712121212121212 	Val Acc: 0.2727272727272727
Epoch: 7 	Loss: 1039.084867735859 	Training Acc: 0.4090909090909091 	Val Acc: 0.3333333333333333
Epoch: 8 	Loss: 917.1557948449627 	Training Acc: 0.4791666666666667 	Val Acc: 0.2878787878787879
Epoch: 9 	Loss: 827.8774750904413 	Training Acc: 0.571969696969697 	Val Acc: 0.45454545454545453
Epoch: 10 	Loss: 673.7398463344434 	Training Acc: 0.615530303030303 	Val Acc: 0.38636363636363635
Epoch: 11 	Loss: 606.598319072742 	Training Acc: 0.678030303030303 	Val Acc: 0.4318181818181818
Epoch: 12 	Loss: 502.92579286664113 	Training Acc: 0.7064393939393939 	Val Acc: 0.44696969696969696
Epoch: 13 	Loss: 412.34249426988936 	Training Acc: 0.7765151515151515 	Val Acc: 0.45454545454545453
Epoch: 14 	Loss: 329.6107653811632 	Training Acc: 0.8143939393939394 	Val Acc: 0.5303030303030303
Epoch: 15 	Loss: 263.91059577577107 	Training Acc: 0.8731060606060606 	Val Acc: 0.5303030303030303
Epoch: 16 	Loss: 248.08312809604558 	Training Acc: 0.8712121212121212 	Val Acc: 0.4621212121212121
Epoch: 17 	Loss: 197.47446866694008 	Training Acc: 0.9090909090909091 	Val Acc: 0.5227272727272727
Epoch: 18 	Loss: 172.28715605471052 	Training Acc: 0.9223484848484849 	Val Acc: 0.5227272727272727
Epoch: 19 	Loss: 133.61397156973908 	Training Acc: 0.9375 	Val Acc: 0.5681818181818182

Test Acc: 0.59
```
<hr>

Lesser dropout, faster convergence (more fitting: 0.99 train acc, 0.58 val and test). Ofc not good always.

<hr>

```
Added dropout layers (p=0.2) after convolution: Train and val acc stay close for the first few (~10) epochs, but then diverge.
    30 epochs. Test acc: 0.38.
    50 epochs. Test acc: 0.55.

Epoch: 0 	Loss: 1706.7114553451538 	Training Acc: 0.05303030303030303 	Val Acc: 0.06060606060606061
Epoch: 1 	Loss: 1703.110694885254 	Training Acc: 0.045454545454545456 	Val Acc: 0.06818181818181818
Epoch: 2 	Loss: 1684.9260246753693 	Training Acc: 0.07196969696969698 	Val Acc: 0.09090909090909091
Epoch: 3 	Loss: 1654.7403428554535 	Training Acc: 0.06818181818181818 	Val Acc: 0.12121212121212122
Epoch: 4 	Loss: 1600.201600074768 	Training Acc: 0.11931818181818182 	Val Acc: 0.1590909090909091
Epoch: 5 	Loss: 1548.975799292326 	Training Acc: 0.13636363636363635 	Val Acc: 0.11363636363636363
Epoch: 6 	Loss: 1498.9787369072437 	Training Acc: 0.13446969696969696 	Val Acc: 0.19696969696969696
Epoch: 7 	Loss: 1436.561501070857 	Training Acc: 0.20643939393939395 	Val Acc: 0.22727272727272727
Epoch: 8 	Loss: 1384.87419853732 	Training Acc: 0.23863636363636365 	Val Acc: 0.19696969696969696
Epoch: 9 	Loss: 1319.722053207457 	Training Acc: 0.26136363636363635 	Val Acc: 0.23484848484848486
Epoch: 10 	Loss: 1251.95867273584 	Training Acc: 0.2859848484848485 	Val Acc: 0.30303030303030304
Epoch: 11 	Loss: 1211.6356632895768 	Training Acc: 0.29734848484848486 	Val Acc: 0.25757575757575757
Epoch: 12 	Loss: 1182.6897227037698 	Training Acc: 0.29734848484848486 	Val Acc: 0.3181818181818182
Epoch: 13 	Loss: 1113.2747115662205 	Training Acc: 0.3484848484848485 	Val Acc: 0.29545454545454547
Epoch: 14 	Loss: 1053.397371711675 	Training Acc: 0.3939393939393939 	Val Acc: 0.3787878787878788
Epoch: 15 	Loss: 958.3644631365314 	Training Acc: 0.4431818181818182 	Val Acc: 0.3484848484848485
Epoch: 16 	Loss: 938.4995879707858 	Training Acc: 0.4602272727272727 	Val Acc: 0.36363636363636365
Epoch: 17 	Loss: 866.5983875243692 	Training Acc: 0.48863636363636365 	Val Acc: 0.4015151515151515
Epoch: 18 	Loss: 822.6497887708247 	Training Acc: 0.5113636363636364 	Val Acc: 0.3939393939393939
Epoch: 19 	Loss: 782.9832759417477 	Training Acc: 0.5435606060606061 	Val Acc: 0.3939393939393939
Epoch: 20 	Loss: 698.3823288148269 	Training Acc: 0.5890151515151515 	Val Acc: 0.3787878787878788
Epoch: 21 	Loss: 658.3069786201813 	Training Acc: 0.6231060606060606 	Val Acc: 0.4090909090909091
Epoch: 22 	Loss: 569.9304089847283 	Training Acc: 0.634469696969697 	Val Acc: 0.4393939393939394
Epoch: 23 	Loss: 622.2785371405917 	Training Acc: 0.6420454545454546 	Val Acc: 0.4772727272727273
Epoch: 24 	Loss: 520.0580716890399 	Training Acc: 0.6761363636363636 	Val Acc: 0.4696969696969697
Epoch: 25 	Loss: 448.39510909226374 	Training Acc: 0.75 	Val Acc: 0.45454545454545453
Epoch: 26 	Loss: 429.5296724770451 	Training Acc: 0.759469696969697 	Val Acc: 0.5
Epoch: 27 	Loss: 380.9049152136067 	Training Acc: 0.7746212121212122 	Val Acc: 0.5
Epoch: 28 	Loss: 356.775621923327 	Training Acc: 0.8068181818181818 	Val Acc: 0.5303030303030303
Epoch: 29 	Loss: 363.29747000594216 	Training Acc: 0.7859848484848485 	Val Acc: 0.4772727272727273
Epoch: 30 	Loss: 297.8134929970547 	Training Acc: 0.8314393939393939 	Val Acc: 0.5
Epoch: 31 	Loss: 282.2684114029398 	Training Acc: 0.8541666666666666 	Val Acc: 0.4621212121212121
Epoch: 32 	Loss: 246.5776662196622 	Training Acc: 0.8522727272727273 	Val Acc: 0.4696969696969697
Epoch: 33 	Loss: 242.04962211154543 	Training Acc: 0.8560606060606061 	Val Acc: 0.5
Epoch: 34 	Loss: 208.3801038511872 	Training Acc: 0.8958333333333334 	Val Acc: 0.5606060606060606
Epoch: 35 	Loss: 194.5455155442314 	Training Acc: 0.8996212121212122 	Val Acc: 0.5227272727272727
Epoch: 36 	Loss: 158.61289137974723 	Training Acc: 0.9185606060606061 	Val Acc: 0.5151515151515151
Epoch: 37 	Loss: 162.71159709136646 	Training Acc: 0.9034090909090909 	Val Acc: 0.5
Epoch: 38 	Loss: 170.97251973819846 	Training Acc: 0.9015151515151515 	Val Acc: 0.5075757575757576
Epoch: 39 	Loss: 157.24092914579478 	Training Acc: 0.9147727272727273 	Val Acc: 0.553030303030303
Epoch: 40 	Loss: 133.82072701480678 	Training Acc: 0.9356060606060606 	Val Acc: 0.5
Epoch: 41 	Loss: 136.39509819635964 	Training Acc: 0.928030303030303 	Val Acc: 0.553030303030303
Epoch: 42 	Loss: 126.88825436101558 	Training Acc: 0.9318181818181818 	Val Acc: 0.5075757575757576
Epoch: 43 	Loss: 124.5683646489997 	Training Acc: 0.9356060606060606 	Val Acc: 0.5757575757575758
Epoch: 44 	Loss: 120.55329895571163 	Training Acc: 0.9337121212121212 	Val Acc: 0.5
Epoch: 45 	Loss: 100.53747285808356 	Training Acc: 0.9356060606060606 	Val Acc: 0.5378787878787878
Epoch: 46 	Loss: 92.43336634917796 	Training Acc: 0.9621212121212122 	Val Acc: 0.5606060606060606
Epoch: 47 	Loss: 84.71885716843988 	Training Acc: 0.9564393939393939 	Val Acc: 0.5303030303030303
Epoch: 48 	Loss: 80.6087966322365 	Training Acc: 0.9602272727272727 	Val Acc: 0.5227272727272727
Epoch: 49 	Loss: 86.07536707191636 	Training Acc: 0.9564393939393939 	Val Acc: 0.5378787878787878
```

<hr>

Batch size 50: Train accuracy 4% even after 50 epochs. (25 ppl)

<hr>

Data Augmentation. Horizontal flipping: 37% test acc after 10 epochs. (25 ppl)

```
Epoch: 0 	Loss: 3413.37361741066 	Training Acc: 0.041666666666666664 	Val Acc: 0.022727272727272728
Epoch: 1 	Loss: 3346.7614665031433 	Training Acc: 0.07196969696969698 	Val Acc: 0.05303030303030303
Epoch: 2 	Loss: 3201.3032276034355 	Training Acc: 0.09753787878787878 	Val Acc: 0.10606060606060606
Epoch: 3 	Loss: 3079.741751715541 	Training Acc: 0.12973484848484848 	Val Acc: 0.125
Epoch: 4 	Loss: 2865.667134359479 	Training Acc: 0.17518939393939395 	Val Acc: 0.18181818181818182
Epoch: 5 	Loss: 2718.3486230820417 	Training Acc: 0.19412878787878787 	Val Acc: 0.19696969696969696
Epoch: 6 	Loss: 2579.4593008980155 	Training Acc: 0.22443181818181818 	Val Acc: 0.25
Epoch: 7 	Loss: 2391.6380325537175 	Training Acc: 0.29829545454545453 	Val Acc: 0.30303030303030304
Epoch: 8 	Loss: 2201.1669663584325 	Training Acc: 0.3484848484848485 	Val Acc: 0.23863636363636365
Epoch: 9 	Loss: 2032.1635219482705 	Training Acc: 0.38920454545454547 	Val Acc: 0.3143939393939394
Epoch: 10 	Loss: 1865.4017344996682 	Training Acc: 0.4251893939393939 	Val Acc: 0.3787878787878788
Epoch: 11 	Loss: 1683.504168131738 	Training Acc: 0.4943181818181818 	Val Acc: 0.3939393939393939
Epoch: 12 	Loss: 1620.7468348528491 	Training Acc: 0.5047348484848485 	Val Acc: 0.3977272727272727
Epoch: 13 	Loss: 1459.5485166148974 	Training Acc: 0.5482954545454546 	Val Acc: 0.4166666666666667
Epoch: 14 	Loss: 1277.721444843607 	Training Acc: 0.6136363636363636 	Val Acc: 0.3560606060606061
Epoch: 15 	Loss: 1127.472487490384 	Training Acc: 0.6524621212121212 	Val Acc: 0.45075757575757575
Epoch: 16 	Loss: 1072.5480248472522 	Training Acc: 0.6553030303030303 	Val Acc: 0.48484848484848486
Epoch: 17 	Loss: 980.6919525736521 	Training Acc: 0.696969696969697 	Val Acc: 0.4621212121212121
Epoch: 18 	Loss: 875.4076542500188 	Training Acc: 0.7367424242424242 	Val Acc: 0.5113636363636364
Epoch: 19 	Loss: 761.349040832712 	Training Acc: 0.7689393939393939 	Val Acc: 0.553030303030303
Epoch: 20 	Loss: 719.9298782024468 	Training Acc: 0.7670454545454546 	Val Acc: 0.4734848484848485
Epoch: 21 	Loss: 628.9594370288526 	Training Acc: 0.8153409090909091 	Val Acc: 0.48484848484848486
Epoch: 22 	Loss: 565.63355404699 	Training Acc: 0.8229166666666666 	Val Acc: 0.5151515151515151
Epoch: 23 	Loss: 482.1412212023616 	Training Acc: 0.8551136363636364 	Val Acc: 0.5378787878787878
Epoch: 24 	Loss: 456.59441514623654 	Training Acc: 0.8607954545454546 	Val Acc: 0.5946969696969697
Epoch: 25 	Loss: 424.6992328128117 	Training Acc: 0.8674242424242424 	Val Acc: 0.4962121212121212
Epoch: 26 	Loss: 409.11123079635934 	Training Acc: 0.8910984848484849 	Val Acc: 0.5681818181818182
Epoch: 27 	Loss: 336.6949441268644 	Training Acc: 0.9053030303030303 	Val Acc: 0.5833333333333334
Epoch: 28 	Loss: 312.2307780463056 	Training Acc: 0.90625 	Val Acc: 0.5757575757575758
Epoch: 29 	Loss: 272.262066967672 	Training Acc: 0.9185606060606061 	Val Acc: 0.5681818181818182

Test Acc: 0.576
```