In [16]:
import numpy as np
import pickle
import torch

from tqdm import tqdm
from PIL import Image
from pathlib import Path

from torchvision import transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torchvision import models

from matplotlib import pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

In [2]:
# Extract the dataset archive into the working directory (-q: quiet mode).
# NOTE(review): presumably this provides the ./train and ./testset folders read below — confirm.
!unzip -q ./journey-springfield.zip

In [4]:
# Dataset modes accepted by SimpsonsDataset.
DATA_MODES = ['train', 'val', 'test']
# Side length (pixels) of the square every image is resized to.
RESCALE_SIZE = 224

# Pick the first available backend in the order CUDA -> MPS -> CPU.
# DEVICE is always a torch.device (never a bare string) so that attributes
# such as DEVICE.type are available on every platform.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

In [5]:
class SimpsonsDataset(Dataset):
    """Dataset that loads image files from disk, resizes and normalizes them.

    For every mode except ``'test'`` the label of an image is the name of the
    directory that contains it; labels are mapped to integer ids with a
    ``LabelEncoder`` fitted on this dataset's own labels.

    Args:
        files: Iterable of ``pathlib.Path`` objects pointing at image files.
        mode: One of ``DATA_MODES``.

    Raises:
        NameError: If ``mode`` is not listed in ``DATA_MODES``.

    Note:
        Every non-test instance fits its *own* encoder and overwrites
        ``label_encoder.pkl`` in the working directory. Two instances agree on
        the label -> id mapping only when they contain the same set of labels.
    """

    def __init__(self, files, mode):
        super().__init__()

        if mode not in DATA_MODES:
            # Same exception type as before, but the explanation now travels with it.
            raise NameError(f"{mode} is not correct; correct modes: {DATA_MODES}")

        self.files = sorted(files)
        self.mode = mode
        self.len_ = len(self.files)

        # Built once instead of on every __getitem__ call. The mean/std triples
        # are the channel statistics torchvision documents for its pretrained models.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

        self.label_encoder = LabelEncoder()

        if self.mode != 'test':
            self.labels = [path.parent.name for path in self.files]
            self.label_encoder.fit(self.labels)
            # Encode all labels up front so __getitem__ is a plain list lookup
            # rather than one LabelEncoder.transform call per sample.
            self.label_ids = self.label_encoder.transform(self.labels).tolist()

            with open('label_encoder.pkl', 'wb') as le_dump_file:
                pickle.dump(self.label_encoder, le_dump_file)

    def __len__(self):
        """Return the number of files in the dataset."""
        return self.len_

    def load_sample(self, file):
        """Open ``file`` and return it as a fully loaded 3-channel RGB PIL image."""
        with Image.open(file) as image:
            # convert() forces the pixel data to be read and guarantees three
            # channels, so grayscale / RGBA / palette files do not break the
            # 3-channel Normalize step.
            return image.convert('RGB')

    def __getitem__(self, index):
        """Return the tensor for ``index`` (test mode) or a ``(tensor, label_id)`` pair."""
        x = self.load_sample(self.files[index])
        x = self._prepare_sample(x)
        # Scale to [0, 1] as float32 here; ToTensor only rescales uint8 input,
        # so for this array it just converts HWC ndarray -> CHW tensor.
        x = np.array(x / 255, dtype='float32')
        x = self.transform(x)
        if self.mode == 'test':
            return x
        return x, self.label_ids[index]

    def _prepare_sample(self, image):
        """Resize ``image`` to RESCALE_SIZE x RESCALE_SIZE and return it as a numpy array."""
        image = image.resize((RESCALE_SIZE, RESCALE_SIZE))
        return np.array(image)

In [6]:
# Root directories that are searched recursively for *.jpg files:
# TRAIN_DIR feeds the train/validation split, TEST_DIR the test dataset.
TRAIN_DIR = Path('./train/')
TEST_DIR = Path('./testset')

In [7]:
# Sorted lists of every *.jpg path found (recursively) under each root directory.
# sorted() consumes the rglob generator directly and returns a list.
train_val_files = sorted(TRAIN_DIR.rglob('*.jpg'))
test_files = sorted(TEST_DIR.rglob('*.jpg'))

In [10]:
from sklearn.model_selection import train_test_split

# The label of each image is the name of its parent directory.
train_val_labels = [path.parent.name for path in train_val_files]

# Stratified 75/25 split. A fixed random_state keeps the split (and therefore
# every metric computed below) identical across kernel restarts.
train_files, val_files = train_test_split(
    train_val_files,
    test_size=0.25,
    stratify=train_val_labels,
    random_state=42,
)

train_dataset = SimpsonsDataset(train_files, mode='train')
val_dataset = SimpsonsDataset(val_files, mode='val')

# The test set is unlabeled and is iterated in file order.
test_dataset = SimpsonsDataset(test_files, mode="test")
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

In [13]:
def model_learning(train_files, val_files, model, num_epochs, batch_size, device=DEVICE):
    """Train ``model`` with Adam and cross-entropy, reporting accuracy after every epoch.

    Args:
        train_files: Dataset yielding ``(input, target)`` pairs; wrapped in a
            shuffled ``DataLoader`` for training.
        val_files: Dataset yielding ``(input, target)`` pairs; wrapped in an
            unshuffled ``DataLoader`` for validation.
        model: Module that is optimized in place. It is not moved by this
            function, so it must already be on ``device``.
        num_epochs: Number of passes over the training data.
        batch_size: Batch size used by both loaders.
        device: Device every batch is moved to before the forward pass.

    Returns:
        dict with keys ``'train'`` and ``'val'``; each value is a list holding
        one accuracy (correct / seen samples) per epoch.
    """
    train_loader = DataLoader(train_files, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_files, batch_size=batch_size, shuffle=False)

    base_lr = 1e-3

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=base_lr, betas=(0.0, 0.999),
                                 eps=1e-8, weight_decay=0, amsgrad=True)
    # The learning rate is multiplied by 0.8 at each listed epoch milestone.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2, 4, 6, 8, 10, 12, 14, 16, 18], gamma=0.8)

    accuracy = {'train': [], 'val': []}

    for epoch in range(num_epochs):
        print(f'Epoch: {epoch + 1}')

        # ---- training pass ----
        model.train()
        train_correct = 0
        train_all = 0
        train_loss = 0
        for x_batch, y_batch in tqdm(train_loader):
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            output = model(x_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_correct += (output.argmax(-1) == y_batch).sum().item()
            train_all += y_batch.size(0)

        accuracy['train'].append(train_correct / train_all)
        print(f'Train loss: {train_loss / len(train_loader)}')
        print(f"Train accuracy: {train_correct / train_all}")

        # ---- validation pass (no gradients are tracked) ----
        model.eval()
        val_correct = 0
        val_all = 0
        with torch.no_grad():
            for x_batch, y_batch in tqdm(val_loader):
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

                output = model(x_batch)
                val_correct += (output.argmax(-1) == y_batch).sum().item()
                val_all += y_batch.size(0)

        accuracy['val'].append(val_correct / val_all)
        print(f"Val accuracy: {val_correct / val_all}")

        scheduler.step()

    # Drop the references to the last batch so its memory can be released.
    # Plain assignment (instead of `del`) also works when no batch was ever
    # processed, e.g. num_epochs == 0.
    x_batch = y_batch = output = loss = None

    # Return cached allocator memory on the backend that was actually used.
    device_type = torch.device(device).type
    if device_type == 'cuda':
        torch.cuda.empty_cache()
    elif device_type == 'mps':
        torch.mps.empty_cache()

    return accuracy

In [15]:
# One output unit per distinct label found in the train/val pool.
n_classes = len(set(train_val_labels))

# Load ResNet-50 with its 'DEFAULT' weights and swap the classification head
# for a fresh linear layer sized to n_classes.
model_ft = models.resnet50(weights='DEFAULT')
num_features = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_features, n_classes)

# Freeze everything, then re-enable gradients only for `layer4` and the head.
for param in model_ft.parameters():
    param.requires_grad = False
for trainable_part in (model_ft.layer4, model_ft.fc):
    for param in trainable_part.parameters():
        param.requires_grad = True

model_ft = model_ft.to(DEVICE)

# 15 epochs with batch size 128; returns the per-epoch train/val accuracy.
accuracy = model_learning(train_dataset, val_dataset, model_ft, 15, 128)

Epoch: 1


100%|██████████| 123/123 [01:50<00:00,  1.11it/s]


Train loss: 0.6211497990823374
Train accuracy: 0.8475699089113956


100%|██████████| 41/41 [00:33<00:00,  1.24it/s]


Val accuracy: 0.8876576232327092
Epoch: 2


100%|██████████| 123/123 [01:52<00:00,  1.09it/s]


Train loss: 0.11220016427398698
Train accuracy: 0.9703802789986623


100%|██████████| 41/41 [00:32<00:00,  1.28it/s]


Val accuracy: 0.9319831868551777
Epoch: 3


100%|██████████| 123/123 [01:48<00:00,  1.13it/s]


Train loss: 0.031631340910642736
Train accuracy: 0.9923562010319129


100%|██████████| 41/41 [00:32<00:00,  1.28it/s]


Val accuracy: 0.9646541841803592
Epoch: 4


100%|██████████| 123/123 [01:49<00:00,  1.13it/s]


Train loss: 0.01548975578516843
Train accuracy: 0.9964965921396267


100%|██████████| 41/41 [00:31<00:00,  1.28it/s]


Val accuracy: 0.9554833779136416
Epoch: 5


100%|██████████| 123/123 [01:57<00:00,  1.04it/s]


Train loss: 0.004540283298943328
Train accuracy: 0.9990445251289891


100%|██████████| 41/41 [00:34<00:00,  1.19it/s]


Val accuracy: 0.9673290026748185
Epoch: 6


100%|██████████| 123/123 [01:50<00:00,  1.12it/s]


Train loss: 0.0012023083345559463
Train accuracy: 0.9997452067010638


100%|██████████| 41/41 [00:32<00:00,  1.27it/s]


Val accuracy: 0.9713412304165074
Epoch: 7


100%|██████████| 123/123 [01:50<00:00,  1.11it/s]


Train loss: 0.0007573424410089146
Train accuracy: 0.9997452067010638


100%|██████████| 41/41 [00:33<00:00,  1.21it/s]


Val accuracy: 0.9707680550248377
Epoch: 8


100%|██████████| 123/123 [01:50<00:00,  1.11it/s]


Train loss: 0.0007839127410249595
Train accuracy: 0.9997452067010638


100%|██████████| 41/41 [00:32<00:00,  1.27it/s]


Val accuracy: 0.9703859380970578
Epoch: 9


100%|██████████| 123/123 [01:50<00:00,  1.12it/s]


Train loss: 0.0005416522253358196
Train accuracy: 0.9999363016752659


100%|██████████| 41/41 [00:32<00:00,  1.27it/s]


Val accuracy: 0.9690485288498281
Epoch: 10


100%|██████████| 123/123 [01:50<00:00,  1.12it/s]


Train loss: 0.0004879356573046681
Train accuracy: 0.9997452067010638


100%|██████████| 41/41 [00:32<00:00,  1.27it/s]


Val accuracy: 0.9700038211692777
Epoch: 11


100%|██████████| 123/123 [01:49<00:00,  1.12it/s]


Train loss: 0.0004460229834447505
Train accuracy: 0.9998089050257978


100%|██████████| 41/41 [00:32<00:00,  1.28it/s]


Val accuracy: 0.9696217042414979
Epoch: 12


100%|██████████| 123/123 [01:48<00:00,  1.13it/s]


Train loss: 0.0003761290674328774
Train accuracy: 0.9998726033505319


100%|██████████| 41/41 [00:32<00:00,  1.28it/s]


Val accuracy: 0.9701948796331678
Epoch: 13


100%|██████████| 123/123 [01:49<00:00,  1.13it/s]


Train loss: 0.00031370650869235774
Train accuracy: 0.9998726033505319


100%|██████████| 41/41 [00:32<00:00,  1.28it/s]


Val accuracy: 0.9701948796331678
Epoch: 14


100%|██████████| 123/123 [01:49<00:00,  1.12it/s]


Train loss: 0.00031515900465036074
Train accuracy: 0.9998726033505319


100%|██████████| 41/41 [00:32<00:00,  1.28it/s]


Val accuracy: 0.9703859380970578
Epoch: 15


100%|██████████| 123/123 [01:51<00:00,  1.11it/s]


Train loss: 0.0002655978280712795
Train accuracy: 0.9998726033505319


100%|██████████| 41/41 [00:32<00:00,  1.26it/s]

Val accuracy: 0.9707680550248377





In [20]:
# Persist only the model's state_dict (not the module object) to 'model_ft.pt'.
torch.save(model_ft.state_dict(), 'model_ft.pt')

In [17]:
def predict(model, test_loader):
    """Run ``model`` over ``test_loader`` and return softmax probabilities.

    Args:
        model: Module to evaluate; it is switched to eval mode and left there.
        test_loader: Any iterable of input batches (tensors), e.g. a
            ``DataLoader`` or a plain list of tensors.

    Returns:
        numpy array with one row per input sample (batches concatenated along
        the first axis) and softmax applied over the last axis.
    """
    # Send inputs to wherever the model's weights live; fall back to the
    # notebook-wide DEVICE only for models without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    # Switching to eval mode is loop-invariant, so do it once up front.
    model.eval()
    with torch.no_grad():
        logits = [model(inputs.to(device)).cpu() for inputs in test_loader]

    probs = nn.functional.softmax(torch.cat(logits), dim=-1).numpy()
    return probs


def make_f1_score(model, n=len(val_dataset)):
    """Estimate the micro-averaged F1 score of ``model`` on random validation samples.

    ``n`` indices are drawn uniformly from ``[0, n)`` (repeats are possible)
    and the corresponding ``val_dataset`` items are classified one at a time.

    Args:
        model: Module passed to ``predict``.
        n: Number of samples to draw; also the exclusive upper bound of the
            sampled indices. The default is ``len(val_dataset)`` as it was
            when this function was defined.

    Returns:
        The F1 score rounded to three decimals, as a Python float.
    """
    idxs = list(map(int, np.random.uniform(0, n, n)))

    # Fetch each sample exactly once: every val_dataset[i] access re-reads and
    # re-transforms the image, so separate image/label look-ups double the I/O.
    samples = [val_dataset[idx] for idx in idxs]
    imgs = [img.unsqueeze(0) for img, _ in samples]
    actual_labels = [label for _, label in samples]

    probs_ims = predict(model, imgs)
    preds_class = np.argmax(probs_ims, -1).tolist()

    # float() accepts both numpy scalars and plain Python floats, so the result
    # does not depend on which of the two f1_score returns.
    return round(float(f1_score(actual_labels, preds_class, average='micro')), 3)

In [18]:
# Micro-averaged F1 of the fine-tuned model on randomly sampled validation images.
f1_val = make_f1_score(model_ft)
f1_val

0.971