In [1]:
import pathlib
import shutil
import csv
import json
import zipfile
from PIL import Image
from random import random
import torch
from torch.nn.functional import one_hot
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader, Dataset    
import torchvision.transforms as transforms
from torchvision.transforms import Compose
import torchvision.datasets as datasets
from torchsummary import summary
from numba import cuda

cuda.close()

In [2]:
class DatasetGen:
    def __init__(self, zip_path, split_ratio, transforms) -> None:
        self.zip_path = zip_path
        self.split_ratio = split_ratio
        self.transforms = transforms
        
    def create_label_encs(self):
        label2id, id2label = {}, {}
        with zipfile.ZipFile(self.zip_path, 'r') as zip_file:
            for name in zip_file.namelist():
                if '/' in name:
                    label = name.split('/')[1]
                    if label not in label2id:
                        label2id[label] = len(label2id)
                        id2label[len(id2label)] = label
        self.label2id = label2id
        self.id2label = id2label
        return label2id, id2label

    def generate(self, **loader_params):
        label2id, id2label = self.create_label_encs()
        with zipfile.ZipFile(self.zip_path, 'r') as zip_file:
            new_path = pathlib.Path(self.zip_path).parent / zip_file.namelist()[0].split('/')[0]
            zip_file.extractall('datasets')
        self.split(new_path)
        train_dataset = datasets.ImageFolder(
            new_path.parent / 'train', transform=self.transforms['train'])
        train_loader = DataLoader(train_dataset, **loader_params)
        valid_dataset = datasets.ImageFolder(
            new_path.parent / 'valid', transform=self.transforms['valid'])
        valid_loader = DataLoader(valid_dataset, **loader_params)
        test_dataset = datasets.ImageFolder(
            new_path.parent / 'test', transform=self.transforms['test'])
        test_loader = DataLoader(test_dataset, **loader_params)
        self.delete_path(new_path)
        return {
            'train_loader': train_loader,
            'valid_loader': valid_loader,
            'test_loader': test_loader,
            'label2id': label2id,
            'id2label': id2label,
            'train_dataset': train_dataset,
            'valid_dataset': valid_dataset,
            'test_dataset': test_dataset
        }

    def split(self, path: pathlib.Path):
        train_path = (path.parent / 'train')
        valid_path = (path.parent / 'valid')
        test_path = (path.parent / 'test')

        for label in path.iterdir():
            (train_path / label.name).mkdir(parents=True)
            (valid_path / label.name).mkdir(parents=True)
            (test_path / label.name).mkdir(parents=True)
            for img in label.iterdir():
                rand = random()
                if rand < self.split_ratio[0]:
                    img.rename(train_path / label.name / img.name)
                elif rand < self.split_ratio[0] + self.split_ratio[1]:
                    img.rename(valid_path / label.name / img.name)
                else:
                    img.rename(test_path / label.name / img.name)


    def delete_path(self, path):
        shutil.rmtree(path)

In [3]:
transformations = {
    'train': Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

In [4]:
ds_gen = DatasetGen('datasets/geoguessr-55countries.zip', (0.8, 0.1, 0.1), transformations)

In [5]:
rets = ds_gen.generate(batch_size=32, shuffle=True, num_workers=4)

In [6]:
class EncoderBlock(nn.Module):
    '''
    Convolutional block responsible for getting the hidden state of the image
    '''
    def __init__(self, input_channel, out_channels: list[int], filter_sizes: list[int], strides: list[int], batch_norm: list[bool], pooling_args: list | None = None, device: torch.device = torch.device('cuda')) -> None:
        super().__init__()
        self.layers = nn.Sequential()
        aux = input_channel
        for i, (out_channel, filter_size, stride, norm) in enumerate(zip(out_channels, filter_sizes, strides, batch_norm)):
            self.layers.add_module(f'conv_{i}', nn.Conv2d(aux, out_channel, filter_size, stride, padding='same', device=device))
            aux = out_channel
            if norm:
                self.layers.add_module(f'batch_norm_{i}', nn.BatchNorm2d(out_channel, device=device))
            self.layers.add_module(f'relu_{i}', nn.ReLU())
        if pooling_args is not None:
            self.layers.add_module('max_pool', nn.MaxPool2d(*pooling_args))
        
    def forward(self, x):
        return self.layers(x)

In [7]:
class InceptionBlock(nn.Module):
    def __init__(self, in_channel, out_channels, device=torch.device('cuda')) -> None:
        super().__init__()
        
        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channel, out_channels[0][0], 1, stride=1, padding='same'),
            nn.ReLU()
        ).to(device)
        
        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channel, out_channels[1][0], 1, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(out_channels[1][0], out_channels[1][1], 3, stride=1, padding='same'),
            nn.ReLU()
        ).to(device)

        self.branch3 = nn.Sequential(
            nn.Conv2d(in_channel, out_channels[2][0], 1, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(out_channels[2][0], out_channels[2][1], 5, stride=1, padding='same'),
            nn.ReLU()    
        ).to(device)

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(in_channel, out_channels[3][0], 1, stride=1, padding='same'),
            nn.ReLU()
        ).to(device)
    
    def forward(self, x):
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        x4 = self.branch4(x)

        out = torch.cat([x1, x2, x3, x4], 1)
        return out

In [8]:
class CustomModel1(nn.Module): #? Custom model made from scratch
    def __init__(self, n_labels, device=torch.device('cuda')) -> None:
        super().__init__()
        self.n_labels = n_labels 
        self.layers = nn.Sequential(
            EncoderBlock(3, [64, 64], [3, 3], [1, 1], [False, False], [2, 2]),
            EncoderBlock(64, [128, 128], [3, 3], [1, 1], [False, True], [2, 2]),
            EncoderBlock(128, [256, 256, 256], [3, 3, 3], [1, 1, 1], [False, False, True], [2, 2]),
            EncoderBlock(256, [256, 512, 1024], [3, 3, 3], [
                         1, 1, 1], [False, False, True], [3, 3]),
            nn.Flatten(),
            nn.Linear(82944, 512),
            nn.ReLU(),
            nn.Linear(512, n_labels),
            nn.Softmax(dim=1)
        ).to(device)

    def forward(self, x: torch.Tensor): 
        x = self.layers(x)
        return x

In [9]:
class VGG(nn.Module):
    def __init__(self, n_labels) -> None:
        super().__init__()
        self.n_labels = n_labels
        self.model = torchvision.models.vgg19(pretrained=True)
        self.model.training = False
        self.model.classifier[6] = nn.Linear(4096, 1024)
        self.linear = nn.Sequential(
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, n_labels)
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.model(x)
        x = self.linear(x)
        return self.softmax(x)

In [10]:
class Callback:
    def __init__(self) -> None:
        self.epoch_counter = 0
        self.batch_counter = 0

    def on_train_begin(self): 
        self.batch_counter = 0

    def on_train_end(self):
        return None

    def on_epoch_begin(self): 
        return None
    
    def on_epoch_end(self, val_loss, model, *args): 
        self.epoch_counter += 1
    
    def on_batch_begin(self):
        return None
    
    def on_batch_end(self): 
        self.batch_counter += 1

In [11]:
class ModelCheckpoint(Callback):
    def __init__(self, path) -> None:
        super().__init__()
        self.path = pathlib.Path(path)
        self.best_loss = float('inf')

    def on_epoch_end(self, val_loss, model, *args):
        super().on_epoch_end(val_loss, model, *args)
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            torch.save(model.state_dict(), self.path)

class EarlyStopping(Callback):
    def __init__(self, patience, restore_best_weights=False, checkpoint: str|bool=False) -> None:
        super().__init__()
        self.patience = patience
        self.restore_best_weights = restore_best_weights
        self.checkpoint = checkpoint

        if self.checkpoint:
            self.model_checkpoint = ModelCheckpoint(self.checkpoint)
        if self.restore_best_weights:
            self.best_weights = None

        self.counter = 0
        self.best_loss = float('inf')

    def on_epoch_end(self, val_loss, model, *args):
        super().on_epoch_end(val_loss, model, *args)
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                print(f"Early stopping on epoch {self.epoch_counter} with val_loss: {val_loss}")
                return True
            
class LearningRateScheduler(Callback):
    def __init__(self, lr_scheduler) -> None:
        super().__init__()
        self.lr_scheduler = lr_scheduler

    def on_epoch_end(self, val_loss, model, *args):
        super().on_epoch_end(val_loss, model)
        self.lr_scheduler(self.epoch_counter, *args)

In [12]:
def train(model, train_loader: DataLoader, val_loader: DataLoader, loss_func, optimizer, epochs, callbacks: None | list[Callback] = None, device=torch.device('cuda')):
    for epoch in range(epochs):
        if callbacks is not None and any(callback.on_epoch_begin() for callback in callbacks):
            break
        model.train()
        if callbacks is not None and any(callback.on_train_begin() for callback in callbacks):
            break
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            labels = one_hot(labels, model.n_labels).to(torch.float32)
            if callbacks is not None and any(callback.on_batch_begin() for callback in callbacks):
                break
            optimizer.zero_grad()
            output = model(images)

            loss = loss_func(output, labels)
            loss.backward()
            optimizer.step()
            if callbacks is not None and any(callback.on_batch_end() for callback in callbacks):
                break
        if callbacks is not None and any(callback.on_train_end() for callback in callbacks):
            break
        
        model.eval()
        with torch.no_grad():
            val_loss = 0
            val_samples = 0
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                labels = one_hot(labels, model.n_labels).to(torch.float32)
                output = model(images)
                loss = loss_func(output, labels)
                val_loss += loss.item()
                val_samples += 1
            avg_val_loss = val_loss / val_samples
            print(f"Epoch: {epoch+1}, Validation Loss: {avg_val_loss}")
        if callbacks is not None and any(callback.on_epoch_end(avg_val_loss, model) for callback in callbacks):
            break

In [13]:
model = CustomModel1(len(rets['label2id']))

In [14]:
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
      EncoderBlock-6         [-1, 64, 112, 112]               0
            Conv2d-7        [-1, 128, 112, 112]          73,856
              ReLU-8        [-1, 128, 112, 112]               0
            Conv2d-9        [-1, 128, 112, 112]         147,584
      BatchNorm2d-10        [-1, 128, 112, 112]             256
             ReLU-11        [-1, 128, 112, 112]               0
        MaxPool2d-12          [-1, 128, 56, 56]               0
     EncoderBlock-13          [-1, 128, 56, 56]               0
           Conv2d-14          [-1, 256,

In [15]:
train(model, rets['train_loader'], rets['valid_loader'], nn.CrossEntropyLoss(), torch.optim.Adam(model.parameters()), 3)

Epoch: 1, Validation Loss: 4.01376014473164
Epoch: 2, Validation Loss: 4.013760155281134
Epoch: 3, Validation Loss: 4.013760157391033


In [16]:
import numpy as np

In [17]:
sample = rets['test_dataset'][0], rets['test_dataset'][1000], rets['test_dataset'][2735]
arr = np.array(sample)
sample_in = arr[:, 0].tolist()
sample_lbl = arr[:, 1].tolist()

  arr = np.array(sample)
  arr = np.array(sample)


In [18]:
sample_lbl

[0, 17, 42]

In [19]:
labels = one_hot(torch.tensor(sample_lbl), len(
    rets['label2id'])).to(torch.float32).cuda()
labels

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.]], device='cuda:0')

In [20]:
out = model(torch.stack(sample_in).to('cuda'))
out

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [21]:
print(out.shape, labels.shape)

torch.Size([3, 55]) torch.Size([3, 55])


In [22]:
nn.CrossEntropyLoss()(out, labels)

tensor(4.0381, device='cuda:0', grad_fn=<DivBackward1>)

In [24]:
out.argmax(1)

tensor([39, 39, 39], device='cuda:0')

In [25]:
print(torch.cuda.is_available())

True
