<a href="https://colab.research.google.com/github/wilberquito/AMLProject/blob/main/Ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning Project: Image Classification
## Advanced Machine Learning


> Wilber E. Bermeo Quito 
>
> Judit Quintana Massana
>
> April 2023

## Inference for valid and test sets

In [2]:
import zipfile
from pathlib import Path
import torch
import matplotlib.pyplot as plt

In [3]:
try:
    import google.colab
    IN_COLAB = True
    ! pip install torchvision
    ! pip install torchinfo
    
    from google.colab import drive
    drive.mount('/content/drive')

    !rm -rf data
    data_path = Path('/content/drive/MyDrive/AML/dataset_CIFAR10.zip')
    with zipfile.ZipFile(data_path,"r") as zip_ref:
        zip_ref.extractall("data")

    !rm -rf test
    data_path = Path('/content/drive/MyDrive/AML/test_unlabelled.zip')
    with zipfile.ZipFile(data_path,"r") as zip_ref:
        zip_ref.extractall("test")

    !rm -rf modular
    data_path = Path('/content/drive/MyDrive/AML/modular.zip')
    with zipfile.ZipFile(data_path,"r") as zip_ref:
        zip_ref.extractall(".")

    # Getting the trained models
    !rm -rf trained
    !cp -rf /content/drive/MyDrive/AML/trained ./trained

    # Getting the predictions
    !rm -rf ensemble
    !cp -rf /content/drive/MyDrive/AML/ensemble ./ensemble
except:
    IN_COLAB = False

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.2-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.2
Mounted at /content/drive


In [3]:
from modular.evaluate import val_step
import modular.models as models
from modular.datasets import TestDataset
import modular.datasets as datasets
import torch
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from modular.utils import set_seeds
import pandas as pd
import torchvision.transforms as transforms


# Seed first, before any model object is created.
set_seeds(seed=42)

# Run-wide settings read by the evaluation loop below.
out_dim = 10   # passed to every model constructor and to val_step
n_test = 4     # forwarded to val_step; presumably the number of test-time passes - TODO confirm
data_path = "./test/test_unlabelled"

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Checkpoint file -> freshly built architecture that will receive its weights.
# NOTE(review): this rebinds `models` from the imported module to the dict; the
# right-hand side is evaluated first, so the constructors still resolve.
# NOTE(review): every architecture is instantiated here, even those whose
# prediction is later skipped because its CSV already exists.
models = {
    Path('./trained/AMLMAXVIT_T.pth'): models.AMLMAXVIT_T(out_dim),
    Path('./trained/amlresnet50.pth'): models.AMLResnet50(out_dim),
    Path('./trained/exotic.amlresnet50.pth'): models.AMLResnet50(out_dim),
    Path('./trained/AMLResnet50_FastAI.pth'): models.AMLResnet50_FastAI(out_dim),
    Path('./trained/amlresnet101.pth'): models.AMLResnet101(out_dim),
    Path('./trained/efficientnet_v2_s.pth'): models.AMLEfficientNet_V2_S(out_dim),
}

# (split name, image folder) pairs, evaluated in this order.
evals = [
    ('validation', './data/validation'),
    ('test', './test/test_unlabelled'),
]

def dataloader_builder(eval_type, folder_root, transform=None):
  """Build the dataloader for one split and a name for each of its samples.

  Args:
    eval_type: 'test' selects the TestDataset branch; any other value goes
      through datasets.get_dataloader.
    folder_root: folder that holds the images of the split.
    transform: transform handed to the dataset / project loader (default None).

  Returns:
    (dataloader, names). For 'test', names is the dataset's `file_names`;
    otherwise it is the last two '/'-separated components of each path in
    `dataset.imgs`.
  """
  if eval_type != 'test':
    # `suffle` is spelled the way this notebook calls the project helper
    # everywhere; presumably it matches get_dataloader's signature - TODO confirm.
    loader = datasets.get_dataloader(folder_root=folder_root,
                                     transformer=transform,
                                     batch_size=256,
                                     suffle=False)
    sample_names = ['/'.join(entry[0].split('/')[-2:])
                    for entry in loader.dataset.imgs]
    return loader, sample_names

  test_set = TestDataset(folder_root, transform)
  loader = DataLoader(test_set, batch_size=256, shuffle=False)
  return loader, loader.dataset.file_names

  
# Run every trained model on every split in `evals` (validation, then test)
# and write one CSV of predictions per (split, model) pair.
# The loop variable is named `eval_type` (as in dataloader_builder) so the
# builtin `eval` is not shadowed for the rest of the notebook.
for eval_type, src in evals:

  print('Evaluate step - ' + eval_type)

  for p, m in models.items():

    csv_name = p.name.replace('.pth', '.csv')

    # Skip models whose prediction is already stored under ./ensemble/<split>/
    predict_name = Path(f'./ensemble/{eval_type}/') / Path(csv_name)

    if predict_name.exists():
      print(f'Skipping prediction - {p.name}')
      continue

    print(f'Evaluating - {p.name}')
    # Make prediction.
    # torch.load unpickles the file: only load checkpoints from a trusted source.
    data = torch.load(p, map_location=device)
    m.load_state_dict(data['model'])
    m = m.to(device)
    # Each model exposes its own preprocessing as `m.transforms`.
    transform = m.transforms
    dataloader, names = dataloader_builder(eval_type, src, transform)
    # Only the second returned value is used here.
    # NOTE(review): presumably val_step puts the model in eval mode and
    # disables gradients - confirm in modular.evaluate.
    _, probs, _ = val_step(m, dataloader,
                           device, out_dim, n_test)

    # Serialize prediction to csv.
    # NOTE(review): the file is written to the working directory as
    # "<split>.<model>.csv", not to `predict_name`; the skip check above only
    # passes once the file has been placed under ./ensemble/<split>/.
    save_as = eval_type + '.' + csv_name
    df = pd.DataFrame(probs)
    df.index = names
    df.to_csv(save_as)

    # Download prediction
    if IN_COLAB:
      from google.colab import files
      files.download(save_as)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Downloading: "https://download.pytorch.org/models/maxvit_t-bc5ab103.pth" to /root/.cache/torch/hub/checkpoints/maxvit_t-bc5ab103.pth
100%|██████████| 119M/119M [00:01<00:00, 90.1MB/s]
Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 89.9MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-cd907fc2.pth" to /root/.cache/torch/hub/checkpoints/resnet101-cd907fc2.pth
100%|██████████| 171M/171M [00:01<00:00, 91.5MB/s]
Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:00<00:00, 90.1MB/s]


Evaluate step - validation
Skipping prediction - AMLMAXVIT_T.pth
Skipping prediction - amlresnet50.pth
Skipping prediction - exotic.amlresnet50.pth
Skipping prediction - AMLResnet50_FastAI.pth
Skipping prediction - amlresnet101.pth
Skipping prediction - efficientnet_v2_s.pth
Evaluate step - test
Skipping prediction - AMLMAXVIT_T.pth
Evaluating - amlresnet50.pth


100%|██████████| 40/40 [02:13<00:00,  3.35s/it]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Skipping prediction - exotic.amlresnet50.pth
Skipping prediction - AMLResnet50_FastAI.pth
Skipping prediction - amlresnet101.pth
Skipping prediction - efficientnet_v2_s.pth


## Ensemble (validation)

The accuracy of the ensemble may change depending on which models are included.

In [4]:
from modular.ensemble import ensemble

# Folder with the per-model validation predictions, and the name of the
# combined CSV (the accuracy cell further down reads ./val.ensemble.csv).
parent_dir = './ensemble/validation'
ensemble_csv_name = 'val.ensemble.csv'

ensemble(parent_dir, ensemble_csv_name, with_probs=True)

In [7]:
import torch
# `torchvision` itself must be bound: earlier cells only do
# `from torchvision... import ...` and `import torchvision.transforms as transforms`,
# so `torchvision.transforms.ToTensor()` below raised NameError on a fresh kernel.
import torchvision
import modular.datasets as datasets

# Labelled validation loader; the next cell iterates it to collect the
# ground-truth classes. suffle=False presumably keeps a fixed sample order so
# the labels line up with the rows of the ensemble CSV - TODO confirm.
dataloader = datasets.get_dataloader(folder_root='./data/validation',
                                     transformer=torchvision.transforms.ToTensor(),
                                     batch_size=256,
                                     suffle=False)

In [9]:
from modular.utils import accuracy_fn
import pandas as pd

# Compute the labels of the validation set.
# The batches are gathered first and concatenated once, instead of growing a
# tensor with torch.cat on every iteration; this also keeps the labels in
# their original dtype rather than promoting them through a float seed tensor.
label_batches = [y for _, y in dataloader]
labels = torch.cat(label_batches, dim=0) if label_batches else torch.tensor([])

# Load the labels of the ensemble
val_csv = pd.read_csv('./val.ensemble.csv')
val_labels = torch.tensor(val_csv['class'].values)

# Both sequences must describe the same samples.
assert len(labels) == len(val_labels), (
    f'{len(labels)} validation labels vs {len(val_labels)} ensemble predictions'
)

# Calculate the accuracy
accuracy_fn(labels, val_labels)

86.22

## Ensemble (test)

In [10]:
from modular.ensemble import ensemble

# Same call as for the validation split, pointed at the per-model test
# predictions; the combined result is named test.ensemble.csv.
parent_dir = './ensemble/test'
ensemble_csv_name = 'test.ensemble.csv'

ensemble(parent_dir, ensemble_csv_name, with_probs=True)