<a href="https://colab.research.google.com/github/wilberquito/AMLProject/blob/main/Ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning Project: Image Classification
## Advanced Machine Learning


> Wilber E. Bermeo Quito 
>
> Judit Quintana Massana
>
> April 2023

## Make predictions for each model

In [1]:
import zipfile
from pathlib import Path
import torch
import matplotlib.pyplot as plt

In [3]:
try:
    import google.colab
    IN_COLAB = True
    ! pip install torchvision
    ! pip install torchinfo
    
    from google.colab import drive
    drive.mount('/content/drive')

    !rm -rf data
    data_path = Path('/content/drive/MyDrive/AML/dataset_CIFAR10.zip')
    with zipfile.ZipFile(data_path,"r") as zip_ref:
        zip_ref.extractall("data")

    !rm -rf test
    data_path = Path('/content/drive/MyDrive/AML/test_unlabelled.zip')
    with zipfile.ZipFile(data_path,"r") as zip_ref:
        zip_ref.extractall("test")

    !rm -rf modular
    data_path = Path('/content/drive/MyDrive/AML/modular.zip')
    with zipfile.ZipFile(data_path,"r") as zip_ref:
        zip_ref.extractall(".")

    # Getting the trained models
    !rm -rf trained
    !cp -rf /content/drive/MyDrive/AML/trained ./trained

    # Getting the predictions
    !rm -rf ensemble
    !cp -rf /content/drive/MyDrive/AML/ensemble ./ensemble
except:
    IN_COLAB = False

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from modular.evaluate import val_step
import modular.models as models
from modular.datasets import TestDataset
import modular.datasets as datasets
import torch
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from modular.utils import set_seeds
import pandas as pd
import torchvision.transforms as transforms


# Seed before any model is instantiated
# (set_seeds presumably seeds the RNGs in use -- see modular.utils).
set_seeds(seed=42)

# Run on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

out_dim = 10   # passed to every model constructor and to val_step
n_test = 4     # forwarded to val_step; its meaning is defined there
data_path = "./test/test_unlabelled"

# Checkpoint file -> freshly built model; the weights are loaded later.
# NOTE(review): this rebinds `models`, which until this line refers to the
# `modular.models` module imported at the top of the cell.
models = {
    Path('./trained/efficientnet_v2_s.pth'): models.AMLEfficientNet_V2_S(out_dim),
    Path('./trained/AMLMAXVIT_T.pth'): models.AMLMAXVIT_T(out_dim),
    Path('./trained/amlresnet101.pth'): models.AMLResnet101(out_dim),
    # Despite the name, this one is a ResNeXt50.
    Path('./trained/AMLResnet_50W.pth'): models.AMLResnet_50W(out_dim),
}

# (split name, image folder) pairs, processed in this order.
evals = [
    ('validation', './data/validation'),
    ('test', data_path),
]

def dataloader_builder(eval_type, folder_root, transform=None):
  """Build the dataloader for one evaluation split plus a name per image.

  Args:
    eval_type: ``'test'`` builds a ``TestDataset`` wrapped in a ``DataLoader``;
      any other value goes through ``datasets.get_dataloader``.
    folder_root: image folder handed to the dataset / dataloader factory.
    transform: transform forwarded to the dataset (``None`` by default).

  Returns:
    A ``(dataloader, names)`` tuple. For the test split ``names`` is the
    dataset's ``file_names``; otherwise it holds the last two
    ``/``-separated components of every path in ``dataloader.dataset.imgs``.
  """
  if eval_type != 'test':
    # `suffle` is spelled exactly as in the original call; presumably it
    # matches the keyword accepted by get_dataloader -- confirm in modular.datasets.
    loader = datasets.get_dataloader(
        folder_root=folder_root,
        transformer=transform,
        batch_size=256,
        suffle=False,
    )
    image_ids = ['/'.join(entry[0].split('/')[-2:]) for entry in loader.dataset.imgs]
    return loader, image_ids

  loader = DataLoader(TestDataset(folder_root, transform), batch_size=256, shuffle=False)
  image_ids = loader.dataset.file_names
  print(transform)
  return loader, image_ids

  
# Run every model on every evaluation split (validation and test) and write
# one CSV per (split, model) pair.
if IN_COLAB:
  # Imported once instead of on every loop iteration.
  from google.colab import files

# `eval_name` rather than `eval`, so the builtin `eval` is not shadowed for
# the rest of the session.
for eval_name, src in evals:

  print('Evaluate step - ' + eval_name)

  for ckpt_path, model in models.items():

    # Skip models whose prediction already exists under ./ensemble/<split>/.
    # NOTE(review): new predictions are written to the working directory as
    # '<split>.<model>.csv' (see `save_as` below), so they are only picked up
    # by this check once they have been placed under ./ensemble/<split>/.
    predict_name = Path(f'./ensemble/{eval_name}/') / Path(ckpt_path.name.replace('.pth', '.csv'))

    if predict_name.exists():
      print(f'Skipping prediction - {ckpt_path.name}')
      continue

    print(f'Evaluating - {ckpt_path.name}')

    # Load the trained weights. torch.load unpickles the file, so only load
    # checkpoints from a trusted source.
    data = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(data['model'])
    model = model.to(device)

    # Each model supplies its own transform through its `transforms` attribute.
    transform = model.transforms
    dataloader, names = dataloader_builder(eval_name, src, transform)

    # Only the second value returned by val_step is used.
    _, probs, _ = val_step(model, dataloader, device, out_dim, n_test)

    # Release the accelerator before the next model is loaded; otherwise every
    # evaluated model stays resident on the device.
    model.to('cpu')

    # Serialize prediction to csv, one row per image name
    save_as = eval_name + '.' + ckpt_path.name.replace('.pth', '.csv')
    df = pd.DataFrame(probs)
    df.index = names
    df.to_csv(save_as)

    # Download prediction
    if IN_COLAB:
      files.download(save_as)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Evaluate step - validation
Skipping prediction - efficientnet_v2_s.pth
Evaluating - AMLMAXVIT_T.pth


 18%|█▊        | 7/40 [01:00<04:23,  7.98s/it]

## Make the ensemble

In [4]:
from modular.ensemble import ensemble
# Second argument handed to ensemble() below
# (presumably the name of the CSV it writes -- confirm in modular.ensemble).
ensemble_csv_name = 'ensemble.csv'
# First argument handed to ensemble() below.
# NOTE(review): the prediction cell checks ./ensemble/<split>/ and writes its
# CSVs to the working directory, not to ./predictions -- confirm this folder
# is where the per-model CSVs actually live.
parent_dir = './predictions'

# Combine the per-model predictions; what `with_probs=True` selects is
# defined in modular.ensemble.
ensemble(parent_dir, ensemble_csv_name, with_probs=True)

## Test ensemble accuracy