<a href="https://colab.research.google.com/github/wilberquito/Thesis/blob/main/Code/nn/notebooks/resnet18/ResNet18_V0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ResNet18, V0

In [1]:
try:
    import torchmetrics, mlxtend
    print(f"mlxtend version: {mlxtend.__version__}")
    assert int(mlxtend.__version__.split(".")[1]) >= 19, "mlxtend verison should be 0.19.0 or higher"
except:
    !pip install -q torchmetrics -U mlxtend # <- Note: If you're using Google Colab, this may require restarting the runtime
    import torchmetrics, mlxtend
    print(f"mlxtend version: {mlxtend.__version__}")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m46.6 MB/s[0m eta [36m0:00:00[0m
[?25hmlxtend version: 0.22.0


In [2]:
import requests
from pathlib import Path
import pandas as pd
import seaborn as sns
from zipfile import ZipFile

# Detect whether the notebook runs on Google Colab.
# Only a failed import means "not on Colab". The Drive mount happens outside
# the guarded import so that a mount failure is reported here instead of being
# mistaken for a local run (which would make the later cells skip pulling the
# code and data archives).
try:
    import google.colab
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True
    drive.mount('/content/drive')

print(f"In colab: {IN_COLAB}")

Mounted at /content/drive
In colab: True


In [3]:
def pull_code():
  """Refresh the project code from the archive stored on Google Drive.

  Removes any existing `modular/` directory, then extracts `modular.zip`
  from the mounted Drive folder into the current working directory
  (presumably recreating `modular/` -- confirm against the archive layout).
  The output of `unzip` is discarded, so a missing or corrupt archive does
  not produce any message here.
  """
  print("Unzipping the code from google drive")
  !rm -rf modular/
  !unzip ./drive/MyDrive/wilberquito_thesis/modular.zip -d . >/dev/null 2>&1

# The archive is read from the Drive mount, which only exists on Colab.
if IN_COLAB:
    pull_code()

Unzipping the code from google drive


In [4]:
def pull_data():
  answer = input("You sure? Y/n\n")
  if answer != "Y":
    return

  print("Unzipping the data from google drive")
  !rm -rf data.etl/
  !unzip ./drive/MyDrive/wilberquito_thesis/data.zip -d . >/dev/null 2>&1

if IN_COLAB:
    pull_data() 

You sure? Y/n
Y
Unzipping the data from google drive


In [5]:
# Show the GPU(s) attached to this runtime, with driver/CUDA versions and memory usage.
!nvidia-smi

Fri May 26 09:02:09 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Scheduler

In [6]:
# Training hyperparameters, read by the training cell further down.
LEARNING_RATE = 1e-3  # `lr` passed to the SGD optimizer
EPOCHS = 20  # `num_epochs` passed to engine.train_model

## Writer

In [7]:
import modular.utility as m_utility

# Object handed to engine.train_model through its `writter=` argument.
# NOTE(review): presumably it persists checkpoints/metrics under the name
# 'resnet18_v0' -- confirm in modular.utility.
writter = m_utility.model_writter('resnet18_v0')

## Train

In [None]:
from torchvision import models
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision.models import (ResNet18_Weights)
from torch.utils.data import DataLoader
import torch
import torchvision as tv
import modular.dataset as m_dataset
import modular.utility as m_utility
import modular.engine as engine
import modular.models as m_models

# Fix the seed through the project helper before any model or data object is
# created. Statement order below matters for reproducibility.
m_utility.set_seed(42)

# Build the dataframes: train/test tables plus `mapping` (class name -> class index)
data_dir = 'data.etl'
data_folder = '512'
train_df, test_df, mapping = m_dataset.get_df(data_dir, data_folder)

# Use the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# One output unit per class found in the mapping
out_features = len(mapping)
model = m_models.ResNet18_Melanoma(out_features)
model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Every model parameter is handed to the optimizer (nothing is filtered out here)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

# Inverse of `mapping`: class index -> class name
idx_to_class = { k : classname for classname, k in mapping.items() }

# Hold out 15% of the training table for validation (fixed random_state);
# note that `train_df` is rebound to the reduced training split
train_df, validate_df = m_dataset.train_validate_split(train_df,
                                                       random_state=42,
                                                       validate_size=0.15)



train_transforms, val_transforms = m_dataset.get_transforms(image_size=256)
# NOTE(review): the training transforms returned above are discarded and the
# validation transforms are used for training as well. Presumably intentional
# for this V0 baseline (no training-specific transforms) -- confirm.
train_transforms = val_transforms

train_dataset = m_dataset.MelanomaDataset(train_df,
                                          mode='train',
                                          transforms=train_transforms,
                                          idx_to_class=idx_to_class)
validate_dataset = m_dataset.MelanomaDataset(validate_df,
                                             mode='validate',
                                             transforms=val_transforms,
                                             idx_to_class=idx_to_class)

batch_size = 400

# Only the training loader shuffles; validation keeps a fixed order
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validate_dataloader = DataLoader(validate_dataset, batch_size=batch_size, shuffle=False)

# Test dataset and dataloader.
# NOTE(review): no `transforms` argument is passed for the test set, and
# `test_dataloader` is not used anywhere else in this cell -- confirm intent.
test_dataset = m_dataset.MelanomaDataset(test_df,
                                         mode='test',
                                         idx_to_class=idx_to_class)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Loaders for the two phases, keyed by phase name
dataloaders = {
    'train': train_dataloader,
    'val': validate_dataloader,
}

# Number of samples per phase
datasets_size = {
    'train': len(train_dataset),
    'val': len(validate_dataset),
}

# Bundle of loaders and dataset sizes, passed as one argument to engine.train_model
about_data = {
    'dataloaders': dataloaders,
    'datasets': {
        'size': datasets_size
    }
}

MEL_IDX = 6 # Index of the melanoma class; check data exploration notebook
# Passed as `val_times` to engine.train_model.
# NOTE(review): its exact meaning is defined in modular.engine -- confirm there.
VAL_TIMES = 4

# Run training; returns a model object and the collected statistics
model_ft, stats = engine.train_model(model,
                                     MEL_IDX,
                                     about_data,
                                     device,
                                     criterion,
                                     optimizer,
                                     num_epochs=EPOCHS,
                                     writter=writter,
                                     val_times=VAL_TIMES)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 145MB/s]


Epoch 1/20
----------
Train OvR: 0.8005 	|	Train Loss: 1.1176 	|	Train Acc: 0.6250
Val OvR: 0.8171 	|	Val Loss: 0.9044 	|	Val Acc: 0.6823
=> Saving a new checkpoint
Epoch 2/20
----------
Train OvR: 0.8591 	|	Train Loss: 0.8566 	|	Train Acc: 0.6992
Val OvR: 0.8471 	|	Val Loss: 0.8277 	|	Val Acc: 0.7077
=> Saving a new checkpoint
Epoch 3/20
----------
Train OvR: 0.8618 	|	Train Loss: 0.7883 	|	Train Acc: 0.7205
Val OvR: 0.8648 	|	Val Loss: 0.7877 	|	Val Acc: 0.7194
=> Saving a new checkpoint
Epoch 4/20
----------
Train OvR: 0.8890 	|	Train Loss: 0.7384 	|	Train Acc: 0.7372
Val OvR: 0.8763 	|	Val Loss: 0.7560 	|	Val Acc: 0.7284
=> Saving a new checkpoint
Epoch 5/20
----------
Train OvR: 0.9309 	|	Train Loss: 0.6977 	|	Train Acc: 0.7512
Val OvR: 0.8903 	|	Val Loss: 0.7305 	|	Val Acc: 0.7360
=> Saving a new checkpoint
Epoch 6/20
----------
Train OvR: 0.9029 	|	Train Loss: 0.6579 	|	Train Acc: 0.7638
Val OvR: 0.8927 	|	Val Loss: 0.7116 	|	Val Acc: 0.7394
=> Saving a new checkpoint
Epoch 7/20

In [None]:
# Download the artifacts of this run to the local machine (Colab only).
# The file stem is 'resnet18_v0', the name given to the writer in this
# notebook and the checkpoint name loaded in the evaluation cells below;
# the previous 'resnet18_v1' stem belongs to a different notebook version.
# NOTE(review): assumes the writer saves '<name>.pth.tar' and '<name>.csv'
# in the working directory -- confirm in modular.utility.
if IN_COLAB:
  from google.colab import files
  files.download('resnet18_v0.pth.tar')
  files.download('resnet18_v0.csv')

## Train performance

In [None]:
def pull_models():
  """Copy the saved ResNet18 models from Google Drive into the working directory.

  Removes any existing local `resnet18/` directory, then recursively copies
  `models/resnet18` from the mounted Drive folder. The output of `cp` is
  discarded, so a missing source directory does not produce any message here.
  """
  print("Loading models from google drive")
  !rm -rf resnet18/
  !cp -r ./drive/MyDrive/wilberquito_thesis/models/resnet18 . >/dev/null 2>&1

# The models are read from the Drive mount, which only exists on Colab.
if IN_COLAB:
    pull_models()
     

In [None]:
import torch
import modular.utility as m_utility
import modular.models as m_models


# Checkpoint of the V0 run; the cells below read its 'stats' and
# 'model_state_dict' entries.
model_path = 'resnet18/resnet18_v0.pth.tar'
# map_location='cpu' lets a checkpoint that was saved on a GPU be loaded on a
# CPU-only runtime as well; the evaluation cells move the model to `device`
# after loading the weights.
# torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load(model_path, map_location='cpu')

## Train vs Validate curves

In [None]:
# Plot the statistics stored in the checkpoint under the 'stats' key
# (relies on `checkpoint` loaded in the previous cell).
m_utility.plot_curves(checkpoint['stats'])

## Melanoma vs Rest (AUC, OVR)

In [None]:
import torch
import torchvision as tv
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.models import (ResNet18_Weights)
import torchvision.models as models
import modular.dataset as m_dataset
import modular.utility as m_utility
import modular.models as m_models


# Fix the seed through the project helper so the split below matches training
m_utility.set_seed(42)

# Build the dataframes: training table plus `mapping` (class name -> class index)
data_dir = 'data.etl'
data_folder = '512'
train_df, _, mapping = m_dataset.get_df(data_dir, data_folder)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

batch_size = 512

# Inverse of `mapping`: class index -> class name
idx_to_class = { k : classname for classname, k in mapping.items() }

# Recreate the validation split with the same parameters as the training cell
train_df, validate_df = m_dataset.train_validate_split(train_df,
                                                       random_state=42,
                                                       validate_size=0.15)


_, val_transforms = m_dataset.get_transforms(image_size=256)

validate_dataset = m_dataset.MelanomaDataset(validate_df,
                                             mode='validate',
                                             transforms=val_transforms,
                                             idx_to_class=idx_to_class)
validate_dataloader = DataLoader(validate_dataset, batch_size=batch_size, shuffle=False)
# Class names ordered by class index rather than by dict insertion order
class_names = [name for _, name in sorted(validate_dataset.idx_to_class.items())]

# Rebuild the architecture and restore the trained weights from `checkpoint`
# (loaded in an earlier cell)
out_features = len(mapping)
model = m_models.ResNet18_Melanoma(out_features)

model_state_dict = checkpoint['model_state_dict']
model.load_state_dict(model_state_dict)
model = model.to(device)
# Inference only: switch layers such as batch normalization and dropout to
# evaluation behavior before computing predictions
model.eval()

MEL_IDX = 6 # Index of the melanoma class; check data exploration notebook

m_utility.plot_ovr_multiclass_roc(model,
                                  MEL_IDX,
                                  validate_dataloader,
                                  device,
                                  val_times=4)

## Confusion Matrix

In [None]:
import torch
import torchvision as tv
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.models import (ResNet18_Weights)
import torchvision.models as models
import modular.dataset as m_dataset
import modular.utility as m_utility
import modular.models as m_models


# Fix the seed through the project helper so the split below matches training
m_utility.set_seed(42)

# Build the dataframes: training table plus `mapping` (class name -> class index)
data_dir = 'data.etl'
data_folder = '512'
train_df, _, mapping = m_dataset.get_df(data_dir, data_folder)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

batch_size = 512

# Inverse of `mapping`: class index -> class name
idx_to_class = { k : classname for classname, k in mapping.items() }

# Recreate the validation split with the same parameters as the training cell
train_df, validate_df = m_dataset.train_validate_split(train_df,
                                                       random_state=42,
                                                       validate_size=0.15)


_, val_transforms = m_dataset.get_transforms(image_size=256)

validate_dataset = m_dataset.MelanomaDataset(validate_df,
                                             mode='validate',
                                             transforms=val_transforms,
                                             idx_to_class=idx_to_class)
validate_dataloader = DataLoader(validate_dataset, batch_size=batch_size, shuffle=False)
# Axis labels for the confusion matrix: sort by class index so that label i
# always names class i, independent of the dict's insertion order
class_names = [name for _, name in sorted(validate_dataset.idx_to_class.items())]

# Rebuild the architecture and restore the trained weights from `checkpoint`
# (loaded in an earlier cell)
out_features = len(mapping)
model = m_models.ResNet18_Melanoma(out_features)

model_state_dict = checkpoint['model_state_dict']
model.load_state_dict(model_state_dict)
model = model.to(device)
# Inference only: switch layers such as batch normalization and dropout to
# evaluation behavior before computing predictions
model.eval()

m_utility.plot_confusion_matrix(model,
                                validate_dataloader,
                                class_names,
                                device,
                                show_normed=True)