<a href="https://colab.research.google.com/github/waleedGeorgy/deep-learning/blob/main/Experiment_Tracking_%26_Milestone_Project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision

print(torch.__version__)
print(torchvision.__version__)

2.1.0+cu121
0.16.0+cu121


- Importing necessary libraries and scripts

In [2]:
# Continue with regular imports
import matplotlib.pyplot as plt

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
    from going_modular.going_modular import data_setup, engine
except:
    # Get the going_modular scripts
    print("[INFO] Couldn't find going_modular scripts... downloading them from GitHub.")
    !git clone https://github.com/mrdbourke/pytorch-deep-learning
    !mv pytorch-deep-learning/going_modular .
    !rm -rf pytorch-deep-learning
    from going_modular.going_modular import data_setup, engine

[INFO] Couldn't find torchinfo... installing it.
[INFO] Couldn't find going_modular scripts... downloading them from GitHub.
Cloning into 'pytorch-deep-learning'...
remote: Enumerating objects: 4056, done.[K
remote: Counting objects: 100% (1234/1234), done.[K
remote: Compressing objects: 100% (110/110), done.[K
remote: Total 4056 (delta 1141), reused 1124 (delta 1124), pack-reused 2822[K
Receiving objects: 100% (4056/4056), 649.94 MiB | 38.59 MiB/s, done.
Resolving deltas: 100% (2386/2386), done.
Updating files: 100% (248/248), done.


In [3]:
def set_seed(seed:int = 42):
  """
  Sets a random seed.

  The Random seed is automatically initialized at 42.

  Args:
    seed: set the desired seed values (optional)
  """
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)

In [4]:
set_seed()

# Getting The Data

In [5]:
# Function to download data
import os
from pathlib import Path
import zipfile
import requests

def download_data(source:str, destination:str, remove_source: bool = True) -> Path:
  """Downloads a zipped dataset from source and unzips it in destination"""
  main_dir = Path('data/')
  data_dir = main_dir / destination

  if data_dir.is_dir():
    print(f'{data_dir} already exists.')
  else:
    print(f'{data_dir} does not exist. Creating it...')
    data_dir.mkdir(parents = True, exist_ok = True)

    target_file = Path(source).name
    with open(main_dir / target_file, 'wb') as f:
      print(f'Downloading {target_file} file from {source}')
      request = requests.get(source)
      f.write(request.content)

    with zipfile.ZipFile(main_dir / target_file, 'r') as zip_ref:
      print(f'Extracting {target_file} file...')
      zip_ref.extractall(data_dir)

    if remove_source:
      os.remove(main_dir / target_file)

  return data_dir

In [6]:
image_path = download_data(source = 'https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip',
                           destination='pizza_steak_sushi')

data/pizza_steak_sushi does not exist. Creating it...
Downloading pizza_steak_sushi.zip file from https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip
Extracting pizza_steak_sushi.zip file...


In [7]:
image_path

PosixPath('data/pizza_steak_sushi')

In [8]:
train_dir = image_path / 'train'
test_dir = image_path / 'test'

train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

# Creating Datasets and DataLoader

In [9]:
# Creating the manual transform
normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406],
                                 std = [0.229, 0.224, 0.225])

manual_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    normalize
])

In [10]:
# Creating dataloaders
from torchvision import datasets
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir = train_dir,
                                                                               test_dir = test_dir,
                                                                               transform = manual_transform,
                                                                               batch_size = 32,)
train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x7916b1c29600>,
 <torch.utils.data.dataloader.DataLoader at 0x7915df16de10>,
 ['pizza', 'steak', 'sushi'])

In [11]:
# Creating an automatic transforms from the EfficientNetB0
eff_net_b0_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
auto_transform = eff_net_b0_weights.transforms()

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir = train_dir,
                                                                               test_dir = test_dir,
                                                                               transform = auto_transform,
                                                                               batch_size = 32,)
train_dataloader, test_dataloader, class_names, auto_transform

(<torch.utils.data.dataloader.DataLoader at 0x7915df16f4f0>,
 <torch.utils.data.dataloader.DataLoader at 0x7915df16dae0>,
 ['pizza', 'steak', 'sushi'],
 ImageClassification(
     crop_size=[224]
     resize_size=[256]
     mean=[0.485, 0.456, 0.406]
     std=[0.229, 0.224, 0.225]
     interpolation=InterpolationMode.BICUBIC
 ))

# Creating a pre-trained model

In [16]:
# THERE COULD BE A BUG IN THE 0.16 VERSION OF TORCHVISION SO USE THIS CODE INSTEAD TO CREATE THE EFFICIENTNETB0 MODEL
# from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
# from torchvision.models._api import WeightsEnum
# from torch.hub import load_state_dict_from_url

# def get_state_dict(self, *args, **kwargs):
#     kwargs.pop("check_hash")
#     return load_state_dict_from_url(self.url, *args, **kwargs)
# WeightsEnum.get_state_dict = get_state_dict

# efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)
# efficientnet_b0(weights="DEFAULT")

In [17]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [18]:
# Creating the pretrained model
model = torchvision.models.efficientnet_b0(weights='DEFAULT').to(device)

In [19]:
# Freezing the backbone and updating the classifier head
for param in model.features.parameters():
  param.requires_grad = False

set_seed()
model.classifier = nn.Sequential(
    nn.Dropout(p = 0.2, inplace=True),
    nn.Linear(in_features = 1280, out_features=len(class_names), bias=True)
).to(device)

In [20]:
model_summary = summary(model = model,
                        input_size=(32,3,224,224),
                        verbose = 0,
                        col_names = ['input_size','output_size','num_params','trainable'],
                        row_settings = ['var_names'])
model_summary

Layer (type (var_name))                                      Input Shape               Output Shape              Param #                   Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]         [32, 3]                   --                        Partial
├─Sequential (features)                                      [32, 3, 224, 224]         [32, 1280, 7, 7]          --                        False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]         [32, 32, 112, 112]        --                        False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]         [32, 32, 112, 112]        (864)                     False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]        [32, 32, 112, 112]        (64)                      False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]        [32, 32, 112, 112]        --         

# Training a model and Tracking its results

In [21]:
# Define a loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.001)

In [22]:
# Setup a summary writer to track the results of model training and testing
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
writer

<torch.utils.tensorboard.writer.SummaryWriter at 0x7915d8b62b00>

In [23]:
# Now we'll update our train function in engine.py to track our model
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

from going_modular.going_modular.engine import train_step, test_step

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]}
    For example if training for epochs=2:
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]}
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Make sure model on target device
    model.to(device)

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        # NEW: Experiment tracking
        # What we want to track
        writer.add_scalars(main_tag = 'Loss', # The name of what wee want to track
                           tag_scalar_dict={'train_loss':train_loss, 'test_loss':test_loss}, # The values we want to track
                           global_step=epoch) # We want to track the values every epoch

        writer.add_scalars(main_tag = 'Accuracy',
                          tag_scalar_dict = {'train_acc':train_acc, 'test_acc': test_acc},
                          global_step = epoch)

        # To see what computation our model is going through
        writer.add_graph(model = model,
                         input_to_model=torch.randn(32,3,224,224).to(device))

    # Closing the writer
    writer.close()

    # Return the filled results at the end of the epochs
    return results

In [25]:
# Now to train our model with the updated train function
# The results of training our model will be stored in the 'runs' folder
set_seed()
model_results = train(model = model,
                      train_dataloader=train_dataloader,
                      test_dataloader = test_dataloader,
                      loss_fn = loss_fn,
                      optimizer = optimizer,
                      epochs = 5,
                      device = device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.5359 | train_acc: 0.9258 | test_loss: 0.5879 | test_acc: 0.8456
Epoch: 2 | train_loss: 0.5374 | train_acc: 0.8008 | test_loss: 0.5588 | test_acc: 0.8655
Epoch: 3 | train_loss: 0.5311 | train_acc: 0.8203 | test_loss: 0.5006 | test_acc: 0.8759
Epoch: 4 | train_loss: 0.4616 | train_acc: 0.9180 | test_loss: 0.4964 | test_acc: 0.8873
Epoch: 5 | train_loss: 0.4903 | train_acc: 0.8008 | test_loss: 0.5342 | test_acc: 0.8561
