In [None]:
import os
import zipfile
from pathlib import Path
from typing import Dict, List

import matplotlib.pyplot as plt
import requests
import torch
import torchvision
from torch import nn
from torchinfo import summary
from torchvision import transforms
from tqdm.auto import tqdm

from going_modular import data_setup, engine
from going_modular.engine import train_step, test_step

In [2]:
# Prefer the GPU whenever PyTorch can see one; otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [3]:
# Set seeds
def set_seeds(seed: int=42):
    """Seeds PyTorch's random number generators so runs can be repeated.

    Args:
        seed (int, optional): Value handed to every seeding call. Defaults to 42.
    """
    # The first entry seeds general torch operations, the second seeds CUDA
    # torch operations (ones that happen on the GPU)
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
    
def download_data(source: str, destination: str, remove_source: bool = True) -> Path:
    """Downloads a zipped dataset from source and unzips to destination.

    Nothing is downloaded when the `data/<destination>` directory already
    exists; the existing path is returned as-is.

    Args:
        source (str): A link to a zipped file containing data.
        destination (str): A target directory (created under `data/`) to unzip data to.
        remove_source (bool): Whether to remove the source after downloading and extracting.
    
    Returns:
        pathlib.Path to downloaded data.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    
    Example usage:
        download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                      destination="pizza_steak_sushi")
    """
    
    # Setup path to data folder
    data_path = Path("data/")
    image_path = data_path / destination
    
    if image_path.is_dir():
        print(f"[INFO] {image_path} directory exists, skipping download.")
        return image_path

    print(f"[INFO] Did not find {image_path} directory, creating one...")
    # Only the parent folder is created up front. The target directory itself
    # is created once a valid archive is in hand, so a failed download does not
    # leave an empty directory behind that would make the next call skip the
    # download.
    data_path.mkdir(parents=True, exist_ok=True)

    target_file = Path(source).name
    archive_path = data_path / target_file

    # Fetch first and fail loudly on HTTP errors, so an error page is never
    # written to disk and mistaken for the archive
    print(f"[INFO] Downloading {target_file} from {source}...")
    request = requests.get(source)
    request.raise_for_status()
    with open(archive_path, "wb") as f:
        f.write(request.content)

    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        print(f"[INFO] Unzipping {target_file} data...")
        image_path.mkdir(parents=True, exist_ok=True)
        zip_ref.extractall(image_path)

    # remove zipfile
    if remove_source:
        os.remove(archive_path)

    return image_path

In [4]:
# Fetch the pizza/steak/sushi images (skipped when they are already on disk)
image_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    destination="pizza_steak_sushi",
)

[INFO] data/pizza_steak_sushi directory exists, skipping download.


In [5]:
# Locate the train/test folders inside the downloaded dataset
train_dir, test_dir = image_path / "train", image_path / "test"

# Preprocessing applied to every image: resize to 224x224 with bicubic
# interpolation, convert to a tensor, then normalize each channel
manual_transforms = transforms.Compose(
    [
        transforms.Resize(
            size=(224, 224),
            interpolation=transforms.InterpolationMode.BICUBIC,
        ),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Build both dataloaders (batches of 32) and collect the class names
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transforms,
    batch_size=32,
)

In [6]:
# Load EfficientNet-B0 with pretrained weights and move it to the target device
model = torchvision.models.efficientnet_b0(pretrained=True)
model = model.to(device)

# Freeze all base layers: no parameter under `features` receives gradients
model.features.requires_grad_(False)

# The new nn.Linear layer starts from random weights, so seed right before
# creating it
set_seeds()

# Swap in a classifier head with one output per class in our problem
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names), bias=True),
).to(device)

In [7]:
# Layer-by-layer overview of the model for an input of shape (32, 3, 224, 224)
summary(
    model,
    input_size=(32, 3, 224, 224),
    verbose=0,
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Output Shape         Param #
EfficientNet                                                 --                   --
├─Sequential (features)                                      [32, 1280, 7, 7]     --
│    └─ConvNormActivation (0)                                [32, 32, 112, 112]   --
│    │    └─Conv2d (0)                                       [32, 32, 112, 112]   (864)
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   (64)
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   --
│    └─Sequential (1)                                        [32, 16, 112, 112]   --
│    │    └─MBConv (0)                                       [32, 16, 112, 112]   (1,448)
│    └─Sequential (2)                                        [32, 24, 56, 56]     --
│    │    └─MBConv (0)                                       [32, 24, 56, 56]     (6,004)
│    │    └─MBConv (1)                       

In [8]:
# Cross-entropy loss, optimized with Adam over the model's parameters
# (learning rate 1e-3)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [9]:
from torch.utils.tensorboard import SummaryWriter

# Create the experiment-tracking writer. No log_dir is passed, so the writer
# picks its own default location.
# NOTE(review): the last cell points TensorBoard at `runs` -- confirm that this
# matches the writer's default directory.
writer = SummaryWriter()

In [10]:
from typing import Dict, List
from tqdm.auto import tqdm

from going_modular.engine import train_step, test_step

# Import train() function from: 
# https://github.com/mrdbourke/pytorch-deep-learning/blob/main/going_modular/going_modular/engine.py
def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
          writer: "torch.utils.tensorboard.SummaryWriter | None" = None) -> Dict[str, List]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout. When a
    writer is supplied, the same metrics are also logged to it every epoch.

    Args:
      model: A PyTorch model to be trained and tested.
      train_dataloader: A DataLoader instance for the model to be trained on.
      test_dataloader: A DataLoader instance for the model to be tested on.
      optimizer: A PyTorch optimizer to help minimize the loss function.
      loss_fn: A PyTorch loss function to calculate loss on both datasets.
      epochs: An integer indicating how many epochs to train for.
      device: A target device to compute on (e.g. "cuda" or "cpu").
      writer: Optional SummaryWriter-like object (anything with add_scalars()
        and flush()). Defaults to None, which disables experiment tracking
        and leaves the behaviour identical to calling train() without it.
        For example: train(..., writer=SummaryWriter()).
      
    Returns:
      A dictionary of training and testing loss as well as training and
      testing accuracy metrics. Each metric has a value in a list for 
      each epoch.
      In the form: {train_loss: [...],
                train_acc: [...],
                test_loss: [...],
                test_acc: [...]} 
      For example if training for epochs=2: 
              {train_loss: [2.0616, 1.0537],
                train_acc: [0.3945, 0.3945],
                test_loss: [1.2641, 1.5706],
                test_acc: [0.3400, 0.2973]} 
    """
    
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        # Experiment tracking (only when the caller asked for it)
        if writer is not None:
            # Train/test curves share one chart per metric; the step is the
            # zero-based epoch index
            writer.add_scalars(main_tag="Loss",
                               tag_scalar_dict={"train_loss": train_loss,
                                                "test_loss": test_loss},
                               global_step=epoch)
            writer.add_scalars(main_tag="Accuracy",
                               tag_scalar_dict={"train_acc": train_acc,
                                                "test_acc": test_acc},
                               global_step=epoch)

    # Push pending events out, but leave the writer open: the caller created
    # it and may want to keep logging to it
    if writer is not None:
        writer.flush()

    # Return the filled results at the end of the epochs
    return results

In [11]:
# Re-seed right before training so the run can be repeated
set_seeds()

# Train for 10 epochs and keep the per-epoch metrics
results = train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=10,
    device=device,
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0908 | train_acc: 0.4453 | test_loss: 0.9066 | test_acc: 0.5284
Epoch: 2 | train_loss: 0.8667 | train_acc: 0.7773 | test_loss: 0.7938 | test_acc: 0.7538
Epoch: 3 | train_loss: 0.7633 | train_acc: 0.7930 | test_loss: 0.7378 | test_acc: 0.7936
Epoch: 4 | train_loss: 0.7271 | train_acc: 0.7383 | test_loss: 0.6497 | test_acc: 0.8759
Epoch: 5 | train_loss: 0.6380 | train_acc: 0.7852 | test_loss: 0.6222 | test_acc: 0.9072
Epoch: 6 | train_loss: 0.5963 | train_acc: 0.7617 | test_loss: 0.5554 | test_acc: 0.9072
Epoch: 7 | train_loss: 0.5376 | train_acc: 0.9219 | test_loss: 0.5253 | test_acc: 0.9176
Epoch: 8 | train_loss: 0.5546 | train_acc: 0.7930 | test_loss: 0.5231 | test_acc: 0.9176
Epoch: 9 | train_loss: 0.5638 | train_acc: 0.8008 | test_loss: 0.4919 | test_acc: 0.9280
Epoch: 10 | train_loss: 0.5136 | train_acc: 0.8086 | test_loss: 0.4180 | test_acc: 0.8655


In [12]:
# Launch TensorBoard inside the notebook (Jupyter or Google Colab), reading
# logs from the `runs` directory
%load_ext tensorboard
%tensorboard --logdir runs