<a href="https://colab.research.google.com/github/yiwenwangANU/pytorch_review/blob/main/05_pytorch_going_modular.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

###Turn notebook cell code into python script and run in command line

In [42]:
%%writefile going_modular/get_data.py
import requests
import zipfile
import os

def data_download(
    data_path: str,
    request_url: str):
  """
  Download data in zip file from remote url.
  If the data download is successful, you should be able to access the pizza_steak_sushi images from the data directory.
  Args:
    data_path: Path to data local folder
    request_url: Url for data file to get using get request
  Returns:
    None
  """
  # Setup path to data folder
  image_path = os.path.join(data_path, 'pizza_steak_sushi')

  # If the image folder doesn't exist, download it and prepare it...
  if os.path.exists(image_path):
      print(f"{image_path} directory exists.")
  else:
      print(f"Did not find {image_path} directory, creating one...")
      os.makedirs(image_path, exist_ok=True)

  # Download pizza, steak, sushi data
  with open(os.path.join(data_path, "pizza_steak_sushi.zip"), "wb") as f:
      request = requests.get(request_url)
      print("Downloading pizza, steak, sushi data...")
      f.write(request.content)

  # Unzip pizza, steak, sushi data
  with zipfile.ZipFile(os.path.join(data_path, "pizza_steak_sushi.zip"), "r") as zip_ref:
      print("Unzipping pizza, steak, sushi data...")
      zip_ref.extractall(image_path)

Overwriting going_modular/get_data.py


In [43]:
%%writefile going_modular/data_setup.py
import os
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    batch_size: int,
    num_workers: int = NUM_WORKERS
):
  """
  Create training and testing dataloader
  Args:
    train_dir: Path to training directory.
    test_dir: Path to testing directory.
    batch_size: Number of samples per batch in each of the DataLoaders.
    num_workers: An integer for number of workers per DataLoader.
  Returns:
    a tuple of (train_loader, test_loader, classnames)
    Where class_names is a list of the target classes.
  """
  train_transform = transforms.Compose([
    # Resize the images to 64x64
    transforms.Resize(size=(64, 64)),
    # Flip the images randomly on the horizontal
    transforms.RandomHorizontalFlip(p=0.5), # p = probability of flip, 0.5 = 50% chance
    # Turn the image into a torch.Tensor
    transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0
    ])

  # Write transform for image
  test_transform = transforms.Compose([
      # Resize the images to 64x64
      transforms.Resize(size=(64, 64)),
      # Turn the image into a torch.Tensor
      transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0
  ])
  train_data = datasets.ImageFolder(root=train_dir, # target folder of images
                                    transform=train_transform) # transforms to perform on data (images)
  test_data = datasets.ImageFolder(root=test_dir,
                                  transform=test_transform)
  class_names = train_data.classes

  train_loader = DataLoader(dataset=train_data,
                            batch_size=batch_size, # how many samples per batch?
                            num_workers=num_workers, # how many subprocesses to use for data loading? (higher = more)
                            shuffle=True, # shuffle the data?
                            pin_memory=True) # faster training at cost of more memory
  test_loader = DataLoader(dataset=test_data,
                          batch_size=batch_size,
                          num_workers=num_workers,
                          pin_memory=True) # don't usually need to shuffle testing data

  return train_loader, test_loader, class_names

Overwriting going_modular/data_setup.py


In [44]:
%%writefile going_modular/model_builder.py

import torch
from torch import nn

class TinyVGG(nn.Module):
    """
    Model architecture copying TinyVGG from:
    https://poloclub.github.io/cnn-explainer/
    Args:
      input_shape: An integer indicating number of input channels.
      hidden_units: An integer indicating number of hidden units between layers.
      output_shape: An integer indicating number of output units.
    Returns:
      tinyVGG model
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3, # how big is the square that's going over the image?
                      stride=1, # default
                      padding=1), # options = "valid" (no padding) or "same" (output has same shape as input) or int for specific number
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                         stride=2) # default stride value is same as kernel_size
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # Where did this in_features shape come from?
            # It's because each layer of our network compresses and changes the shape of our input data.
            nn.Linear(in_features=hidden_units*16*16,
                      out_features=output_shape)
        )

    def forward(self, x: torch.Tensor):
        x = self.conv_block_1(x)
        # print(x.shape)
        x = self.conv_block_2(x)
        # print(x.shape)
        x = self.classifier(x)
        # print(x.shape)
        return x
        # return self.classifier(self.conv_block_2(self.conv_block_1(x))) # <- leverage the benefits of operator fusion


Overwriting going_modular/model_builder.py


In [45]:
%%writefile going_modular/engine.py
"""
Contains functions for training and testing a PyTorch model.
"""
from tqdm.auto import tqdm
import torch
from torch import nn

def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device):
  """
  Performs a single training step (one epoch) for a PyTorch model.

    Args:
        model (torch.nn.Module): The PyTorch model to be trained.
        dataloader (torch.utils.data.DataLoader): Dataloader providing the training data in batches.
        loss_fn (torch.nn.Module): Loss function used to compute the error between predictions and true labels.
        optimizer (torch.optim.Optimizer): Optimizer used to update model parameters.
        device (torch.device): Device used to train/evel the model.

    Returns:
        tuple: (train_loss, train_acc)
            - train_loss (float): The average loss across all training batches.
            - train_acc (float): The average accuracy across all training batches.
  """
  # Put model in train mode
  model.train()

  # Setup train loss and train accuracy values
  train_loss, train_acc = 0, 0

  # Loop through data loader data batches
  for batch, (X, y) in enumerate(dataloader):
      # Send data to target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      y_pred = model(X)

      # 2. Calculate  and accumulate loss
      loss = loss_fn(y_pred, y)
      train_loss += loss.item()

      # 3. Optimizer zero grad
      optimizer.zero_grad()

      # 4. Loss backward
      loss.backward()

      # 5. Optimizer step
      optimizer.step()

      # Calculate and accumulate accuracy metrics across all batches
      y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
      train_acc += (y_pred_class == y).sum().item()/len(y_pred)

  # Adjust metrics to get average loss and accuracy per batch
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device):
  # Put model in eval mode
  model.eval()

  # Setup test loss and test accuracy values
  test_loss, test_acc = 0, 0

  # Turn on inference context manager
  with torch.inference_mode():
      # Loop through DataLoader batches
      for batch, (X, y) in enumerate(dataloader):
          # Send data to target device
          X, y = X.to(device), y.to(device)

          # 1. Forward pass
          test_pred_logits = model(X)

          # 2. Calculate and accumulate loss
          loss = loss_fn(test_pred_logits, y)
          test_loss += loss.item()

          # Calculate and accumulate accuracy
          test_pred_labels = test_pred_logits.argmax(dim=1)
          test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

  # Adjust metrics to get average loss and accuracy per batch
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc



# 1. Take in various parameters required for training and test steps
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          device: torch.device,
          loss_fn: torch.nn.Module = nn.CrossEntropyLoss(),
          epochs: int = 5):

  # 2. Create empty results dictionary
  results = {"train_loss": [],
      "train_acc": [],
      "test_loss": [],
      "test_acc": []
  }

  # 3. Loop through training and testing steps for a number of epochs
  for epoch in tqdm(range(epochs)):
      train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
      test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)

      # 4. Print out what's happening
      print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
      )

      # 5. Update results dictionary
      # Ensure all data is moved to CPU and converted to float for storage
      results["train_loss"].append(train_loss.item() if isinstance(train_loss, torch.Tensor) else train_loss)
      results["train_acc"].append(train_acc.item() if isinstance(train_acc, torch.Tensor) else train_acc)
      results["test_loss"].append(test_loss.item() if isinstance(test_loss, torch.Tensor) else test_loss)
      results["test_acc"].append(test_acc.item() if isinstance(test_acc, torch.Tensor) else test_acc)

  # 6. Return the filled results at the end of the epochs
  return results

Overwriting going_modular/engine.py


In [46]:
import importlib
from going_modular import engine

importlib.reload(engine)  # Forces reloading of the module

<module 'going_modular.engine' from '/content/going_modular/engine.py'>

In [53]:
%%writefile going_modular/train.py
"""
Trains a PyTorch image classification model using device-agnostic code.
"""
import os
import torch
import argparse
import get_data, data_setup, model_builder, engine

parser = argparse.ArgumentParser(description="Train a PyTorch image classification model.")

parser.add_argument("--batch_size", type=int, default=16, help="Batch size for training.")
parser.add_argument("--lr", type=float, default=0.001, help="Learning rate for the optimizer.")
parser.add_argument("--num_epochs", type=int, default=10, help="Number of training epochs.")

args = parser.parse_args()

device = "cuda" if torch.cuda.is_available() else "cpu"

if(not os.path.exists('going_modular')):
  os.makedirs('going_modular')

get_data.data_download(data_path='data', request_url="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")

train_loader, test_loader, class_names = data_setup.create_dataloaders(
    train_dir = 'data/pizza_steak_sushi/train',
    test_dir = 'data/pizza_steak_sushi/test',
    batch_size = args.batch_size
)

model = model_builder.TinyVGG(
    input_shape=3, # number of color channels (3 for RGB)
    hidden_units=10,
    output_shape=len(class_names)).to(device)

# Use train() by calling it from engine.py
engine.train(model=model,
             train_dataloader=train_loader,
             test_dataloader=test_loader,
             optimizer=torch.optim.Adam(params=model.parameters(), lr=args.lr),
             device=device,
             epochs=args.num_epochs)

Overwriting going_modular/train.py


In [54]:
# train the model on the command line
!python going_modular/train.py  --batch_size 16 --lr 0.0001 --num_epochs 5

data/pizza_steak_sushi directory exists.
Downloading pizza, steak, sushi data...
Unzipping pizza, steak, sushi data...
  0% 0/5 [00:00<?, ?it/s]Epoch: 1 | train_loss: 1.0985 | train_acc: 0.3083 | test_loss: 1.1019 | test_acc: 0.1875
 20% 1/5 [00:01<00:07,  1.87s/it]Epoch: 2 | train_loss: 1.0977 | train_acc: 0.3250 | test_loss: 1.1032 | test_acc: 0.2375
 40% 2/5 [00:04<00:07,  2.43s/it]Epoch: 3 | train_loss: 1.0995 | train_acc: 0.3500 | test_loss: 1.1038 | test_acc: 0.3250
 60% 3/5 [00:06<00:04,  2.34s/it]Epoch: 4 | train_loss: 1.0990 | train_acc: 0.3250 | test_loss: 1.1014 | test_acc: 0.3125
 80% 4/5 [00:08<00:02,  2.10s/it]Epoch: 5 | train_loss: 1.0964 | train_acc: 0.3875 | test_loss: 1.0979 | test_acc: 0.3125
100% 5/5 [00:10<00:00,  2.09s/it]
