<a href="https://colab.research.google.com/github/utyabia/1-notebook/blob/main/PyTorch_Custom_Datasets_Exercises_Template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Our models are underperforming (not fitting the data well). What are 3 methods for preventing underfitting? Write them down and explain each with a sentence.

Underfitting happens when a model can't capture the underlying pattern of the data. Here are 3 methods to prevent it:

Increase Model Complexity
Use a deeper or wider neural network (more layers or more units per layer). This allows the model to better learn complex patterns in the data.

Train for More Epochs
Sometimes the model hasn’t learned enough yet. Training longer can improve performance if underfitting is the issue.

Reduce Regularization
Regularization techniques like dropout or weight decay can limit model capacity. Reducing these can help the model learn better when underfitting.

2. Recreate the data loading functions we built in sections 1, 2, 3 and 4 of notebook 04. You should have train and test DataLoader's ready to use.

In [1]:
import os
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 1. Paths
data_dir = Path("data/pizza_steak_sushi")
train_dir = data_dir / "train"
test_dir = data_dir / "test"

# 2. Walk through the directory
def walk_through_dir(dir_path):
    for dirpath, dirnames, filenames in os.walk(dir_path):
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

walk_through_dir(data_dir)

# 3. Transforms
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

# 4. Load datasets
train_data = datasets.ImageFolder(root=train_dir, transform=transform)
test_data = datasets.ImageFolder(root=test_dir, transform=transform)

# 5. Create DataLoaders
train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

# Class info
class_names = train_data.classes
class_dict = train_data.class_to_idx
print(class_names)
print(class_dict)


FileNotFoundError: [Errno 2] No such file or directory: 'data/pizza_steak_sushi/train'

3. Recreate model_0 we built in section 7 of notebook 04.

In [None]:
import torch.nn as nn

class TinyVGG(nn.Module):
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*16*16, out_features=output_shape)
        )

    def forward(self, x):
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        return self.classifier(x)

model_0 = TinyVGG(input_shape=3, hidden_units=10, output_shape=len(class_names)).to(device)


In [None]:
def train_step(model, dataloader, loss_fn, optimizer):
    model.train()
    train_loss, train_acc = 0, 0

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        y_pred_class = y_pred.argmax(dim=1)
        train_acc += (y_pred_class == y).sum().item() / len(y)

    return train_loss / len(dataloader), train_acc / len(dataloader)

def test_step(model, dataloader, loss_fn):
    model.eval()
    test_loss, test_acc = 0, 0

    with torch.inference_mode():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            test_loss += loss_fn(y_pred, y).item()
            test_acc += (y_pred.argmax(dim=1) == y).sum().item() / len(y)

    return test_loss / len(dataloader), test_acc / len(dataloader)


In [None]:
from tqdm.auto import tqdm

def train(model, train_dataloader, test_dataloader, optimizer, loss_fn, epochs=5):
    results = {"train_loss": [], "train_acc": [], "test_loss": [], "test_acc": []}

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model, train_dataloader, loss_fn, optimizer)
        test_loss, test_acc = test_step(model, test_dataloader, loss_fn)

        print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Acc={train_acc:.4f} | Test Loss={test_loss:.4f}, Acc={test_acc:.4f}")

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    return results

# Run training
torch.manual_seed(42)
torch.cuda.manual_seed(42)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_0.parameters(), lr=0.001)

results_5_epochs = train(model_0, train_dataloader, test_dataloader, optimizer, loss_fn, epochs=5)


In [None]:
# New model with double hidden units
model_1 = TinyVGG(input_shape=3, hidden_units=20, output_shape=len(class_names)).to(device)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_1.parameters(), lr=0.001)

results_more_units = train(model_1, train_dataloader, test_dataloader, optimizer, loss_fn, epochs=20)


In [None]:
# New Datasets
train_data_20 = datasets.ImageFolder(train_data_20_percent_path, transform=transform)
test_data_20 = datasets.ImageFolder(test_data_20_percent_path, transform=transform)

# DataLoaders
train_loader_20 = DataLoader(train_data_20, batch_size=32, shuffle=True)
test_loader_20 = DataLoader(test_data_20, batch_size=32, shuffle=False)

# Train new model
model_2 = TinyVGG(input_shape=3, hidden_units=20, output_shape=len(class_names)).to(device)

torch.manual_seed(42)
torch.cuda.manual_seed(42)
optimizer = torch.optim.Adam(model_2.parameters(), lr=0.001)

results_double_data = train(model_2, train_loader_20, test_loader_20, optimizer, loss_fn, epochs=20)
