In [None]:
# ---- Cell 1: Setup and Data Loading ----

import torch
import matplotlib.pyplot as plt
import numpy as np

from two_layer_net import TwoLayerNet, hello_two_layer_net
from p5_utils import preprocess_cifar10

# Sanity check that the local two_layer_net module imported correctly.
# NOTE(review): presumably this just prints a greeting -- confirm in two_layer_net.py.
hello_two_layer_net()

# Choose the compute device once for the whole notebook.
# Preference order: NVIDIA CUDA -> Apple MPS -> CPU.
if torch.cuda.is_available():
    device = "cuda"
    print("Using NVIDIA GPU (CUDA) for accelerated training.")
    # Optional: print the GPU name for verification
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
# Apple's Metal Performance Shaders (MPS) backend for Mac users.
# Older PyTorch builds do not define `torch.backends.mps` at all, so look the
# attribute up defensively instead of letting an AttributeError abort the cell.
elif (
    getattr(torch.backends, "mps", None) is not None
    and torch.backends.mps.is_available()
):
    device = "mps"
    print("Using Apple MPS (Metal) backend.")
# Otherwise, fall back to the CPU
else:
    device = "cpu"
    print("Using CPU (No GPU available).")

# Final confirmation print
print(f"Device selected: {device}")

# Convenience wrapper: load CIFAR-10 with this notebook's fixed preprocessing options
def get_data():
    """Call ``preprocess_cifar10`` with the settings used throughout this notebook.

    The module-level ``device`` is read at call time, so the device selection
    must have run before this function is called.

    Returns:
        Whatever ``preprocess_cifar10`` returns, unchanged. The rest of the
        notebook indexes it with keys such as ``'X_train'`` and ``'y_train'``.
    """
    loader_options = {
        "show_examples": False,
        "bias_trick": True,
        "flatten": True,
        "validation_ratio": 0.1,
        "dtype": torch.float32,
        "device": device,
    }
    return preprocess_cifar10(**loader_options)

# Load the dataset once; later cells reuse `data_dict`
data_dict = get_data()

print("\nData loaded!")
# Report the shape of each split's inputs
for split_label, split_key in (
    ("Train", "X_train"),
    ("Validation", "X_val"),
    ("Test", "X_test"),
):
    print(f"{split_label} data shape: {data_dict[split_key].shape}")

# ---- Sanity Check: Test Initial Loss ----
# With no regularization, the loss of an untrained classifier is expected to be
# close to log(num_classes); that reference value is printed next to the
# measured loss below.
input_size = data_dict['X_train'].shape[1]
# Labels are treated as 0-based class ids, so the class count is max label + 1.
num_classes = int(data_dict['y_train'].max().item()) + 1
hidden_size = 50

# NOTE(review): the data lives on `device`; TwoLayerNet's constructor signature
# is not visible here -- confirm it places its parameters on the same device.
net = TwoLayerNet(input_size, hidden_size, num_classes)

# A small slice of the training set is enough for this check.
X_batch = data_dict['X_train'][:200]
y_batch = data_dict['y_train'][:200]

# net.loss returns a pair; only the first element (the loss) is needed here
initial_loss, _ = net.loss(X_batch, y_batch, reg=0.0)

# "\u2248" is the approximately-equal sign; written as an escape so the source
# stays pure ASCII and cannot be garbled by a wrong file encoding again.
print(
    f"\nInitial Loss (expected \u2248 {np.log(num_classes):.3f} "
    f"for {num_classes} classes): {initial_loss.item():.4f}"
)


In [None]:
# --- Cell 2: Run Hyperparameter Search ---

from two_layer_net import nn_get_search_params, find_best_net
from p5_utils import plot_stats, plot_acc_curves  # optional, for plotting

print("\nStarting Hyperparameter Tuning (Grid Search)...")

# Fetch the four candidate lists from nn_get_search_params(); they are used
# here only to report how many combinations exist.
lrs, hss, regs, decays = nn_get_search_params()

total_combinations = 1
for candidates in (lrs, hss, regs, decays):
    total_combinations *= len(candidates)
print(f"Testing {total_combinations} combinations...\n")

# find_best_net is handed the params *function* itself (not the lists above)
# together with the dataset.
search_result = find_best_net(data_dict, nn_get_search_params)
best_net, best_stat, best_val_acc = search_result

print("\n=== Hyperparameter Search Complete ===")
# NOTE(review): printed with a '%' sign but not scaled by 100, unlike the
# accuracies in the evaluation cell -- confirm whether find_best_net returns a
# fraction in [0, 1] or a percentage.
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

# ---- Optional: visualize training for the best model ----
print("\nPlotting loss/accuracy curves for the best model...")
plot_stats(best_stat)  # loss + train/val accuracy curves


In [None]:
# --- Cell 3: Evaluate Best Model on Train / Val / Test ---

from p5_utils import show_net_weights

# Make sure best_net exists (from Cell 2). The name is looked up through
# globals() so the friendly assertion message is shown even on a fresh kernel,
# where a bare reference to best_net would raise NameError before the assert ran.
assert globals().get("best_net") is not None, "Run Cell 2 first to create best_net."


def _accuracy_percent(predicted, expected):
    """Return the percentage (0-100) of positions where `predicted` equals `expected`."""
    return (predicted == expected).float().mean().item() * 100


X_train = data_dict["X_train"]
y_train = data_dict["y_train"]
X_val   = data_dict["X_val"]
y_val   = data_dict["y_val"]
X_test  = data_dict["X_test"]
y_test  = data_dict["y_test"]

# Predict labels for every split with the selected model
y_train_pred = best_net.predict(X_train)
y_val_pred   = best_net.predict(X_val)
y_test_pred  = best_net.predict(X_test)

# Compute accuracies (as percentages) with one shared helper
train_acc = _accuracy_percent(y_train_pred, y_train)
val_acc   = _accuracy_percent(y_val_pred, y_val)
test_acc  = _accuracy_percent(y_test_pred, y_test)

print("=== Best Model Performance ===")
print(f"Train accuracy: {train_acc:.2f}%")
print(f"Val   accuracy: {val_acc:.2f}%")
print(f"Test  accuracy: {test_acc:.2f}%")

# ---- Optional: visualize the first-layer weights ----
print("\nVisualizing first-layer weights...")
show_net_weights(best_net)
