In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torch.profiler import profile, record_function, ProfilerActivity

In [2]:
def get_dataloaders(batch_size=128, num_workers=4):
    """
    Build the CIFAR-10 training and test DataLoaders.

    Every image is resized to 32x32, converted to a float tensor and
    normalized per channel. The dataset is stored under ``./data`` and is
    downloaded on first use.

    Args:
        batch_size: Number of samples per batch, used for both loaders.
        num_workers: Number of worker subprocesses used by each loader.

    Returns:
        A ``(train_loader, test_loader)`` tuple. The training loader
        shuffles its samples; the test loader keeps dataset order.
    """
    # Commonly cited per-channel statistics of the CIFAR-10 training split.
    cifar_mean = (0.4914, 0.4822, 0.4465)
    cifar_std = (0.2470, 0.2435, 0.2616)

    transform = T.Compose([
        T.Resize((32, 32)),
        T.ToTensor(),
        T.Normalize(cifar_mean, cifar_std),
    ])

    data_root = "./data"
    train_set = torchvision.datasets.CIFAR10(
        root=data_root, train=True, download=True, transform=transform
    )
    test_set = torchvision.datasets.CIFAR10(
        root=data_root, train=False, download=True, transform=transform
    )

    # Pinned host memory only helps when batches are copied to a GPU.
    pin_memory = torch.cuda.is_available()

    train_loader = DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    test_loader = DataLoader(
        test_set,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    return train_loader, test_loader

In [3]:
def build_model(device):
    """
    Create a randomly initialised ResNet-18 with a 10-class output head.

    Args:
        device: The ``torch.device`` (or device string) the model is moved to.

    Returns:
        The ResNet-18 ``nn.Module`` located on ``device``, whose final fully
        connected layer outputs 10 logits.
    """
    # Calling resnet18() without arguments yields untrained weights and avoids
    # the `pretrained=` keyword, which newer torchvision releases deprecate.
    model = torchvision.models.resnet18()

    # Swap the stock classification head for one with 10 outputs, keeping the
    # input width of the original layer.
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, 10)

    return model.to(device)


In [4]:
def train_step(model, data, target, optimizer, criterion):
    """
    Run one optimisation step on a single batch.

    Args:
        model: The network to train; called as ``model(data)``.
        data: Input batch passed to the model.
        target: Ground-truth batch passed to ``criterion`` with the output.
        optimizer: Optimizer holding the model's parameters.
        criterion: Loss callable invoked as ``criterion(output, target)``.

    Returns:
        The batch loss as a Python float (``loss.item()``).
    """
    # Clear gradients left over from any previous step so they do not
    # accumulate into this update.
    optimizer.zero_grad()

    output = model(data)
    loss = criterion(output, target)

    loss.backward()
    optimizer.step()

    return loss.item()

In [5]:
def inference_step(model, data):
    """
    Run a single gradient-free forward pass in evaluation mode.

    The model is switched to eval mode for the duration of the call so that
    layers such as BatchNorm and Dropout behave as they do at inference time,
    and every submodule's previous train/eval flag is restored afterwards.

    Args:
        model: The ``nn.Module`` to evaluate; called as ``model(data)``.
        data: Input batch passed to the model.

    Returns:
        The model output, computed under ``torch.no_grad()``.
    """
    # Remember each submodule's flag individually: restoring with a blanket
    # model.train(...) would overwrite submodules that were deliberately
    # left in a different mode than their parent.
    saved_modes = [(module, module.training) for module in model.modules()]
    model.eval()
    try:
        with torch.no_grad():
            return model(data)
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

In [None]:
def start(batch_size=128, num_workers=4, logdir="./profiler_logs"):
    """
    Profile one training step and one inference step of the CIFAR-10 model.

    The function builds the data loaders and model, runs an unprofiled
    warm-up pass on a single batch, then records exactly one training step
    and one inference step with ``torch.profiler``. A TensorBoard trace is
    written to ``logdir`` and summary tables are printed.

    Args:
        batch_size: Batch size forwarded to ``get_dataloaders``.
        num_workers: Loader worker count forwarded to ``get_dataloaders``.
        logdir: Directory that receives the TensorBoard trace files; it is
            created if it does not exist.
    """
    # 1) Set device
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # 2) Load data (only the training loader is needed for profiling)
    train_loader, _test_loader = get_dataloaders(
        batch_size=batch_size, num_workers=num_workers
    )

    # 3) Build model + optimizer + loss
    model = build_model(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # 4) Warm up on one batch so one-time initialisation costs are paid
    #    before the profiled region.
    data, target = next(iter(train_loader))
    data, target = data.to(device), target.to(device)
    train_step(model, data, target, optimizer, criterion)
    inference_step(model, data)

    # 5) Prepare profiler logdir
    os.makedirs(logdir, exist_ok=True)

    # 6) Profile exactly one train + one inference.
    #    Request CUDA activity only when a GPU is actually in use.
    activities = [ProfilerActivity.CPU]
    if use_cuda:
        activities.append(ProfilerActivity.CUDA)

    with profile(
        activities=activities,
        record_shapes=True,
        profile_memory=True,
        with_stack=True,
        on_trace_ready=torch.profiler.tensorboard_trace_handler(logdir),
    ) as prof:
        with record_function("train_batch"):
            train_step(model, data, target, optimizer, criterion)
        with record_function("inference_batch"):
            inference_step(model, data)

    # Aggregate once and reuse for every table.
    averages = prof.key_averages()

    # 7) Print top-3 CPU ops
    print("\n=== Top 3 ops by CPU self time ===")
    print(averages.table(sort_by="self_cpu_time_total", row_limit=3))

    # 8) Print top-3 CUDA ops; skipped on CPU-only runs, where there is no
    #    CUDA time to rank.
    if use_cuda:
        print("\n=== Top 3 ops by CUDA self time ===")
        print(averages.table(sort_by="self_cuda_time_total", row_limit=3))
    else:
        print("\nCUDA not available; skipping CUDA op table.")

    print(f"\nTrace files written to: {logdir}")
    print(f"Run `tensorboard --logdir {logdir}` and open the Profile dashboard.")

In [None]:
# Entry point of the notebook: run the profiling workflow defined in start().
start()