Installing required modules

In [None]:
import sys
import subprocess

def install(pkg):
    """Install ``pkg`` with pip, run through the interpreter executing this notebook.

    Raises:
        subprocess.CalledProcessError: if the pip process exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", pkg]
    subprocess.check_call(pip_command)

# Third-party packages (pip names) that the notebook imports further down.
# tqdm, matplotlib, numpy and scikit-learn are imported by the next cell as well,
# so they are checked here too instead of failing later on a fresh environment.
packages = [
    "torch",
    "torchvision",
    "seaborn",
    "psutil",
    "numpy",
    "matplotlib",
    "scikit-learn",
    "tqdm",
]

# pip name -> importable module name, for the packages where the two differ.
# Anything not listed here is imported under its pip name.
import_names = {"scikit-learn": "sklearn"}

for p in packages:
    module_name = import_names.get(p, p)
    try:
        __import__(module_name)
    except ImportError:
        # Only packages that cannot be imported are installed.
        print("Installing package:", p)
        install(p)

Importing modules and basic setup

In [None]:
import os;
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from data_loader import VideoDataset
from models import ResNet18Temporal
from utils import train_one_epoch, eval_one_epoch, EarlyStopping
from tqdm import trange
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import time
import random
import psutil


Reproducibility and Random Seed Initialization

In [None]:
# One shared seed for every random number generator the notebook touches:
# Python's `random`, NumPy, PyTorch (CPU) and PyTorch (all CUDA devices).
seed = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed)

# cuDNN flags: benchmark mode off, deterministic mode on.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

Creating required directories

In [None]:

# Folders the notebook expects to exist. `exist_ok=True` makes re-running
# this cell harmless when they are already present.
output_dirs = (
    "../dataset_info/sample_frames",
    "../results",
    "../results/confusion_matrices",
    "../results/performance_plots",
    "../results/feature_visualizations",
    "../results/saved_models",
    "../results/saved_feature_matrices",
)

for directory in output_dirs:
    os.makedirs(directory, exist_ok=True)

Transforms

In [None]:
def _tensor_and_normalize():
    """Return the final two steps shared by both pipelines: ToTensor, then Normalize."""
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
    ]


# Training pipeline: random crop / flip / colour jitter / rotation before the shared tail.
train_steps = [
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.RandomRotation(10),
]
train_tfms = transforms.Compose(train_steps + _tensor_and_normalize())

# Validation / test pipeline: fixed resize and centre crop, no random steps.
val_steps = [
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
]
val_tfms = transforms.Compose(val_steps + _tensor_and_normalize())


Dataset & loaders

In [None]:
# Class name -> integer label used by the datasets and the classifier head.
class_map = {"PullUps": 0, "Punch": 1, "PushUps": 2}
num_classes = len(class_map)

dataset_root = "../dataset_info/dataset"


def _load_split(csv_path, tfms, is_train):
    """Build the VideoDataset for one split CSV with the arguments common to all splits."""
    return VideoDataset(csv_path, dataset_root, class_map,
                        num_frames=18, transform=tfms, train=is_train)


train_ds = _load_split("../dataset_info/dataset/splits/train.csv", train_tfms, True)
val_ds = _load_split("../dataset_info/dataset/splits/val.csv", val_tfms, False)
test_ds = _load_split("../dataset_info/dataset/splits/test.csv", val_tfms, False)

# Only the training loader shuffles and uses worker processes.
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, num_workers=4, pin_memory=False)

eval_loader_kwargs = dict(batch_size=4, shuffle=False)
val_loader = DataLoader(val_ds, **eval_loader_kwargs)
test_loader = DataLoader(test_ds, **eval_loader_kwargs)



Model, optimizer, scheduler

In [None]:
# Run on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = ResNet18Temporal(num_classes=num_classes, pooling="avg", dropout=0.5).to(device)

# `label_smoothing` is the fraction of the target probability that is replaced
# by a uniform distribution over the classes (0.0 = plain one-hot targets).
# The previous value of 0.95 left the true class with only
# 0.05 + 0.95 / 3 ~= 0.37 of the target mass (vs ~0.32 for each wrong class in
# this 3-class problem), i.e. nearly uniform targets and almost no training
# signal. 0.05 keeps ~0.97 on the true class, which is the usual light smoothing.
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.05)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
# Halve the learning rate every 5 scheduler steps (one step per epoch in the training loop).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

early_stop = EarlyStopping(patience=3)


Training loop (with early stopping)

In [None]:
EPOCHS = 12

# Start below any reachable accuracy so the first epoch always writes a
# checkpoint. With a start value of 0 and a strict `>` comparison, a run whose
# validation accuracy stays at 0 would never save, and the loading cell would
# then fail or pick up a stale file left by an earlier run.
best_val_acc = float("-inf")

# Per-epoch history, plotted later as learning curves.
train_losses = []
val_losses = []
train_accs = []
val_accs = []

for epoch in trange(EPOCHS, desc="Epochs"):

    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion, device)

    val_loss, val_acc = eval_one_epoch(
        model, val_loader, criterion, device)

    # One scheduler step per epoch, after the optimizer has been used for that epoch.
    scheduler.step()

    print(f"\nEpoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} || "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    # Keep the weights of the epoch with the highest validation accuracy.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_resnet18_temporal.pth")

    # Early stopping is driven by the validation loss, not by the accuracy.
    early_stop(val_loss)
    if early_stop.stop:
        print("Early stopping triggered")
        break

print("Training complete. Best Val Acc:", best_val_acc)


Loading best model

In [None]:
# Restore the checkpoint written by the training loop and switch to eval mode.
checkpoint_state = torch.load("best_resnet18_temporal.pth", map_location=device)
model.load_state_dict(checkpoint_state)
model.eval()

# Fixed, human-readable summary of the architecture.
for summary_line in (
    "Model loaded successfully",
    "Backbone: ResNet-18 (ImageNet pretrained)",
    "Input: fixed-length RGB frame sequences (224x224)",
    "Temporal aggregation: Temporal pooling",
    "Classifier head: Dropout → Fully Connected (3 classes)",
):
    print(summary_line)

# Parameter counts: only those with requires_grad first, then all of them.
trainable_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Trainable parameters:", trainable_count)

total_count = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_count)



Test set evaluation

In [None]:
# Accumulate the predicted and true label of every test sample, batch by batch.
all_preds, all_labels = [], []

# No gradients are needed while only running the model forward.
with torch.no_grad():
    for x, y in test_loader:
        x = x.to(device)
        y = y.to(device)

        out = model(x)
        # Index of the largest output along dim 1 is taken as the predicted class.
        preds = out.argmax(dim=1)

        all_labels.extend(y.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

all_preds, all_labels = np.array(all_preds), np.array(all_labels)


Evaluation metrics

In [None]:
# Class names ordered by their integer id, with the matching ids alongside.
class_names = sorted(class_map, key=class_map.get)
class_ids = [class_map[name] for name in class_names]

print("Test Accuracy:", accuracy_score(all_labels, all_preds))

print("\nClassification Report:")
# `labels=` pins the set of reported classes. Without it the report is built
# from the classes that actually occur, and it raises when that number differs
# from len(target_names) (e.g. a class missing from both labels and predictions).
print(classification_report(all_labels, all_preds,
                            labels=class_ids, target_names=class_names))

Confusion matrix

In [None]:
# Class names ordered by integer id, so row/column k of the matrix carries the
# name of the class whose id is k.
class_names = sorted(class_map, key=class_map.get)
class_ids = [class_map[name] for name in class_names]

# `labels=` keeps the matrix at one row/column per class in `class_map`, even
# when a class never appears in the test labels or predictions, so it always
# lines up with the tick labels below.
conf_matrix = confusion_matrix(all_labels, all_preds, labels=class_ids)

plt.figure(figsize=(6,5))
# The matrix variable is `conf_matrix`; the heatmap previously referenced an
# undefined name `cm`, which raised NameError.
sns.heatmap(conf_matrix, annot=True, fmt="d",
            xticklabels=class_names,
            yticklabels=class_names,
            cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix – 2D CNN (ResNet-18)")
# Save before showing: once the figure has been shown it may no longer be the
# current figure, and savefig would then write an empty image.
plt.savefig("../results/confusion_matrices/2d_resnet18_confusion_matrix.png")
plt.show()

Learning curves

In [None]:
def _plot_history(train_values, val_values, train_label, val_label, y_label, title):
    """Plot a training and a validation series against the epoch index on a new figure."""
    plt.figure()
    for values, label in ((train_values, train_label), (val_values, val_label)):
        plt.plot(values, label=label)
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.show()


# Loss per epoch, then accuracy per epoch.
_plot_history(train_losses, val_losses,
              "Train Loss", "Val Loss",
              "Loss", "Loss Curve – 2D CNN")

_plot_history(train_accs, val_accs,
              "Train Accuracy", "Validation Accuracy",
              "Accuracy", "Accuracy Curve – 2D CNN")


Inference time (computational analysis)

In [None]:
# Time a single forward pass on the first test batch.
x, y = next(iter(test_loader))
x = x.to(device)

# CUDA kernels are launched asynchronously: without an explicit synchronize the
# timer would stop before the GPU has finished the forward pass.
on_gpu = str(device).startswith("cuda")

with torch.no_grad():
    # Warm-up pass, excluded from the measurement, so one-off start-up costs
    # of the first call do not end up in the reported time.
    _ = model(x)
    if on_gpu:
        torch.cuda.synchronize()

    # perf_counter is a monotonic, high-resolution clock meant for measuring intervals.
    start = time.perf_counter()
    _ = model(x)
    if on_gpu:
        torch.cuda.synchronize()
    end = time.perf_counter()

print("Inference time per batch:", end-start)
# x.size(0) is the number of samples in the timed batch.
print("Approx inference time per video:", (end-start)/x.size(0))


Model size (parameter count)

In [None]:
# Element count and requires_grad flag of every parameter tensor, gathered once.
param_sizes = [(param.numel(), param.requires_grad) for param in model.parameters()]

total_params = sum(count for count, _ in param_sizes)
trainable_params = sum(count for count, needs_grad in param_sizes if needs_grad)

print("Total parameters:", total_params)
print("Trainable parameters:", trainable_params)


Error analysis

In [None]:
# Positions at which the predicted class differs from the true class.
wrong_idx = np.nonzero(all_preds != all_labels)[0]
print("Wrong predictions:", len(wrong_idx))

# Class names in `class_map` order, indexed below by the integer label.
label_names = list(class_map.keys())

# Show at most the first five mistakes; the loop simply does nothing when there are none.
for i in wrong_idx[:5]:
    # Row i of the split table (assumes the unshuffled test loader keeps that row order).
    video_path = test_ds.data.iloc[i]["clip_path"]

    true_label = label_names[all_labels[i]]
    pred_label = label_names[all_preds[i]]

    print("Video:", video_path)
    print("True:", true_label, "| Pred:", pred_label)
    print("-"*50)


File size on disk

In [None]:
# Size of the saved checkpoint on disk, converted from bytes to MB (1024 * 1024 bytes).
model_path = "best_resnet18_temporal.pth"
bytes_per_mb = 1024 * 1024
file_size_mb = os.path.getsize(model_path) / bytes_per_mb

print(f"Saved model file size: {file_size_mb:.2f} MB")


GPU memory usage

In [None]:
# Report the CUDA memory counters in MB; nothing is printed on a machine without CUDA.
if torch.cuda.is_available():
    bytes_per_mb = 1024**2
    memory_readers = (
        ("GPU memory allocated:", torch.cuda.memory_allocated),
        ("GPU memory reserved:", torch.cuda.memory_reserved),
    )
    for label, read_bytes in memory_readers:
        print(label, read_bytes() / bytes_per_mb, "MB")


CPU RAM usage

In [None]:
# Resident set size (RSS) of the current Python process, converted from bytes to MB.
process = psutil.Process(os.getpid())
rss_bytes = process.memory_info().rss
ram_mb = rss_bytes / (1024 * 1024)

print(f"Current RAM usage: {ram_mb} MB")
