Installing required modules

In [None]:
import sys
import subprocess

def install(pkg):
    """Install ``pkg`` with pip, using the interpreter that runs this kernel.

    Runs ``<sys.executable> -m pip install <pkg>``; ``subprocess.check_call``
    raises ``subprocess.CalledProcessError`` if pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", pkg]
    subprocess.check_call(pip_command)

# Distributions this notebook needs; each one is installed only if importing it fails.
packages = ["torch", "torchvision"]

for pkg_name in packages:
    # The importable module name is taken as the text before the first "-".
    module_name = pkg_name.split("-")[0]
    try:
        __import__(module_name)
    except ImportError:
        print("Installing package:", pkg_name)
        install(pkg_name)

Importing modules and basic setup

In [6]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from data_loader import VideoDataset
from models import ResNet18Temporal
from utils import train_one_epoch, eval_one_epoch, EarlyStopping
from tqdm import trange

Transforms

In [7]:
# Per-channel normalisation statistics shared by the training and validation pipelines.
# NOTE(review): these look like the standard ImageNet mean/std — confirm they match
# the weights used by the backbone.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop / flip / colour jitter / rotation, then tensor + normalise.
_train_steps = [
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
train_tfms = transforms.Compose(_train_steps)

# Validation/test pipeline: deterministic resize + centre crop, same normalisation.
_val_steps = [
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
val_tfms = transforms.Compose(_val_steps)


Dataset & loaders

In [11]:
# Label name -> integer class index handed to every VideoDataset below.
class_map = {"PullUps": 0, "Punch": 1, "PushUps": 2}
num_classes = len(class_map)

dataset_root = "../dataset_info/dataset"
# Split CSVs live under the dataset root. Deriving their location from
# `dataset_root` keeps the three paths in sync when the root is changed.
splits_dir = f"{dataset_root}/splits"

# Values shared by all three splits, named once instead of repeated per call.
NUM_FRAMES = 30
BATCH_SIZE = 4

train_ds = VideoDataset(f"{splits_dir}/train.csv",
                        dataset_root, class_map,
                        num_frames=NUM_FRAMES, transform=train_tfms, train=True)

val_ds = VideoDataset(f"{splits_dir}/val.csv",
                      dataset_root, class_map,
                      num_frames=NUM_FRAMES, transform=val_tfms, train=False)

test_ds = VideoDataset(f"{splits_dir}/test.csv",
                       dataset_root, class_map,
                       num_frames=NUM_FRAMES, transform=val_tfms, train=False)

# Only the training loader shuffles and uses worker processes; the evaluation
# loaders keep a fixed order and the DataLoader defaults.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=4, pin_memory=False)

val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)



Model, optimizer, scheduler

In [12]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model = ResNet18Temporal(
    num_classes=num_classes,
    pooling="avg",
    dropout=0.5,
)
model = model.to(device)

# Cross-entropy with label smoothing of 0.1.
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)

# Adam with weight decay; the learning rate is halved every 5 scheduler steps.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=5,
    gamma=0.5,
)

early_stop = EarlyStopping(patience=3)


Training loop (with early stopping)

In [13]:


EPOCHS = 12
# Start below any possible accuracy so the first epoch always writes a checkpoint.
# With the previous start value of 0, a run whose validation accuracy never rose
# above 0 saved nothing, and the later load step would fail or pick up a stale
# file left by an earlier run.
best_val_acc = float("-inf")

for epoch in trange(EPOCHS, desc="Epochs"):

    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion, device)

    val_loss, val_acc = eval_one_epoch(
        model, val_loader, criterion, device)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print(f"\nEpoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} || "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

    # Keep the weights of the epoch with the highest validation accuracy so far
    # (ties keep the earlier epoch).
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_resnet18_temporal.pth")

    # Early stopping watches validation loss, independently of the checkpoint rule.
    early_stop(val_loss)
    if early_stop.stop:
        print("Early stopping triggered")
        break

print("Training complete. Best Val Acc:", best_val_acc)



Epochs:   8%|▊         | 1/12 [00:57<10:34, 57.66s/it]


Epoch [1/12] Train Loss: 1.2208 | Train Acc: 0.2915 || Val Loss: 1.1272 | Val Acc: 0.4444


Epochs:  17%|█▋        | 2/12 [01:55<09:38, 57.80s/it]


Epoch [2/12] Train Loss: 1.2002 | Train Acc: 0.3727 || Val Loss: 1.0801 | Val Acc: 0.4667


Epochs:  25%|██▌       | 3/12 [02:53<08:40, 57.84s/it]


Epoch [3/12] Train Loss: 1.1223 | Train Acc: 0.4096 || Val Loss: 1.0252 | Val Acc: 0.5556


Epochs:  33%|███▎      | 4/12 [03:51<07:43, 57.91s/it]


Epoch [4/12] Train Loss: 1.1229 | Train Acc: 0.4059 || Val Loss: 0.9893 | Val Acc: 0.5111


Epochs:  42%|████▏     | 5/12 [04:49<06:46, 58.04s/it]


Epoch [5/12] Train Loss: 1.1007 | Train Acc: 0.4502 || Val Loss: 0.9714 | Val Acc: 0.5333


Epochs:  50%|█████     | 6/12 [05:48<05:48, 58.12s/it]


Epoch [6/12] Train Loss: 1.0545 | Train Acc: 0.4649 || Val Loss: 0.9374 | Val Acc: 0.6667


Epochs:  58%|█████▊    | 7/12 [06:46<04:50, 58.20s/it]


Epoch [7/12] Train Loss: 1.0878 | Train Acc: 0.4133 || Val Loss: 0.9172 | Val Acc: 0.7333


Epochs:  67%|██████▋   | 8/12 [07:44<03:53, 58.31s/it]


Epoch [8/12] Train Loss: 1.0567 | Train Acc: 0.5240 || Val Loss: 0.9109 | Val Acc: 0.7111


Epochs:  75%|███████▌  | 9/12 [08:43<02:55, 58.35s/it]


Epoch [9/12] Train Loss: 1.0460 | Train Acc: 0.4945 || Val Loss: 0.8943 | Val Acc: 0.7333


Epochs:  83%|████████▎ | 10/12 [09:41<01:56, 58.36s/it]


Epoch [10/12] Train Loss: 1.0669 | Train Acc: 0.4576 || Val Loss: 0.8845 | Val Acc: 0.7111


Epochs:  92%|█████████▏| 11/12 [10:40<00:58, 58.36s/it]


Epoch [11/12] Train Loss: 1.0213 | Train Acc: 0.4945 || Val Loss: 0.8708 | Val Acc: 0.7333


Epochs: 100%|██████████| 12/12 [11:38<00:00, 58.22s/it]


Epoch [12/12] Train Loss: 0.9439 | Train Acc: 0.5609 || Val Loss: 0.8635 | Val Acc: 0.7333
Training complete. Best Val Acc: 0.7333333333333333





Loading best model

In [14]:
# Restore the checkpoint written during training, mapping tensors onto `device`.
best_state = torch.load("best_resnet18_temporal.pth", map_location=device)
model.load_state_dict(best_state)
# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()


ResNet18Temporal(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

Test set evaluation

In [None]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Put the model in evaluation mode here as well, so dropout and batch-norm behave
# deterministically even if this cell runs without the checkpoint-loading cell.
model.eval()

all_preds = []
all_labels = []

# Gradients are not needed for inference.
with torch.no_grad():
    for x, y in test_loader:
        out = model(x.to(device))
        # Predicted class = index of the largest score along dim 1.
        preds = out.argmax(dim=1)

        all_preds.extend(preds.cpu().numpy())
        # Labels are only collected for the metrics, so they never need to be
        # moved to the compute device.
        all_labels.extend(y.cpu().numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)


Evaluation metrics

In [None]:
# Order class names by their integer index so every report row is labelled with
# the right class, and pass the indices explicitly so the report still works when
# a class is missing from the test labels/predictions.
ordered_classes = sorted(class_map.items(), key=lambda item: item[1])
class_names = [name for name, _ in ordered_classes]
class_ids = [idx for _, idx in ordered_classes]

print("Test Accuracy:", accuracy_score(all_labels, all_preds))

print("\nClassification Report:")
print(classification_report(all_labels, all_preds,
                            labels=class_ids, target_names=class_names))


Confusion matrix

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path

# Order class names by their integer index and pass the indices explicitly, so the
# matrix always has one row/column per class and the tick labels line up with it.
ordered_classes = sorted(class_map.items(), key=lambda item: item[1])
class_names = [name for name, _ in ordered_classes]
class_ids = [idx for _, idx in ordered_classes]

cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d",
            xticklabels=class_names,
            yticklabels=class_names,
            cmap="Blues", ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix – 2D CNN (ResNet-18)")

# Save BEFORE showing: once plt.show() has rendered the figure, a following
# plt.savefig() writes a new, empty figure. Saving through `fig` also avoids
# depending on pyplot's "current figure" state.
out_path = Path("../results/confusion_matrices/2d_resnet18_confusion_matrix.png")
out_path.parent.mkdir(parents=True, exist_ok=True)  # savefig does not create directories
fig.savefig(str(out_path))
plt.show()

