In [34]:
import pandas as pd, torch, torch.nn as nn, torch.nn.functional as F, numpy as np, os, collections
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split



In [35]:
# Path to the per-sample metadata CSV. Despite the `_DIR` suffix this is a file, not a directory:
# it is read with `pd.read_csv` and overwritten in place once the labels have been attached.
META_DIR = 'data/metadata.csv'
# Directory the dataset classes load the `sample<sample_id>.npy` arrays from.
NPY_DIR = 'data/npy'

In [36]:
# Hand-annotated phase boundaries for every video (slow, manual work).
#
# Maps a video file name to a list of (first_frame, last_frame, phase_name) tuples.
# `get_majority_Label` treats both bounds as inclusive, and labels any frame that no tuple
# covers as "transition" (4).
# Bounds written as `300+N`, `600+N` or `900+N` are ordinary integer sums; presumably the
# frame counter used while annotating restarted every 300 frames — TODO confirm.
# Phase names may repeat within one video (e.g. vid2); presumably one sequence per sprint — TODO confirm.

PHASE_RANGES = {
    "vid1.mp4": [(0, 125, "start"),
                 (126, 220, "acceleration"),
                 (221, 300+123, "max_velocity"),
                 (300+124, 300+134, "deceleration")],

    "vid2.mp4": [(0, 5, "start"),
                 (6, 96, "acceleration"),
                 (97, 285, "max_velocity"),
                 (286, 290, "deceleration"),

                 (291, 292, "start"),
                 (293, 300+78, "acceleration"),
                 (300+79, 300+279, "max_velocity"),
                 (300+280, 300+281, "deceleration"),

                 (300+282, 600+28, "start"),
                 (600+29, 600+126, "acceleration"),
                 (600+127, 600+290, "max_velocity")],

    "vid3.mp4": [(0, 65, "start"),
                 (66, 121, "acceleration"),
                 # NOTE(review): frames 122-171 are covered by no range, so they are labelled
                 # "transition"; confirm the gap is intended and not a typo in one of the bounds.
                 (172, 300+70, "max_velocity"),
                 (300+71, 300+75, "deceleration")],

    "vid4.mp4": [(0, 91, "start"),
                 (92, 226, "acceleration"),
                 (227, 300+71, "max_velocity"),

                 # NOTE(review): frames 372-383 are uncovered and fall back to "transition" — confirm intended.
                 (300+84, 300+268, "max_velocity")],

    "vid5.mp4": [(0, 300+118, "max_velocity"),
                 (300+119, 300+181, "deceleration")],
                 
    "vid6.mp4": [(0, 24, "start"),
                 (25, 300+204, "acceleration"),

                 (300+205, 300+232, "start"),
                 (300+233, 900+110, "acceleration")],

    # NOTE(review): annotation starts at frame 33, so frames 0-32 fall back to "transition" — confirm intended.
    "vid7.mp4": [(33, 101, "start"),
                 (102, 205, "acceleration"),
                 (206, 353, "max_velocity")],
                 
    "vid8.mp4": [(0, 201, "max_velocity")],

    "vid9.mp4": [(0, 357, "max_velocity")],

    "vid10.mp4": [(0, 300+33, "max_velocity")],

    "vid11.mp4": [(0, 59, "start"),
                  (60, 300+187, "acceleration")],
}


In [37]:
# Integer class ids used as training targets: the position of a phase name in the tuple is its id.
# "transition" (4) is also the value the labelling code returns for frames no annotated range covers.
PHASE_TO_LABEL = {
    phase_name: class_id
    for class_id, phase_name in enumerate(
        ("start", "acceleration", "max_velocity", "deceleration", "transition")
    )
}

In [38]:
def get_majority_Label(vid_file, s_f, e_f, phase_ranges=None, phase_to_label=None):
    """Return the most frequent phase label over the frame window [s_f, e_f).

    Every frame in the window votes for the label of the first annotated range
    that contains it (range bounds are inclusive). Frames that no range covers
    vote for the "transition" label.

    Args:
        vid_file: Video file name used as the key into the phase ranges.
        s_f: First frame of the window (inclusive).
        e_f: End frame of the window (exclusive).
        phase_ranges: Optional mapping of video name to a list of
            (first_frame, last_frame, phase_name) tuples. Defaults to the
            module-level PHASE_RANGES.
        phase_to_label: Optional mapping of phase name to integer label; must
            contain "transition". Defaults to the module-level PHASE_TO_LABEL.

    Returns:
        The integer label with the most votes. On a tie, the label that
        occurs first in the window wins. The "transition" label is returned
        when the video has no annotations or the window is empty.
    """
    if phase_ranges is None:
        phase_ranges = PHASE_RANGES
    if phase_to_label is None:
        phase_to_label = PHASE_TO_LABEL
    fallback = phase_to_label["transition"]

    ranges = phase_ranges.get(vid_file)
    if ranges is None:
        return fallback

    votes = collections.Counter()
    for frame in range(s_f, e_f):
        for first, last, phase in ranges:
            if first <= frame <= last:
                votes[phase_to_label[phase]] += 1
                break
        else:
            # No annotated range contains this frame.
            votes[fallback] += 1

    # An empty window (s_f >= e_f) casts no votes; most_common(1)[0] would raise IndexError.
    if not votes:
        return fallback
    return votes.most_common(1)[0][0]

In [39]:
# Attach a majority-vote phase label to every sample window and write the result back to the metadata CSV.
df = pd.read_csv(META_DIR)
# An index saved by an earlier `to_csv` without `index=False` is read back as an "Unnamed: N" column;
# drop it so the file does not keep carrying a stale copy of the row index.
df = df.drop(columns=[col for col in df.columns if str(col).startswith('Unnamed')])
# The window runs from start_frame (inclusive) to end_frame (exclusive).
df['label'] = df.apply(lambda x: get_majority_Label(x['video_file'], x['start_frame'], x['end_frame']), axis=1)
df.to_csv(META_DIR, index=False)
df

Unnamed: 0,sample_id,video_file,start_frame,end_frame,label
0,0,vid1.mp4,0,30,0
1,1,vid1.mp4,15,45,0
2,2,vid1.mp4,30,60,0
3,3,vid1.mp4,45,75,0
4,4,vid1.mp4,60,90,0
...,...,...,...,...,...
355,355,vid9.mp4,255,285,2
356,356,vid9.mp4,270,300,2
357,357,vid9.mp4,285,315,2
358,358,vid9.mp4,300,330,2


In [56]:
class PoseDataset(Dataset):
    """Dataset that takes its sample list from a metadata CSV and its arrays from NPY files.

    Every CSV row must provide `sample_id` and `label`; the array for a row is
    loaded from `<npy_dir>/sample<sample_id>.npy`.
    """

    def __init__(self, csv_file, npy_dir):
        self.npy_dir = npy_dir
        self.metadata = pd.read_csv(csv_file)

    def __len__(self):
        # One sample per metadata row.
        return self.metadata.shape[0]

    def __getitem__(self, index):
        """Return (float32 array tensor, int64 label tensor) for the row at `index`."""
        record = self.metadata.iloc[index]
        target = record['label']
        window = np.load(f"{self.npy_dir}/sample{record.sample_id}.npy")
        return (
            torch.tensor(window, dtype=torch.float32),
            torch.tensor(target, dtype=torch.long),
        )
    
# Create dataset objects for train and val splits
class DataFrameDataset(Dataset):
    """Dataset backed by an in-memory metadata DataFrame instead of a CSV path.

    Each row must provide `sample_id` and `label`; the array for a row is
    loaded from `<npy_dir>/sample<sample_id>.npy`.
    """

    def __init__(self, dataframe, npy_dir):
        self.dataframe, self.npy_dir = dataframe, npy_dir

    def __len__(self):
        # One sample per DataFrame row.
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return (float32 array tensor, int64 label tensor) for the row at position `index`."""
        record = self.dataframe.iloc[index]
        target = record['label']
        window = np.load(f"{self.npy_dir}/sample{record['sample_id']}.npy")
        features = torch.tensor(window, dtype=torch.float32)
        return features, torch.tensor(target, dtype=torch.long)
    

In [57]:
# Full, unsplit dataset read from the labelled metadata CSV, wrapped in a shuffling loader of 32-sample batches.
dataset = PoseDataset(META_DIR, NPY_DIR) # initialise the dataset
dataloader = DataLoader(dataset, batch_size=32, shuffle=True) # create the dataloader

In [24]:
# Sanity check: fetch the first three batches and print the shapes of their input and label tensors.
for batch_id, data in enumerate(dataloader):
    inputs, labels = data
    print(f"Batch {batch_id}:")
    print(f"Inputs shape: {inputs.shape}")
    print(f"Labels shape: {labels.shape}")
    if batch_id == 2:  # stop after batches 0, 1 and 2 have been printed
        break

Batch 0:
Inputs shape: torch.Size([32, 30, 132])
Labels shape: torch.Size([32])
Batch 1:
Inputs shape: torch.Size([32, 30, 132])
Labels shape: torch.Size([32])
Batch 2:
Inputs shape: torch.Size([32, 30, 132])
Labels shape: torch.Size([32])


In [59]:
# 80/20 split of the labelled metadata, stratified on `label`, with a fixed `random_state`.
# NOTE(review): the metadata preview shows windows that overlap by 15 frames (0-30, 15-45, ...), so a
# row-level random split can put near-identical windows in both train and val and inflate validation
# accuracy — consider splitting per video instead.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])
# Discard the shuffled original index before wrapping each split in a dataset.
train_dataset = DataFrameDataset(train_df.reset_index(drop=True), NPY_DIR)
val_dataset = DataFrameDataset(val_df.reset_index(drop=True), NPY_DIR)

In [60]:
class PhaseClassifier(nn.Module):
    """1D CNN that maps a window of per-frame feature vectors to phase logits.

    Input is (batch, window_size, input_channels); output is raw logits of
    shape (batch, num_classes). No softmax is applied because
    CrossEntropyLoss works on logits.
    """

    def __init__(self, num_classes=5, input_channels=132, window_size=30):
        super().__init__()

        # Feature extraction: three conv stages (kernel 3, padding 1 keeps the time length)
        # widen the channels input_channels -> 64 -> 128 -> 256, each followed by batch norm.
        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)

        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128)

        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)

        # Shared pooling layer: every application halves the time axis (30 -> 15 -> 7 -> 3).
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.dropout = nn.Dropout(p=0.5)

        # Three halvings shrink the window by a factor of 8; 256 channels remain per time step.
        self.fc_input_dimension = (window_size // 8) * 256

        self.fc1 = nn.Linear(self.fc_input_dimension, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        # Conv1d wants channels before time: (batch, time, features) -> (batch, features, time).
        x = x.permute(0, 2, 1)

        # conv -> batch norm -> ReLU -> pool, once per stage.
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))

        # Collapse [batch, features, time] into [batch, features * time] for the dense head.
        x = torch.flatten(x, start_dim=1)

        hidden = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(hidden)  # logits only
        

In [65]:
import torch.optim as optim

# Training hyperparameters
batch_size = 32
learning_rate = 0.001
epochs = 30

# Seed torch so weight initialisation and batch shuffling repeat on a top-to-bottom re-run
# (same value as the `random_state` used for the train/val split).
torch.manual_seed(42)

# Prepare the device and the loaders for training and validation.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The loaders must wrap the Dataset objects: a DataLoader built on the raw `train_df` / `val_df`
# DataFrames cannot yield (inputs, labels) tensor batches.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Initialise the model, the loss and the optimiser.
model = PhaseClassifier().to(device)
CELoss = nn.CrossEntropyLoss()  # expects raw logits and integer class targets
optimiser = optim.Adam(model.parameters(), lr = learning_rate)

In [64]:
# Rebuild the datasets and loaders from the split DataFrames so the loop below always iterates
# DataFrameDataset-backed loaders, overriding any `train_loader` / `val_loader` assigned earlier.
train_dataset = DataFrameDataset(train_df.reset_index(drop=True), NPY_DIR)
val_dataset = DataFrameDataset(val_df.reset_index(drop=True), NPY_DIR)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

print(f"Training on {device}...")

# One pass over the training set followed by one pass over the validation set per epoch.
for epoch in range(epochs):
    model.train() # training phase: dropout active, batch norm uses batch statistics
    running_loss = 0.0  # sum of per-batch mean losses for this epoch
    correct = 0
    total = 0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # clear gradients left over from the previous step
        optimiser.zero_grad()
        
        # forward pass
        outputs = model(inputs)
        loss = CELoss(outputs, labels)
        
        # backward pass and parameter update
        loss.backward()
        optimiser.step()
        
        running_loss += loss.item()
        # predicted class = index of the largest logit in each row
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # validation phase: eval mode and no gradient tracking
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
            
    # NOTE(review): training accuracy is accumulated while the model is in train mode, so it is
    # not measured under the same conditions as validation accuracy.
    train_acc = 100 * correct / total
    val_acc = 100 * val_correct / val_total
    
    # The reported loss is the mean of the per-batch losses, not a per-sample mean.
    print(f"Epoch {epoch+1}/{epochs} | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")

print("Training Complete!")

Training on cpu...
Epoch 1/50 | Loss: 1.0093 | Train Acc: 60.76% | Val Acc: 37.50%
Epoch 2/50 | Loss: 0.7271 | Train Acc: 73.96% | Val Acc: 70.83%
Epoch 3/50 | Loss: 0.5948 | Train Acc: 79.86% | Val Acc: 70.83%
Epoch 4/50 | Loss: 0.4705 | Train Acc: 84.03% | Val Acc: 70.83%
Epoch 5/50 | Loss: 0.3866 | Train Acc: 87.15% | Val Acc: 72.22%
Epoch 6/50 | Loss: 0.4301 | Train Acc: 85.76% | Val Acc: 75.00%
Epoch 7/50 | Loss: 0.3603 | Train Acc: 86.81% | Val Acc: 68.06%
Epoch 8/50 | Loss: 0.3087 | Train Acc: 86.81% | Val Acc: 81.94%
Epoch 9/50 | Loss: 0.3083 | Train Acc: 89.24% | Val Acc: 86.11%
Epoch 10/50 | Loss: 0.2976 | Train Acc: 89.24% | Val Acc: 79.17%
Epoch 11/50 | Loss: 0.2791 | Train Acc: 90.97% | Val Acc: 70.83%
Epoch 12/50 | Loss: 0.3121 | Train Acc: 90.62% | Val Acc: 80.56%
Epoch 13/50 | Loss: 0.2113 | Train Acc: 93.06% | Val Acc: 79.17%
Epoch 14/50 | Loss: 0.2020 | Train Acc: 92.01% | Val Acc: 79.17%
Epoch 15/50 | Loss: 0.1768 | Train Acc: 94.10% | Val Acc: 83.33%
Epoch 16/50 | L

## Next Steps

The next goal is to boost validation accuracy by collecting more video data, so that more features can be extracted. I will also tune the `learning_rate` and dropout values to see whether that helps. Over the next 30 days I plan to maintain and iterate on this model while building the website used to test everything. Once the website is up, I can start testing features properly and refine the model based on real data.

In [69]:
# Save only the learned parameters (state_dict); loading them later requires building PhaseClassifier first.
# NOTE(review): in this saved notebook the setup cell that re-creates `model` has a later execution
# count than the training cell. Run the notebook top to bottom before saving so the weights
# written here are the trained ones.
MODEL_PATH = "models/phase_classifier.pth"
# torch.save does not create missing parent directories.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
torch.save(model.state_dict(), MODEL_PATH)