In [191]:
# Combine CSV files into a DataSet
import pandas as pd
import numpy as np
import os
import glob
import ast
import torch
from torch.utils.data import Dataset



In [192]:
# Pandas interprets tuples as strings in its cells, so we must convert back in friendly lists
def str_to_tuple(cell_str):
    if(isinstance(cell_str,str)):
        return list(ast.literal_eval(cell_str))
    else:
        return cell_str
    
def load_word_features(folder, vocab):
    X_dim_list, X_2d1_list, X_2d2_list, labels = [], [], [], []

    # Find all engineered features CSVs
    for file in glob.glob(os.path.join(folder, "Engineered_Features_*.csv")):
        # Extract word name (everything after last underscore, before .csv)
        word = os.path.basename(file).split("_")[-1].replace(".csv", "")
        # Construct matching filenames for other features
        file_coords = file.replace("Engineered_Features", "Coordinates")
        file_vel = file.replace("Engineered_Features", "Velocities")
        
        
        # Load CSVs into tensors
        df_dim_csv = pd.read_csv(file)
        df_dim_csv = df_dim_csv.drop(columns=["Unnamed: 0"])
        df_dim = torch.tensor(df_dim_csv.to_numpy(), dtype=torch.float32)
        
        df_2d1_csv = pd.read_csv(file_coords)
        df_2d1_csv = df_2d1_csv.drop(columns=["Unnamed: 0"])
        # print(df_2d1_csv)
        df_2d1_tuple = df_2d1_csv.map(str_to_tuple)
        df_2d1 = np.array(df_2d1_tuple.to_numpy().tolist(), dtype= np.float32) # convert from inferred object type to true list
        
        df_2d2_csv = pd.read_csv(file_vel)
        df_2d2_csv = df_2d2_csv.drop(columns=["Unnamed: 0"])
        df_2d2_tuple = df_2d2_csv.map(str_to_tuple)
        df_2d2 = np.array(df_2d2_tuple.to_numpy().tolist(), dtype= np.float32) # convert from inferred object type to true list
        
        # ✅ pad/truncate each word clip
        # df_dim = pad_or_truncate(df_dim)
        # df_2d1 = pad_or_truncate(df_2d1)
        # df_2d2 = pad_or_truncate(df_2d2)

        # Append to lists
        X_dim_list.append(df_dim)  # Skip first row (header)
        X_2d1_list.append(df_2d1)
        X_2d2_list.append(df_2d2)
        labels.append(word)

    labels = [vocab[word] for word in labels]
    
    # Return a list containing features for each word
    return (X_dim_list), (X_2d1_list), (X_2d2_list), labels



In [193]:
# D Path
# folder = r"C:\Users\User\OneDrive\Documents\Projects\Lip-Reading\notebooks\test_data"
# R Path
folder = r"C:\Projects\Lip_Reading\notebooks\test_data"
grid_vocab = {
    "<pad>": 0,
    "<sos>": 1,
    "<eos>": 2,
    "sp": 3,
    "bin": 4,
    "lay": 5,
    "place": 6,
    "set": 7,
    "blue": 8,
    "green": 9,
    "red": 10,
    "white": 11,
    "at": 12,
    "by": 13,
    "in": 14,
    "with": 15,
    "zero": 16,
    "one": 17,
    "two": 18,
    "three": 19,
    "four": 20,
    "five": 21,
    "six": 22,
    "seven": 23,
    "eight": 24,
    "nine": 25,
    "again": 26,
    "now": 27,
    "please": 28,
    "soon": 29,
    "a": 30,
    "b": 31,
    "c": 32,
    "d": 33,
    "e": 34,
    "f": 35,
    "g": 36,
    "h": 37,
    "i": 38,
    "j": 39,
    "k": 40,
    "l": 41,
    "m": 42,
    "n": 43,
    "o": 44,
    "p": 45,
    "q": 46,
    "r": 47,
    "s": 48,
    "t": 49,
    "u": 50,
    "v": 51,
    "x": 52,
    "y": 53,
    "z": 54
}
X_dim, X_2d1, X_2d2, labels = load_word_features(folder, grid_vocab)



In [194]:
# Encode Labels (Words -> IDs)
words = sorted(set(labels))
vocab = {w: i for i, w in enumerate(words)}
y = np.array([vocab[w] for w in labels])


In [195]:
# Create Pytorch Dataset
class LipReadingWordDataset(Dataset):
    def __init__(self, X_dim, X_2d1, X_2d2, y):
        self.X_dim = X_dim
        self.X_2d1 = X_2d1
        self.X_2d2 = X_2d2
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X_dim[idx], self.X_2d1[idx], self.X_2d2[idx], self.y[idx]

# Create a DataLoader (Data conversion from lists into tensors)
from torch.nn.utils.rnn import pad_sequence
def collate_fn(batch):
    X_dim, X_2d1, X_2d2, y = zip(*batch) # unpack getItem data
    
    X_dim_padded = pad_sequence(X_dim, batch_first=True) # outputs 3dim tensor of (batch_size, max_seq_len, feature_size) "True Tensors"
    X_2d1_padded = pad_sequence(X_2d1, batch_first=True)
    X_2d2_padded = pad_sequence(X_2d2, batch_first=True)
    
    y_tensor = torch.tesnor(y, dtype= torch.long) # make labels into tensor as well
    
from torch.utils.data import DataLoader

dataset = LipReadingWordDataset(X_dim, X_2d1, X_2d2, y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)


In [196]:
# Define a simple model
import torch.nn as nn
import torch.nn.functional as F

class LipReadingModel(nn.Module):
    def __init__(self, dim_features, coords_features, vel_features, hidden_size, num_classes):
        super(LipReadingModel, self).__init__()

        input_size = dim_features + coords_features + vel_features
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x_dim, x_2d1, x_2d2):
        # Concatenate features along last dimension
        x = torch.cat([x_dim, x_2d1, x_2d2], dim=2)  # (batch, frames, features)
        _, (h_n, _) = self.lstm(x)  # Use final hidden state
        out = self.fc(h_n[-1])      # (batch, num_classes)
        return out


In [197]:
# Train the model 
import torch.optim as optim

num_classes = len(vocab)
model = LipReadingModel(dim_features=X_dim.shape[2],
                        coords_features=X_2d1.shape[2],
                        vel_features=X_2d2.shape[2],
                        hidden_size=128,
                        num_classes=num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    for x_dim_batch, x2d1_batch, x2d2_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(x_dim_batch, x2d1_batch, x2d2_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")


AttributeError: 'list' object has no attribute 'shape'

In [None]:
# Test the model
model.eval()
with torch.no_grad():
    pred = model(x_dim_batch, x2d1_batch, x2d2_batch).argmax(1)
    print(pred)
