In [144]:
def pad_or_truncate(seq, max_len=8):
    seq = np.array(seq)
    seq_len, feature_dim = seq.shape
    if seq_len < max_len:
        pad = np.zeros((max_len - seq_len, feature_dim))
        return np.vstack([seq, pad])
    else:
        return seq[:max_len]


In [145]:
# Combine CSV files into a DataSet
import pandas as pd
import numpy as np
import os
import glob

def load_word_features(folder, max_len=8):
    X_dim_list, X_2d1_list, X_2d2_list, labels = [], [], [], []

    # Find all engineered features CSVs
    for file in glob.glob(os.path.join(folder, "Engineered_Features_*.csv")):

        # Extract word name (everything after last underscore, before .csv)
        word = os.path.basename(file).split("_")[-1].replace(".csv", "")

        # Construct matching filenames for other features
        file_coords = file.replace("Engineered_Features", "Coordinates")
        file_vel = file.replace("Engineered_Features", "Velocities")


        # Load CSVs into numpy arrays
        df_dim = pd.read_csv(file).to_numpy()
        df_2d1 = pd.read_csv(file_coords).to_numpy()
        df_2d2 = pd.read_csv(file_vel).to_numpy()

        # ✅ pad/truncate each word clip
        df_dim = pad_or_truncate(df_dim, max_len)
        df_2d1 = pad_or_truncate(df_2d1, max_len)
        df_2d2 = pad_or_truncate(df_2d2, max_len)

        # print(pd.read_csv(df_dim))
        a = pd.DataFrame(df_dim)
        a.to_csv("C:\\Users\\User\\OneDrive\\Documents\\Projects\\Lip-Reading\\notebooks\\A.csv")

        # Append to lists
        X_dim_list.append(df_dim[1:])  # Skip first row (header)
        X_2d1_list.append(df_2d1[1:])
        X_2d2_list.append(df_2d2[1:])
        labels.append(word)

    # Convert lists to numpy arrays

    print(X_dim_list)
    
    return np.array(X_dim_list), np.array(X_2d1_list), np.array(X_2d2_list), labels



In [146]:
folder = r"C:\Users\User\OneDrive\Documents\Projects\Lip-Reading\notebooks\test_data"


X_dim, X_2d1, X_2d2, labels = load_word_features(folder)



[array([[1.        , 1.        , 0.03277248, 0.05990243, 0.04532177,
        1.37353454, 0.2104676 , 0.08410234, 0.03822768, 0.09364836],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ]]), array([[ 1.00000000e+00,  1.00000000e+

In [147]:
# Encode Labels (Words -> IDs)
words = sorted(set(labels))
vocab = {w: i for i, w in enumerate(words)}
y = np.array([vocab[w] for w in labels])


In [148]:
# Create Pytorch Dataset
import torch
from torch.utils.data import Dataset

class LipReadingWordDataset(Dataset):
    def __init__(self, X_dim, X_2d1, X_2d2, y):
        self.X_dim = torch.tensor(X_dim, dtype=torch.float32)
        self.X_2d1 = torch.tensor(X_2d1, dtype=torch.float32)
        self.X_2d2 = torch.tensor(X_2d2, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X_dim[idx], self.X_2d1[idx], self.X_2d2[idx], self.y[idx]

# Create a DataLoader
from torch.utils.data import DataLoader
dataset = LipReadingWordDataset(X_dim, X_2d1, X_2d2, y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

In [None]:
# Define a simple model
import torch.nn as nn
import torch.nn.functional as F

class LipReadingModel(nn.Module):
    def __init__(self, dim_features, coords_features, vel_features, hidden_size, num_classes):
        super(LipReadingModel, self).__init__()

        input_size = dim_features + coords_features + vel_features
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x_dim, x_2d1, x_2d2):
        # Concatenate features along last dimension
        x = torch.cat([x_dim, x_2d1, x_2d2], dim=2)  # (batch, frames, features)
        _, (h_n, _) = self.lstm(x)  # Use final hidden state
        out = self.fc(h_n[-1])      # (batch, num_classes)
        return out


In [None]:
# Train the model 
import torch.optim as optim

num_classes = len(vocab)
model = LipReadingModel(dim_features=X_dim.shape[2],
                        coords_features=X_2d1.shape[2],
                        vel_features=X_2d2.shape[2],
                        hidden_size=128,
                        num_classes=num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    for x_dim_batch, x2d1_batch, x2d2_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(x_dim_batch, x2d1_batch, x2d2_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")


In [None]:
# Test the model
model.eval()
with torch.no_grad():
    pred = model(x_dim_batch, x2d1_batch, x2d2_batch).argmax(1)
    print(pred)
