In [None]:
import os
import pandas as pd
import json
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from torch.nn.utils.rnn import pad_sequence

class GlossKeypointDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.csv_files = [f for f in os.listdir(data_dir) if f.endswith('.csv')]

        self.glosses = []
        self.sequences = []

        # Parse each CSV file
        for csv_file in self.csv_files:
            gloss_name = os.path.splitext(csv_file)[0]
            csv_path = os.path.join(data_dir, csv_file)

            try:
                df = pd.read_csv(csv_path)

                # Parse keypoints in each row
                keypoint_sequences = []
                for _, row in df.iterrows():
                    try:
                        keypoints_dict = json.loads(row['keypoints'])
                        keypoints = keypoints_dict['pose']

                        # Validate keypoints
                        if keypoints and isinstance(keypoints, list) and len(keypoints) > 0:
                            # Convert keypoints to flat list of floats
                            keypoints_flat = [float(k) for k in keypoints]
                            if len(keypoints_flat) == output_dim:  # Make sure dimensions match
                                keypoint_sequences.append(keypoints_flat)
                    except (json.JSONDecodeError, KeyError, ValueError) as e:
                        print(f"Error processing row in {csv_file}: {e}")
                        continue

                # Only add sequences if we have valid keypoints
                if len(keypoint_sequences) > 0:
                    self.glosses.append(gloss_name)
                    self.sequences.append(keypoint_sequences)
                else:
                    print(f"Warning: No valid keypoints found in {csv_file}")

            except Exception as e:
                print(f"Error processing file {csv_file}: {e}")
                continue

        if len(self.glosses) == 0:
            raise ValueError("No valid data found in the dataset")

        # Label encode the glosses
        self.label_encoder = LabelEncoder()
        self.encoded_glosses = self.label_encoder.fit_transform(self.glosses)

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        gloss = self.encoded_glosses[idx]
        keypoint_sequence = self.sequences[idx]

        # Convert to tensor and ensure proper dimensions
        keypoint_sequence = torch.tensor(keypoint_sequence, dtype=torch.float32)
        return torch.tensor(gloss, dtype=torch.long), keypoint_sequence

def collate_fn(batch):
    # Separate glosses and sequences
    glosses, sequences = zip(*batch)

    # Stack glosses
    glosses = torch.stack(glosses)

    # Get sequence lengths
    lengths = [seq.size(0) for seq in sequences]

    # Pad sequences
    if all(len(seq) > 0 for seq in sequences):
        padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=0.0)
    else:
        raise ValueError("Found empty sequence in batch")

    return glosses, padded_sequences, torch.tensor(lengths)

class LSTMGlossToKeypointModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, gloss, seq_lengths):
        # Embedding layer
        embedded = self.embedding(gloss).unsqueeze(1)  # [batch_size, 1, embedding_dim]

        # LSTM layer
        lstm_out, _ = self.lstm(embedded)  # [batch_size, 1, hidden_dim]

        # Get final hidden state
        final_hidden = lstm_out.squeeze(1)  # [batch_size, hidden_dim]

        # Output layer
        output = self.fc(final_hidden)  # [batch_size, output_dim]
        return output

# Parameters
data_dir = '/content/drive/MyDrive/gloss_selected'
embedding_dim = 128
hidden_dim = 256
output_dim = 33  # Ensure this matches your keypoint dimension
batch_size = 16
learning_rate = 0.001
num_epochs = 50

# Create dataset and dataloader
try:
    dataset = GlossKeypointDataset(data_dir)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
        drop_last=True  # Drop incomplete last batch
    )
except Exception as e:
    print(f"Error creating dataset: {e}")
    raise

# Initialize model
vocab_size = len(dataset.label_encoder.classes_)
model = LSTMGlossToKeypointModel(vocab_size, embedding_dim, hidden_dim, output_dim)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    batch_count = 0

    try:
        for batch_idx, (gloss, keypoint_seq, lengths) in enumerate(dataloader):
            # Move tensors to device
            gloss = gloss.to(device)
            keypoint_seq = keypoint_seq.to(device)

            # Zero gradients
            optimizer.zero_grad()

            # Forward pass
            predictions = model(gloss, lengths)

            # Compute loss (using first frame of sequence as target)
            loss = criterion(predictions, keypoint_seq[:, 0, :])

            # Backward pass
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            batch_count += 1

            if batch_idx % 10 == 0:
                print(f"Epoch {epoch+1}/{num_epochs}, Batch {batch_idx}, Loss: {loss.item():.4f}")

    except Exception as e:
        print(f"Error during training: {e}")
        continue

    avg_loss = total_loss / batch_count if batch_count > 0 else float('inf')
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

# Save the trained model
try:
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': num_epochs,
        'vocab_size': vocab_size,
        'embedding_dim': embedding_dim,
        'hidden_dim': hidden_dim,
        'output_dim': output_dim
    }, 'lstm_gloss_to_keypoint_model.pth')
except Exception as e:
    print(f"Error saving model: {e}")

Error processing file improve.csv: float() argument must be a string or a real number, not 'list'
Error processing file spit.csv: float() argument must be a string or a real number, not 'list'
Error processing file fold.csv: float() argument must be a string or a real number, not 'list'
Error processing file discuss.csv: float() argument must be a string or a real number, not 'list'
Error processing file include.csv: float() argument must be a string or a real number, not 'list'
Error processing file spoon.csv: float() argument must be a string or a real number, not 'list'
Error processing file rough.csv: float() argument must be a string or a real number, not 'list'
Error processing file plan.csv: float() argument must be a string or a real number, not 'list'
Error processing file rubber.csv: float() argument must be a string or a real number, not 'list'
Error processing file bee.csv: float() argument must be a string or a real number, not 'list'
Error processing file increase.csv: fl

ValueError: No valid data found in the dataset

In [None]:
import cv2
import numpy as np

# Function to load the model
def load_model(model_path, vocab_size, embedding_dim, hidden_dim, num_keypoints):
    model = LSTMGlossToKeypointModel(vocab_size, embedding_dim, hidden_dim, num_keypoints)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model

# Function to visualize keypoints using OpenCV
def visualize_keypoints(keypoints, frame_size=(500, 500)):
    # Create a blank white background
    canvas = np.ones((frame_size[0], frame_size[1], 3), dtype=np.uint8) * 255

    # Scale keypoints to fit the canvas
    scale_factor = frame_size[0] // 2

    # Convert the keypoints to pixel coordinates
    for frame in keypoints:
        for i, point in enumerate(frame):
            x, y = int(point[0] * scale_factor + frame_size[1] // 2), int(point[1] * scale_factor + frame_size[0] // 2)
            cv2.circle(canvas, (x, y), 5, (0, 0, 255), -1)

    # Show the result
    cv2.imshow("Keypoint Visualization", canvas)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Load the saved model
vocab_size = len(dataset.label_encoder.classes_)
model = load_model('lstm_gloss_to_keypoint_model.pth', vocab_size, embedding_dim, hidden_dim, num_keypoints)

# Input a gloss ID for testing
test_gloss_id = torch.tensor([dataset.label_encoder.transform(['test_gloss'])[0]], dtype=torch.long)

# Sequence length (for demonstration purposes, assume 100)
seq_length = torch.tensor([100])

# Predict the keypoints for the test gloss
with torch.no_grad():
    predicted_keypoints = model(test_gloss_id, seq_length)

# Visualize the predicted keypoints
predicted_keypoints = predicted_keypoints.view(-1, num_keypoints, 3).numpy()  # Reshape for visualization
visualize_keypoints(predicted_keypoints)


AttributeError: 'list' object has no attribute 'get'

## Thats what a real goat would do


In [None]:
pip install numpy pandas tensorflow matplotlib



In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split

def load_data(data_folder):
    data = []
    for file in os.listdir(data_folder):
        if file.endswith('_keypoints.csv'):
            df = pd.read_csv(os.path.join(data_folder, file))
            for _, row in df.iterrows():
                keypoints = json.loads(row['keypoints'])
                data.append(keypoints)
    return data

data_folder = r"/content/drive/MyDrive/gloss_selected"
keypoint_data = load_data(data_folder)

In [None]:
def preprocess_data(keypoint_data, seq_length=30):
    sequences = []
    for keypoints in keypoint_data:
        # Flatten and normalize keypoints
        flat_keypoints = np.array(keypoints).flatten()
        flat_keypoints /= np.max(flat_keypoints)  # Normalize if necessary

        # Create sequences of a fixed length
        for i in range(len(flat_keypoints) - seq_length):
            sequences.append(flat_keypoints[i:i + seq_length])

    return np.array(sequences)

sequences = preprocess_data(keypoint_data)
X, y = sequences[:-1], sequences[1:]  # Predict next frame based on previous frames

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

def build_model(input_shape):
    model = Sequential()
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(input_shape[1], activation='linear'))  # Output layer with linear activation

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model

input_shape = (X.shape[1], X.shape[2])  # (sequence_length, num_features)
model = build_model(input_shape)

IndexError: tuple index out of range

In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def load_gloss_pose_data(csv_directory):
    data = []
    labels = []

    # Iterate through all files in the directory
    for file_name in os.listdir(csv_directory):
        if file_name.endswith(".csv"):
            gloss_word = file_name.split('.')[0]  # Extract gloss word from filename (without extension)
            csv_path = os.path.join(csv_directory, file_name)

            try:
                # Read CSV file containing pose points
                df = pd.read_csv(csv_path)

                # Extract pose points (assuming all columns after the first one are pose points)
                pose_points = df.iloc[:, 1:].values.tolist()  # Convert to list for padding

                # Append the pose points and gloss word to the dataset
                data.append(pose_points)
                labels.append(gloss_word)
            except Exception as e:
                print(f"Error reading {file_name}: {e}")

    # Pad sequences to ensure uniform length
    max_length = max(len(p) for p in data)  # Find the max length of pose sequences
    padded_data = pad_sequences(data, maxlen=max_length, padding='post', dtype='float32')

    return np.array(padded_data), np.array(labels)

# Path to the directory containing the CSV files
csv_directory = "/content/drive/MyDrive/gloss_selected/"

# Load the data
pose_data, gloss_words = load_gloss_pose_data(csv_directory)
print(f"Loaded {len(gloss_words)} gloss words with their corresponding pose points.")


ValueError: could not convert string to float: '{"pose": [[0.5068119764328003, 0.2505396604537964, -0.7745587825775146], [0.517985463142395, 0.21460002660751343, -0.7370471954345703], [0.5256759524345398, 0.21422415971755981, -0.7371422648429871], [0.5333788990974426, 0.21460479497909546, -0.7371224761009216], [0.49168792366981506, 0.2193293571472168, -0.7339509725570679], [0.4814605116844177, 0.22154128551483154, -0.7340691089630127], [0.4723874628543854, 0.22400003671646118, -0.7344253063201904], [0.5447446703910828, 0.23342502117156982, -0.4648835361003876], [0.45668351650238037, 0.24424046277999878, -0.4441893398761749], [0.5234630703926086, 0.2895978093147278, -0.6691169142723083], [0.48945653438568115, 0.29372578859329224, -0.6642863154411316], [0.6267673373222351, 0.45579758286476135, -0.3205034136772156], [0.3758520483970642, 0.45019102096557617, -0.23195897042751312], [0.6451781392097473, 0.7267289757728577, -0.33913204073905945], [0.2912769913673401, 0.7027644515037537, -0.4259045422077179], [0.5183265805244446, 0.8374385833740234, -0.5693448781967163], [0.4124425947666168, 0.7239401340484619, -0.9169824123382568], [0.4761546552181244, 0.897973358631134, -0.6533198952674866], [0.45094573497772217, 0.7644388675689697, -1.0192431211471558], [0.4599469006061554, 0.8718729019165039, -0.6509089469909668], [0.47186845541000366, 0.7213166356086731, -1.0079882144927979], [0.4779302179813385, 0.8461194038391113, -0.5687683820724487], [0.45987871289253235, 0.7056257724761963, -0.9200266599655151], [0.5593220591545105, 0.9555050730705261, -0.04824531078338623], [0.4140048623085022, 0.9543834328651428, 0.04975584149360657], [0.5552815198898315, 1.3310856819152832, 0.0345701165497303], [0.399272620677948, 1.3182746171951294, 0.27765902876853943], [0.5553569197654724, 1.627111792564392, 0.6650064587593079], [0.4069681167602539, 1.6202483177185059, 0.8485192656517029], [0.5591808557510376, 1.6786953210830688, 0.7117526531219482], [0.406404972076416, 1.675075888633728, 0.9000666737556458], [0.533635139465332, 1.7516813278198242, 0.40300866961479187], [0.42291703820228577, 1.7381709814071655, 0.5773932933807373]], "hands": [[[0.4088708162307739, 0.7379905581474304, 1.893765499971778e-07], [0.4220694899559021, 0.692511796951294, -0.013070297427475452], [0.45088157057762146, 0.6750643253326416, -0.018809150904417038], [0.47922489047050476, 0.6856014132499695, -0.021392732858657837], [0.4960877001285553, 0.7045249938964844, -0.023432115092873573], [0.4609639048576355, 0.719141960144043, -0.02009950391948223], [0.4916229546070099, 0.7324444651603699, -0.027570126578211784], [0.5117886066436768, 0.7414465546607971, -0.0321592278778553], [0.527121901512146, 0.7486773133277893, -0.03470839560031891], [0.46243709325790405, 0.7445027232170105, -0.016578393056988716], [0.4967189133167267, 0.7532432079315186, -0.021993303671479225], [0.5201101899147034, 0.759464681148529, -0.024810101836919785], [0.5383497476577759, 0.7651221752166748, -0.026916222646832466], [0.463125079870224, 0.761499285697937, -0.01355151366442442], [0.4955325126647949, 0.7668308615684509, -0.018683092668652534], [0.5173864364624023, 0.7705451250076294, -0.02128615416586399], [0.5340827107429504, 0.7740864753723145, -0.02273222804069519], [0.4635210931301117, 0.7717870473861694, -0.011378428898751736], [0.4906700551509857, 0.7766295671463013, -0.01617271639406681], [0.5077329874038696, 0.780029833316803, -0.017936794087290764], [0.5208514928817749, 0.7831579446792603, -0.018669195473194122]], [[0.49400466680526733, 0.8257044553756714, -1.7364773441386205e-07], [0.470843106508255, 0.8376078605651855, -0.00044046383118256927], [0.45175909996032715, 0.8758535981178284, -0.00661203358322382], [0.45006921887397766, 0.9191014766693115, -0.013417787849903107], [0.45105496048927307, 0.9468843340873718, -0.01981842890381813], [0.43143904209136963, 0.8648654222488403, -0.012922701425850391], [0.43359532952308655, 0.9300887584686279, -0.026163019239902496], [0.4448965787887573, 0.9223579168319702, -0.03246750310063362], [0.446000337600708, 0.9036462306976318, -0.035594385117292404], [0.4472540616989136, 0.8638396859169006, -0.018868250772356987], [0.4517209231853485, 0.9348196387290955, -0.029987873509526253], [0.46039631962776184, 0.9211486577987671, -0.030570561066269875], [0.46061500906944275, 0.9019266963005066, -0.030689429491758347], [0.4664393365383148, 0.8664460182189941, -0.024948783218860626], [0.47094088792800903, 0.9328449964523315, -0.0345429852604866], [0.47825029492378235, 0.9163010120391846, -0.029158884659409523], [0.47676363587379456, 0.8960199952125549, -0.025019019842147827], [0.48676541447639465, 0.8709841966629028, -0.03084700182080269], [0.4895126521587372, 0.9238948822021484, -0.03836595267057419], [0.4937610328197479, 0.908560037612915, -0.034558892250061035], [0.4926527440547943, 0.8879593014717102, -0.03073567897081375]]]}'

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import json
import os
from tqdm import tqdm
import pickle

# Mount Google Drive (uncomment if running in Colab)
# from google.colab import drive
# drive.mount('/content/drive')

class GRUSeq2Seq(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(GRUSeq2Seq, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.encoder = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.decoder = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, target_len):
        # Ensure input is 3D: (batch_size, sequence_length, input_size)
        if x.dim() == 2:
            x = x.unsqueeze(1)

        _, hidden = self.encoder(x)

        outputs = []
        input = torch.zeros(x.size(0), 1, self.hidden_size).to(x.device)

        for _ in range(target_len):
            output, hidden = self.decoder(input, hidden)
            output = self.fc(output)
            outputs.append(output)
            input = output

        return torch.cat(outputs, dim=1)

class SignLanguageDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

def load_data(folder_path):
    all_glosses = []
    all_keypoints = []

    for filename in tqdm(os.listdir(folder_path), desc="Loading CSV files"):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            data = pd.read_csv(file_path)

            gloss = os.path.splitext(filename)[0]
            all_glosses.append(gloss)

            keypoints = json.loads(data['keypoints'].iloc[0])
            all_keypoints.append(keypoints)

    label_encoder = LabelEncoder()
    encoded_glosses = label_encoder.fit_transform(all_glosses)

    keypoints = np.array([np.array(kp['pose']).flatten() for kp in all_keypoints])

    return encoded_glosses, keypoints, label_encoder

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    model.train()
    for epoch in range(num_epochs):
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_x, batch_y.size(1))
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x, batch_y.size(1))
                val_loss += criterion(outputs, batch_y).item()

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss/len(val_loader):.4f}')
        model.train()

def main():
    hidden_size = 128
    num_layers = 2
    batch_size = 32
    num_epochs = 50
    learning_rate = 0.001

    folder_path = '/content/drive/MyDrive/gloss_selected'  # Replace with your folder path
    save_path = '/content/drive/MyDrive/gloss_selected'

    # Create save directory if it doesn't exist
    os.makedirs(save_path, exist_ok=True)

    glosses, keypoints, label_encoder = load_data(folder_path)

    # Use the actual input size from the data
    input_size = keypoints.shape[1]
    output_size = keypoints.shape[1]

    # Convert glosses to one-hot encoding
    num_classes = len(label_encoder.classes_)
    glosses_one_hot = np.eye(num_classes)[glosses]

    X_train, X_test, y_train, y_test = train_test_split(glosses_one_hot, keypoints, test_size=0.2, random_state=42)

    train_dataset = SignLanguageDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
    test_dataset = SignLanguageDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Modify this line to correctly reflect the number of classes as the input size
    model = GRUSeq2Seq(num_classes, hidden_size, output_size, num_layers)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, device)

    # Save the model
    model_save_path = os.path.join(save_path, 'sign_language_gru_model.pth')
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved successfully at {model_save_path}")

    # Save the label encoder
    encoder_save_path = os.path.join(save_path, 'label_encoder.pkl')
    with open(encoder_save_path, 'wb') as f:
        pickle.dump(label_encoder, f)
    print(f"Label encoder saved successfully at {encoder_save_path}")

if __name__ == "__main__":
    main()


Loading CSV files: 100%|██████████| 2029/2029 [00:28<00:00, 70.99it/s] 


RuntimeError: input.size(-1) must be equal to input_size. Expected 128, got 99

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import json
import os
from tqdm import tqdm
from torch.nn.utils.rnn import pad_sequence
import glob

class GlossToKeypointsDataset:
    def __init__(self, data_dir):
        """
        data_dir: Directory containing CSV files named as 'gloss.csv'
        """
        self.data_dir = data_dir
        # Initialize BERT tokenizer for word embeddings
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # Load and process all data
        self.gloss_to_sequences = {}
        self.load_all_data()

    def load_all_data(self):
        """Load all CSV files and organize data by gloss"""
        print("Loading data from CSV files...")
        csv_files = glob.glob(os.path.join(self.data_dir, "*.csv"))

        for csv_file in tqdm(csv_files):
            # Get gloss from filename
            gloss = os.path.basename(csv_file).replace('.csv', '')

            try:
                # Read keypoints from CSV
                df = pd.read_csv(csv_file)
                sequences = []

                for _, row in df.iterrows():
                    keypoints = json.loads(row['keypoints'])['pose']
                    # Flatten the 3D keypoints array
                    flat_keypoints = np.array(keypoints).flatten()
                    sequences.append(flat_keypoints)

                if sequences:  # Only add if we got valid sequences
                    self.gloss_to_sequences[gloss] = np.array(sequences)

            except Exception as e:
                print(f"Error processing {gloss}: {str(e)}")

        print(f"Successfully loaded {len(self.gloss_to_sequences)} gloss sequences")

    def get_word_embedding(self, word):
        """Get BERT embedding for a word"""
        with torch.no_grad():
            inputs = self.tokenizer(word, return_tensors="pt", padding=True, truncation=True)
            outputs = self.bert(**inputs)
            # Use the [CLS] token embedding as word representation
            return outputs.last_hidden_state[:, 0, :].squeeze(0)

    def prepare_data_loader(self, batch_size=32, train_split=0.8):
        """Prepare train and validation data loaders"""
        # Prepare dataset entries
        all_glosses = list(self.gloss_to_sequences.keys())
        dataset_entries = []

        for gloss in all_glosses:
            word_embedding = self.get_word_embedding(gloss)
            keypoint_sequence = torch.FloatTensor(self.gloss_to_sequences[gloss])
            dataset_entries.append((word_embedding, keypoint_sequence))

        # Split into train and validation
        train_size = int(len(dataset_entries) * train_split)
        train_entries = dataset_entries[:train_size]
        val_entries = dataset_entries[train_size:]

        # Create data loaders
        train_loader = DataLoader(train_entries, batch_size=batch_size,
                                shuffle=True, collate_fn=self.collate_fn)
        val_loader = DataLoader(val_entries, batch_size=batch_size,
                              shuffle=False, collate_fn=self.collate_fn)

        return train_loader, val_loader

    @staticmethod
    def collate_fn(batch):
        """Custom collate function to handle variable length sequences"""
        word_embeddings, sequences = zip(*batch)
        word_embeddings = torch.stack(word_embeddings)
        # Pad sequences to same length
        sequences = pad_sequence(sequences, batch_first=True)
        return word_embeddings, sequences

class KeypointGenerator(nn.Module):
    def __init__(self, embedding_dim=768, hidden_dim=512, pose_dim=96):
        super().__init__()

        self.sequence_generator = nn.Sequential(
            # Process word embedding
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),

            # Hidden layers
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),

            # Output layer for pose dimension
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )

        # LSTM for sequence generation
        self.lstm = nn.LSTM(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            num_layers=2,
            dropout=0.2,
            batch_first=True,
            bidirectional=True
        )

        # Final pose generator
        self.pose_generator = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, pose_dim)
        )

    def forward(self, word_embedding, seq_length):
        batch_size = word_embedding.size(0)

        # Generate initial sequence representation
        sequence_features = self.sequence_generator(word_embedding)

        # Repeat for desired sequence length
        sequence_features = sequence_features.unsqueeze(1).repeat(1, seq_length, 1)

        # Generate sequence through LSTM
        lstm_out, _ = self.lstm(sequence_features)

        # Generate final pose sequence
        poses = self.pose_generator(lstm_out)

        return poses

def train_model(model, train_loader, val_loader, num_epochs=100):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.ReduceLROnPlateau(optimizer, mode='min', patience=5)

    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0

        for word_embeddings, target_sequences in train_loader:
            word_embeddings = word_embeddings.to(device)
            target_sequences = target_sequences.to(device)

            optimizer.zero_grad()

            # Generate poses
            pred_sequences = model(word_embeddings, target_sequences.size(1))
            loss = criterion(pred_sequences, target_sequences)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for word_embeddings, target_sequences in val_loader:
                word_embeddings = word_embeddings.to(device)
                target_sequences = target_sequences.to(device)

                pred_sequences = model(word_embeddings, target_sequences.size(1))
                loss = criterion(pred_sequences, target_sequences)
                val_loss += loss.item()

        avg_val_loss = val_loss / len(val_loader)
        scheduler.step(avg_val_loss)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

        # Save best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), 'best_keypoint_generator.pth')

def generate_for_new_gloss(model, dataset, gloss, seq_length=30):
    """Generate keypoint sequence for any gloss word"""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()

    # Get word embedding for the new gloss
    word_embedding = dataset.get_word_embedding(gloss)
    word_embedding = word_embedding.unsqueeze(0).to(device)

    # Generate sequence
    with torch.no_grad():
        generated_sequence = model(word_embedding, seq_length)

    return generated_sequence.cpu().numpy()

def main():
    # Initialize dataset
    dataset = GlossToKeypointsDataset("/content/drive/MyDrive/gloss_selected")

    # Get data loaders
    train_loader, val_loader = dataset.prepare_data_loader(batch_size=32)

    # Initialize and train model
    model = KeypointGenerator()
    train_model(model, train_loader, val_loader)

    # Example: Generate sequence for a new gloss
    new_gloss = "hello"  # Can be any word, even if not in training set
    generated_sequence = generate_for_new_gloss(model, dataset, new_gloss)
    print(f"Generated sequence shape: {generated_sequence.shape}")

if __name__ == "__main__":
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Loading data from CSV files...


100%|██████████| 2021/2021 [03:39<00:00,  9.19it/s]


Successfully loaded 2021 gloss sequences


AttributeError: module 'torch.optim' has no attribute 'ReduceLROnPlateau'