In [1]:
import sys
sys.path.append("d:\\SMU\\ml&applns")

import pandas as pd
import numpy as np
import tensorflow as tf
from os import listdir
from os.path import isfile, join
from scipy.stats import skew, kurtosis, entropy




In [2]:
def get_row_id(row):
    return str(row.frame) + "-" + row.type + "-" + str(row.landmark_index)

def duplicate_vals(pf, values):
    offset = 0
    for val in np.unique(values):
        cnt = sum(values==val)
        if offset < cnt:
            offset = cnt
    offset += 1
    
    pf.frame *= offset
    values *= offset

    duplicate_frames = pd.DataFrame()

    for val in np.unique(values):
        amt = sum(values==val)

        for i in range(1, amt+1):
            dup_frame = pf.loc[pf.frame==val].copy()
            dup_frame.frame += i
            duplicate_frames = pd.concat([duplicate_frames, dup_frame])

    pf = pd.concat([pf, duplicate_frames], ignore_index=True).reset_index(drop=True).sort_values('frame')
    return pf

def remove_vals(pf, values: np.array):
    pf = pf.drop(pf.loc[pf.frame.isin(values)].index)
    return pf

def reset_frame_nums(pf):
    # Define the shape of the array
    shape_of_frame_vals = (len(pf.frame.unique()), sum(pf.frame==min(pf.frame)))

    # Create the array using broadcasting
    result_array = np.arange(shape_of_frame_vals[0])[:, np.newaxis]

    # Repeat the values along the second dimension
    result_array = np.tile(result_array, shape_of_frame_vals[1])

    # Concatenated array into list of new frame numbers
    new_frame_values = np.concatenate(result_array)

    pf.frame = new_frame_values

    return pf

def transform_data(pf, frame_amt_goal):
    pf = reset_frame_nums(pf)
    frame_nums = pf.frame.unique()
    frame_diff = abs(frame_amt_goal - len(frame_nums))
    operation = frame_amt_goal > len(frame_nums)

    values_to_operate = np.linspace(0, len(frame_nums) - 1, frame_diff, dtype=int)

    if operation:
        pf = duplicate_vals(pf, values_to_operate)
    else:
        pf = remove_vals(pf, values_to_operate)
        
    return pf

def populate_table(pf, video_data):
    frame_num = 0

    for frame in pf.frame.unique():
        x_vals = list(pf['x'].loc[pf.frame==frame])
        y_vals = list(pf['y'].loc[pf.frame==frame])
        z_vals = list(pf['z'].loc[pf.frame==frame])

        video_data[f'{frame_num}x'] = x_vals
        video_data[f'{frame_num}y'] = y_vals
        video_data[f'{frame_num}z'] = z_vals
        
        frame_num += 1

    return video_data

def create_data_table(pf):
    col_labels = ['type','landmark_index']

    for i in range(len(pf.frame.unique())):
        col_labels.append(f'{i}x')
        col_labels.append(f'{i}y')
        col_labels.append(f'{i}z')

    landmarks = []
    types = []

    for i in pf.type.unique():
        for j in pf.landmark_index.loc[pf.type==i].unique():
            landmarks.append(j)
            types.append(i)

    data = {col: [0.0] * len(types) for col in col_labels}
    data['type'] = types
    data['landmark_index'] = landmarks

    video_data = pd.DataFrame(columns=col_labels, data=data)
    video_data = populate_table(pf, video_data)

    return video_data

def apply_PCA(n_components, x_train, x_test):
    pca = PCA(n_components=n_components)
    pca.fit(x_train)
    return pca.transform(x_train), pca.transform(x_test)

def drop_empty_rows(pf):
    pf = pf.drop(pf.loc[(pf.x == 0) & (pf.y == 0) & (pf.z == 0)].index, axis=0)
    return pf

In [3]:
class Normalize(tf.Module):
  def __init__(self, x):
    # Initialize the mean and standard deviation for normalization
    self.mean = tf.Variable(tf.math.reduce_mean(x, axis=0))
    self.std = tf.Variable(tf.math.reduce_std(x, axis=0)) + 0.001

  def norm(self, x):
    # Normalize the input
    return (x - self.mean)/self.std

  def unnorm(self, x):
    # Unnormalize the input
    return (x * self.std) + self.mean

In [4]:
def normalize_list(values, limit):
    min_val = min(values)
    max_val = max(values)
    
    # Handle the case where max and min are equal
    if max_val == min_val:
        normalized_values = [0 if x == min_val else 1 for x in values]
    else:
        normalized_values = [((x - min_val) / (max_val - min_val))*(limit-1) for x in values]
    
    return normalized_values

In [None]:
def create_data_grids(pf, grid_size=32):
    frames_data = []

    for frame in pf.frame.unique():
        frame_data = []

        for data_type in pf.type.unique():
            data = pf.loc[(pf.frame==frame)&(pf.type==data_type)]

            x = data['x'].values
            y = data['y'].values

            x[x < 0] = 0
            y[y < 0] = 0

            grid = np.zeros((grid_size,grid_size))

            x = np.round(normalize_list(x, grid_size)).astype(int)
            y = np.round(normalize_list(y, grid_size)).astype(int)

            grid[x, y] += data['z']

            frame_data.append(grid)

        frames_data.append(frame_data)

    frames_data = np.transpose(np.array(frames_data), (0, 2, 3, 1))
    return frames_data

In [5]:
import os
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, entropy
from os import listdir
from os.path import isfile, join
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import normalize

types = ['pose', 'left_hand', 'right_hand']
lblBin = LabelBinarizer().fit(types)

# Initialize a list of folder names containing parquet files
folders = ["alligator", "flower", "kiss", "listen", "orange"]

# Initialize lists for aggregated data and labels
aggregated_files = []
labels = []

# Iterate over the folders in the list
for folder in folders:
    # Update path to focus on content inside folder in the current iteration
    path = "./asl-kaggle/by_labels/"+folder+"/"

    # Fetch all file names in the folder
    parquets = [f for f in listdir(path) if isfile(join(path, f))]

    # Iterate over file names in the list (up to the 50th file name)
    for parquet in parquets[:50]:
        # Update path to focus on the file in the current iteration
        parquet_path = path + parquet
        
        # Read the file at the path and load data to pf
        pf = pd.read_parquet(parquet_path)
        
        # Replace all NaN values with 0
        pf = pf.fillna(0)

        pf = transform_data(pf, 60)

        # Remove all rows with the type of face
        pf = pf.drop(pf.loc[pf.type=="face"].index).reset_index(drop=True)
        pf = pf.drop(['row_id', 'landmark_index'], axis=1)

        video_data = create_data_grids(pf, 64)

        # Append array to list
        aggregated_files.append(video_data)
        labels.append(folder)


In [6]:
try:
    dataset = np.array(aggregated_files)
except:
    print("Padding Data")

    # Find the maximum length of arrays
    max_length = max(arr.shape[0] for arr in aggregated_files)

    # Pad each array along the first dimension (rows)
    padded_data = [np.pad(arr, ((0, max_length - arr.shape[0]), (0, 0), (0, 0), (0, 0)), mode='constant') for arr in aggregated_files]

    # Convert the list of padded arrays back to a numpy array
    dataset = np.array(padded_data)

dataset.shape

(250, 60, 64, 64, 3)

In [9]:
np.save("labels", labels)

In [18]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, LabelBinarizer, OneHotEncoder
from tensorflow.keras.utils import to_categorical
from random import randint

encoder = LabelBinarizer()
y = encoder.fit_transform(labels)

X_train, X_test, y_train, y_test = train_test_split(dataset, y, test_size=0.2, shuffle=True)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5)

In [21]:
from keras.models import Sequential
from keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout, BatchNormalization, Input, AveragePooling3D, GlobalAveragePooling3D

# Define the model
model = Sequential()

model.add(Input(shape=(X_train.shape[1], X_train.shape[2], X_train.shape[3], X_train.shape[4])))

# model.add(BatchNormalization(input_shape=(X_train.shape[1], X_train.shape[2], X_train.shape[3], X_train.shape[4])))

# Convolutional layers
model.add(Conv3D(32, kernel_size=(6, 3, 3), activation='relu'))
model.add(Conv3D(64, kernel_size=(3, 2, 2), activation='relu'))
model.add(Conv3D(64, kernel_size=(3, 2, 2), strides=2, activation='relu'))
model.add(BatchNormalization())

model.add(MaxPooling3D(pool_size=(2, 2, 2)))
model.add(Dropout(0.5))

model.add(Conv3D(128, kernel_size=(6, 3, 3), activation='relu'))
model.add(Conv3D(128, kernel_size=(3, 2, 2), activation='relu'))
model.add(Conv3D(256, kernel_size=(3, 2, 2), strides=2, activation='relu'))
model.add(BatchNormalization())

model.add(GlobalAveragePooling3D())
model.add(Dropout(0.5))

# Flatten layer
# model.add(Flatten())

# Fully connected layers
model.add(Dense(128, activation='relu'))
# model.add(Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l1(0.01)))
model.add(Dense(5, activation='softmax'))  # Assuming binary classification, change this for multi-class

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()





Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 55, 62, 62, 32)    5216      
                                                                 
 conv3d_1 (Conv3D)           (None, 53, 61, 61, 64)    24640     
                                                                 
 conv3d_2 (Conv3D)           (None, 26, 30, 30, 64)    49216     
                                                                 
 batch_normalization (Batch  (None, 26, 30, 30, 64)    256       
 Normalization)                                                  
                                                                 
 max_pooling3d (MaxPooling3  (None, 13, 15, 15, 64)    0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 13, 15, 15, 64)  

In [22]:
from tensorflow.keras.callbacks import EarlyStopping

batch_size = 32
epochs = 40

# Set up early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=epochs, callbacks=[early_stopping])

Epoch 1/40



KeyboardInterrupt: 

In [128]:
loss, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')

Test Loss: 2.1083831787109375, Test Accuracy: 0.41999998688697815
