# Libraries

In [1]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras_preprocessing.sequence import pad_sequences

import numpy as np
import os

# Preprocessing

In [2]:
# base path: contains one sub-folder per action
directory_path = './labels_new'
# current working directory
c_dir = os.getcwd()

# All actions = every sub-folder of directory_path, sorted to follow the folder
# arrangement (this order also fixes the integer encoding of the labels).
# Both the listing and the isdir() filter use directory_path, so changing the
# base path above is enough to point the notebook at another dataset.
#
# To train on specific actions only, replace the assignment below, e.g.:
# actions = np.array(['alligator', 'flower', 'kiss', 'listen', 'orange'])
# actions = np.array(['afternoon', 'house', 'again', 'open', 'kiss', 'sorry'])
actions = np.array(sorted(
    folder
    for folder in os.listdir(directory_path)
    if os.path.isdir(os.path.join(directory_path, folder))
))

In [3]:
# integer class id of every action = its position in `actions`
label_map = dict(zip(actions, range(len(actions))))
label_map

{'afternoon': 0,
 'again': 1,
 'base': 2,
 'door': 3,
 'hello': 4,
 'house': 5,
 'how': 6,
 'open': 7,
 'see': 8,
 'sorry': 9,
 'why': 10}

Note that at this point, we will not access the video folder, only the numpy folder.

In [4]:
# sequences -> one entry per video: the list of its per-frame key-point arrays
# labels    -> integer class id of each video, aligned with `sequences`
sequences, labels = [], []
for action in actions:
    action_dir = os.path.join(directory_path, action)
    no_actions = len(os.listdir(action_dir))
    print('Opening path:', action_dir)
    print(f'Number of instances: {no_actions}')
    for num in range(1, no_actions + 1):
        # instance folders are named <action>_<num>, numbered from 1
        file = f'{action}_{num}'
        video_dir = os.path.join(action_dir, file)
        no_frames_per_action = len(os.listdir(video_dir))
        print(f'Number of frames in {file}: {no_frames_per_action}')
        # window -> all frames of this video; frame files are 1.npy .. N.npy,
        # loaded in numeric order
        window = [
            np.load(os.path.join(video_dir, "{}.npy".format(frame_num)))
            for frame_num in range(1, no_frames_per_action + 1)
        ]
        sequences.append(window)
        labels.append(label_map[action])
    print('-'*75)

Opening path: ./labels_new\afternoon
Number of instances: 400
Number of frames in afternoon_1: 30
Number of frames in afternoon_2: 30
Number of frames in afternoon_3: 32
Number of frames in afternoon_4: 29
Number of frames in afternoon_5: 30
Number of frames in afternoon_6: 29
Number of frames in afternoon_7: 27
Number of frames in afternoon_8: 30
Number of frames in afternoon_9: 30
Number of frames in afternoon_10: 30
Number of frames in afternoon_11: 32
Number of frames in afternoon_12: 30
Number of frames in afternoon_13: 30
Number of frames in afternoon_14: 28
Number of frames in afternoon_15: 30
Number of frames in afternoon_16: 31
Number of frames in afternoon_17: 29
Number of frames in afternoon_18: 31
Number of frames in afternoon_19: 28
Number of frames in afternoon_20: 30
Number of frames in afternoon_21: 28
Number of frames in afternoon_22: 27
Number of frames in afternoon_23: 30
Number of frames in afternoon_24: 29
Number of frames in afternoon_25: 31
Number of frames in af

In [5]:
# The videos differ in number of frames, so zero-pad every sequence at the end
# ('post') up to the longest one. Only x is padded; y is one-hot encoded.
x = pad_sequences(sequences, dtype = 'float', padding = 'post', value = 0)
y = to_categorical(labels).astype(int)

# Stratified 80/20 split. The seed is fixed so that the split, and therefore
# the accuracy reported further down, is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size = 0.2, stratify = y, random_state = 42
)

In [6]:
# shape of one sample, i.e. axes 1 and 2 of the padded training array
input_shape = x_train.shape[1:3]
print(x_train.shape, x_test.shape, sep = '\n')

(1920, 127, 225)
(480, 127, 225)


In [8]:
# inspect the one-hot encoded test labels (one row per test sample)
y_test

array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0]])

In [7]:
# two thirds of (size of x's last axis + number of one-hot columns in y)
# NOTE(review): presumably a rule of thumb for sizing the hidden layer - confirm
(2/3)*(x_train.shape[2]+y_train.shape[1])

157.33333333333331

# Models

## LSTM

In [14]:
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import TensorBoard, TerminateOnNaN, EarlyStopping

from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [15]:
# TensorBoard event files are written to <current directory>/Logs
log_dir = os.path.join(c_dir, 'Logs')
tb_callback = TensorBoard(log_dir=log_dir)

# abort training as soon as the loss becomes NaN
term = TerminateOnNaN()

# stop early once the training loss has not improved for 5 epochs
early = EarlyStopping(patience=5, monitor='loss')

In [16]:
def choose_lstm(n, shape = None, n_classes = None):
    """Build (without compiling) one of the candidate LSTM classifiers.

    Parameters
    ----------
    n : int
        Architecture selector:
        1 - LSTM(64) -> Dropout -> Bidirectional(LSTM(64)) -> Dropout -> softmax
        2 - LSTM(64) -> LSTM(128) -> LSTM(64) -> Dense(64/32/8) -> softmax
        3 - LSTM(128) -> Dropout -> LSTM(64) -> Dropout -> softmax
        4 - three ReLU LSTMs (128/256/256) -> Dense(256/128/64, ReLU) -> softmax
    shape : tuple, optional
        Shape of one input sample passed to the first LSTM layer. Defaults to
        the notebook-level ``input_shape`` derived from the training data.
    n_classes : int, optional
        Width of the softmax output layer. Defaults to ``actions.shape[0]``.

    Returns
    -------
    tensorflow.keras.models.Sequential
        The uncompiled model.

    Raises
    ------
    ValueError
        If ``n`` is not one of 1, 2, 3, 4 (previously ``None`` was returned
        silently and the failure only surfaced at ``model.compile``).
    """
    if n not in (1, 2, 3, 4):
        raise ValueError(f'Unknown architecture {n!r}; expected 1, 2, 3 or 4')

    # Every variant takes its input shape from the data instead of a
    # hard-coded (117, 225), which does not match the padded sequences.
    if shape is None:
        shape = input_shape
    if n_classes is None:
        n_classes = actions.shape[0]

    model = Sequential()

    if n == 1:
        model.add(LSTM(64, return_sequences = True, input_shape = shape))
        model.add(Dropout(0.2))
        model.add(Bidirectional(LSTM(64)))
        model.add(Dropout(0.2))

    elif n == 2:
        model.add(LSTM(64, return_sequences = True, input_shape = shape))
        model.add(LSTM(128, return_sequences = True))
        model.add(LSTM(64, return_sequences = False))
        # NOTE(review): these Dense layers are given no activation argument
        model.add(Dense(64))
        model.add(Dense(32))
        model.add(Dense(8))

    elif n == 3:
        model.add(LSTM(128, return_sequences = True, input_shape = shape))
        model.add(Dropout(0.1))
        model.add(LSTM(64, return_sequences = False))
        model.add(Dropout(0.1))

    else:  # n == 4
        model.add(LSTM(128, return_sequences=True, activation='relu', input_shape = shape))
        model.add(Dropout(0.2))
        model.add(LSTM(256, return_sequences=True, activation='relu'))
        model.add(Dropout(0.2))
        model.add(LSTM(256, return_sequences=False, activation='relu'))
        model.add(Dense(256, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(64, activation='relu'))

    # shared classification head: one softmax unit per action
    model.add(Dense(n_classes, activation = "softmax"))

    return model

In [17]:
# build architecture 1 and compile it for one-hot (categorical) targets
model = choose_lstm(1)

opt = keras.optimizers.Adam(learning_rate = 1e-4)
model.compile(
    loss = "categorical_crossentropy",
    optimizer = opt,
    metrics = ['categorical_accuracy'],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 127, 64)           74240     
                                                                 
 dropout_2 (Dropout)         (None, 127, 64)           0         
                                                                 
 bidirectional_1 (Bidirecti  (None, 128)               66048     
 onal)                                                           
                                                                 
 dropout_3 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 11)                1419      
                                                                 
Total params: 141707 (553.54 KB)
Trainable params: 141707 (553.54 KB)
Non-trainable params: 0 (0.00 Byte)
______________

In [18]:
# train for at most 100 epochs; 20% of the training data is held out for validation
model.fit(
    x_train,
    y_train,
    batch_size = 16,
    epochs = 100,
    validation_split = 0.2,
    callbacks = [term, tb_callback, early],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


<keras.src.callbacks.History at 0x20d508e7ac0>

In [19]:
# take model predictions on the held-out test split
# (`res` is re-used here; in the loading loop it held per-frame key points)
res = model.predict(x_test)



In [20]:
# inspect the raw prediction array returned by model.predict
res

array([[2.03765361e-04, 5.96644750e-05, 7.64413926e-05, ...,
        9.95349705e-01, 2.19873618e-03, 8.39478394e-04],
       [1.98083790e-03, 9.32219973e-06, 7.57719274e-04, ...,
        1.52482907e-03, 1.09180277e-04, 3.96027084e-04],
       [5.88742201e-04, 4.85723023e-04, 7.70998280e-03, ...,
        3.78132910e-02, 3.53086065e-03, 8.55774343e-01],
       ...,
       [3.55827413e-03, 1.15550876e-01, 8.44553858e-03, ...,
        7.95990782e-05, 2.51974212e-03, 3.72037000e-04],
       [9.68479156e-01, 2.30378078e-04, 1.19488826e-03, ...,
        1.21845715e-02, 5.46713406e-03, 2.63874978e-03],
       [2.42703594e-03, 1.34999119e-03, 9.77187455e-01, ...,
        9.05288209e-04, 3.50455899e-04, 1.15229180e-02]], dtype=float32)

In [21]:
# collapse one-hot rows / probability rows to class indices
y_true = y_test.argmax(axis = 1).tolist()
y_pred = res.argmax(axis = 1).tolist()

In [22]:
# true class indices on the first line, predicted ones on the second
print(y_true, y_pred, sep = '\n')

[8, 6, 10, 7, 1, 4, 6, 4, 6, 6, 10, 8, 4, 0, 6, 5, 8, 9, 4, 3, 0, 5, 4, 2, 0, 9, 9, 8, 4, 4, 4, 0, 10, 8, 6, 10, 6, 0, 2, 0, 7, 4, 0, 0, 0, 6, 4, 0, 2, 6, 0, 8, 6, 4, 8, 6, 6, 6, 0, 10, 8, 8, 6, 6, 4, 6, 6, 6, 2, 8, 9, 6, 8, 10, 3, 6, 0, 8, 4, 8, 4, 10, 3, 4, 0, 4, 6, 0, 9, 8, 10, 0, 6, 4, 2, 6, 4, 8, 10, 8, 0, 1, 10, 2, 4, 8, 0, 6, 8, 8, 0, 1, 10, 1, 2, 6, 6, 0, 0, 0, 0, 6, 8, 6, 10, 9, 10, 4, 0, 4, 10, 8, 4, 8, 8, 4, 9, 8, 2, 9, 6, 8, 4, 4, 8, 0, 0, 0, 2, 8, 8, 8, 6, 8, 4, 8, 2, 6, 9, 9, 6, 0, 4, 2, 3, 1, 0, 0, 0, 8, 8, 2, 8, 9, 6, 6, 6, 3, 2, 4, 9, 4, 2, 0, 9, 4, 10, 0, 8, 6, 6, 4, 4, 6, 8, 2, 4, 6, 9, 4, 4, 4, 4, 0, 10, 5, 6, 0, 6, 6, 0, 9, 8, 0, 2, 6, 9, 2, 6, 4, 2, 0, 4, 0, 4, 2, 9, 9, 10, 0, 8, 2, 6, 0, 3, 6, 9, 6, 0, 1, 8, 0, 8, 4, 7, 9, 0, 4, 2, 0, 0, 2, 7, 6, 0, 3, 10, 8, 8, 5, 0, 4, 8, 9, 10, 4, 6, 6, 10, 9, 4, 2, 0, 8, 6, 9, 8, 5, 0, 8, 5, 8, 0, 9, 0, 0, 10, 2, 8, 1, 6, 6, 0, 10, 4, 0, 0, 3, 4, 4, 0, 9, 8, 0, 5, 4, 3, 6, 0, 10, 6, 6, 1, 0, 8, 6, 4, 9, 10, 10, 6, 8, 0, 4, 10

In [23]:
# test accuracy as a percentage with one decimal
accuracy_pct = round(accuracy_score(y_true, y_pred)*100, 1)
print(f'Accuracy: {accuracy_pct}%')

Accuracy: 97.9%


In [25]:
# persist the model (HDF5 format) only when it reaches at least 75% test accuracy
good_enough = accuracy_score(y_true, y_pred) >= 0.75
if good_enough:
    model.save('test_model_aug.h5', save_format = 'h5')
else:
    print('Model accuracy insufficient')

# keras.models.load_model("test_model.keras")

  saving_api.save_model(


In [80]:
# import os
# import shutil

# folder_path = './averaged_np_labels/'
# parent_files = os.listdir(os.path.join(folder_path))
# write_path = './labels/'

# for parent_file in parent_files:
#     parent_path = os.path.join(folder_path, parent_file)
#     export_path = os.path.join(write_path, parent_file)
#     os.mkdir(export_path)
#     print(f'Created new directory: {export_path}')
#     for i in range(1, len(os.listdir(os.path.join(folder_path, parent_file)))+1):
#         new_subfolder = f'{parent_file}_{i}'
#         os.mkdir(os.path.join(export_path, new_subfolder))
#         print(f'Created new subdirectory: {new_subfolder}')

#         source = os.path.join(parent_path, f'video{i}')
#         destination = os.path.join(os.path.join(export_path, new_subfolder))

#         sourcefolder = os.listdir(os.path.join(parent_path, f'video{i}'))
#         for file in sourcefolder:
#             file_to_copy = os.path.join(os.path.join(parent_path, f'video{i}'), file)
#             shutil.copy(file_to_copy, destination)

In [1]:
# NOTE(review): the execution counter restarts at 1 here, so this cell ran in a
# fresh kernel; nothing defined in the cells above is available from this point.
import keras
# load a previously saved model from disk (native .keras format)
lstm_model = keras.models.load_model('test_model_70.keras')

2024-03-17 01:26:55.427942: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-17 01:26:55.791638: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-17 01:26:55.791696: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-17 01:26:55.857846: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-17 01:26:55.996780: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# print the layer-by-layer summary of the loaded model
lstm_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 267, 32)           33024     
                                                                 
 dropout (Dropout)           (None, 267, 32)           0         
                                                                 
 bidirectional (Bidirection  (None, 128)               49664     
 al)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 16)                2064      
                                                                 
Total params: 84752 (331.06 KB)
Trainable params: 84752 (331.06 KB)
Non-trainable params: 0 (0.00 Byte)
__________________

In [3]:
# re-save the loaded model in HDF5 ('h5') format under a new file name
lstm_model.save('lstm_model.h5', save_format = 'h5')

  saving_api.save_model(


In [None]:
# Live inference from the default webcam.
#
# NOTE(review): cv2, np, mp_holistic, mediapipe_detection, draw_styled_landmarks
# and extract_keypoints are not imported/defined in this kernel session; they
# must be made available before this cell can run.

# The loaded LSTM takes a whole sequence of frames, shaped
# (1, timesteps, features), so a single frame reshaped to (1, features) is
# rejected. Read the expected sizes from the model itself (the summary of the
# loaded model shows 267 timesteps).
_, seq_len, n_features = lstm_model.input_shape

# key points of the most recent frames, oldest first, at most seq_len entries
frame_buffer = []

cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while True:

            # Capture frame-by-frame; stop when the camera delivers nothing
            ret, frame = cap.read()
            if not ret:
                break

            # extract keypoints
            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)
            keypoints = extract_keypoints(results)

            # keypoints processing: keep a sliding window of the last seq_len frames
            frame_buffer.append(np.asarray(keypoints, dtype=float).reshape(-1))
            frame_buffer = frame_buffer[-seq_len:]

            # zero-pad at the end up to seq_len, mirroring the 'post' zero padding
            # applied to the training sequences in this notebook
            sequence = np.zeros((seq_len, n_features))
            sequence[:len(frame_buffer)] = frame_buffer

            # add the batch axis; verbose=0 keeps Keras from printing a
            # progress bar for every frame
            prediction = lstm_model.predict(sequence[np.newaxis, ...], verbose=0)
            pred_class = np.argmax(prediction)

            # overlay image
            sign_text = f"Sign: {pred_class}"
            cv2.putText(image, sign_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Display the resulting frame
            cv2.imshow('Frame', image)

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the window even if a frame fails mid-loop
    cap.release()
    cv2.destroyAllWindows()