# 1. Import Libraris

In [1]:
import cv2
import numpy as np
import os

from matplotlib import pyplot as plt
import time
!pip install mediapipe
import mediapipe as mp
import shutil
from tqdm import tqdm

Collecting mediapipe
  Downloading mediapipe-0.8.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.7 MB)
[K     |████████████████████████████████| 32.7 MB 143 kB/s 
Installing collected packages: mediapipe
Successfully installed mediapipe-0.8.9.1


# **Skip To section 4 to download the pre-processed data that's already done using sections 2, 3**

#2. Convert videos (dataset) into keppoints frames

In [None]:
#Download Dataset. SKIP if exist 
!gdown --id 1V6cozzvTo6vfN5hWaSiQF0gfkXcYYmIx
!unrar x dataset_v2.rar -o+

In [None]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [None]:
os.mkdir('MP_Data')
DATA_PATH = os.path.join('MP_Data') 

# Actions that we try to detect
actions = np.array([name for name in os.listdir("dataset")])
actions = np.sort(actions)

# Number of frames per any single video
sequence_length = 40

In [None]:
#create folders to hold dataset of np arrays; subfolder for each class/word
for action in actions: 
    os.mkdir('MP_Data/'+action)
    
    file_names = os.listdir("dataset/"+action)
    file_names = [int(x[:-4]) for x in file_names]
    num_of_vids = max(file_names)
    for i in range(0,num_of_vids+1):
        os.mkdir('MP_Data/'+action+"/"+str(i))

In [None]:
# Create detections of pose and hands in npy format

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose,lh, rh])

# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    # Loop through actions
    for action in tqdm(actions):
        
        file_names = os.listdir("dataset/"+action)
        file_names = [int(x[:-4]) for x in file_names]
        num_of_vids = max(file_names)
        
        # Loop through sequences aka videos
        for sequence in range(0, num_of_vids+1):
            
            vidcap = cv2.VideoCapture('dataset/{}/{}.mp4'.format(action,sequence))
            success,frame = vidcap.read()
            count = -1
            # Loop through each frame
            while success:  
                count += 1

                # Make detections
                image, results = mediapipe_detection(frame, holistic)
                
                # Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(count))
                np.save(npy_path, keypoints)
                success,frame = vidcap.read()

100%|███████████████████████████████████████████████████████████████████████████████| 52/52 [3:43:22<00:00, 257.75s/it]


#3. Filter videos to select 20 frame out of 40
using action detection algorithm

In [None]:
#SETUP FOLDERS FOR FILTERED DATA
os.mkdir('MP_Data_FILTERED')
DATA_PATH = os.path.join('MP_Data_FILTERED') 
actions = np.array([name for name in os.listdir("dataset")])
actions = np.sort(actions)
sequence_length = 20
number_of_frames = 20

In [None]:
#create folders to hold dataset of np arrays
for action in actions: 
    os.mkdir('MP_Data_FILTERED/'+action)
    file_names = os.listdir("dataset/"+action)
    file_names = [int(x[:-4]) for x in file_names]
    num_of_vids = max(file_names)
    for i in range(0,num_of_vids+1):
        os.mkdir('MP_Data_FILTERED/'+action+"/"+str(i))

In [None]:
# DO THE PROCESS

# Loop through actions (words)
for action in tqdm(actions):
    file_names = os.listdir("dataset/"+action)
    file_names = [int(x[:-4]) for x in file_names]
    num_of_vids = max(file_names)
    # Loop through sequences (videos)
    for sequence in range(0, num_of_vids+1):
        vidcap = cv2.VideoCapture('dataset/{}/{}.mp4'.format(action,sequence))
        success,frameTemp = vidcap.read() #FRAME IS IMAGE
        success,frame = vidcap.read() 
        count = -1
        actionMagnitude = []
        # Loop through each frame in a single video, to find the action magnitude over each frame in a video
        while success:
            count += 1
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray = cv2.GaussianBlur(gray, (21, 21), 0)
            static_back = cv2.cvtColor(frameTemp, cv2.COLOR_BGR2GRAY)
            static_back = cv2.GaussianBlur(static_back, (21, 21), 0)
            diff_frame = cv2.absdiff(static_back, gray)
            actionMagnitude.append(diff_frame.sum())
            frameTemp = np.copy(frame)
            success,frame = vidcap.read() #FRAME IS IMAGE

        #find best n frames to extract************************************
        maxIndex = 0 #best index to start collecting max(n = 20) frame. ex: max=5 means best is to collect frames[5:25]
        maxMagnitude = 0 #for a given number_of_frames
        for i in range(0,len(actionMagnitude)-number_of_frames):
            if sum(actionMagnitude[i:i+number_of_frames]) > maxMagnitude:
                maxIndex = i
                maxMagnitude = sum(actionMagnitude[i:i+number_of_frames])

        #Copy selected frames' keypoints into another location************
        for i in range(maxIndex,maxIndex+number_of_frames):
            source = "MP_Data/{}/{}/{}.npy".format(action,sequence,i)
            destination = "MP_Data_FILTERED/{}/{}/{}.npy".format(action,sequence,i-maxIndex) #renamed to be all starting from 0
            shutil.copyfile(source, destination)

100%|██████████████████████████████████████████████████████████████████████████████████| 52/52 [28:21<00:00, 32.72s/it]


# 4. Download pre-processed keypoints data 

In [None]:
#download keypoints (MP_Data_FILTERED_v3.rar)

!gdown --id 10vZEV7lnshZEFzUp7ZTZuQ4ZiUmnbePY
!unrar x MP_Data_FILTERED_v3.rar -o+

# 5. Load data into memory

In [6]:
actions = np.array([name for name in os.listdir("MP_Data_FILTERED")])
actions = np.sort(actions)
sequence_length = 20
label_map = {label:num for num, label in enumerate(actions)}
DATA_PATH = os.path.join('MP_Data_FILTERED') 

In [7]:
sequences, labels = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])
print('Data loaded!\ndata size: ',np.array(sequences).shape)
print('labels size: ',np.array(labels).shape)


Data loaded!
data size:  (3426, 20, 258)
labels size:  (3426,)


In [8]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

X = np.array(sequences)
y = to_categorical(labels).astype(int)

#DO FLIP HORIZONTAL ON ALL (DATA AUGMENTATION)
X = np.concatenate([X,X*-1])
y = np.concatenate([y,y])
#*********************************************

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.10, random_state=42, shuffle=True)

print('Test       : ',y_test.shape)
print('Validation : ',y_val.shape)
print('Train      : ',y_train.shape)

Test       :  (686, 52)
Validation :  (617, 52)
Train      :  (5549, 52)


# 6. Modeling

In [41]:
!rm -r Logs

In [42]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

model = Sequential()

model.add(Bidirectional(LSTM(128, return_sequences=True, activation='tanh', input_shape=(np.array(sequences).shape[1],np.array(sequences).shape[2]))))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(128, return_sequences=True, activation='tanh')))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(256, return_sequences=False, activation='tanh')))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(128, activation='tanh'))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(actions.shape[0], activation='softmax'))

model.compile(optimizer='Adam',  loss='categorical_crossentropy', metrics=['categorical_accuracy'])

checkpoint = ModelCheckpoint('weights.hdf5',monitor='val_categorical_accuracy',save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_categorical_accuracy', patience=60)
earlystoppingmonitor =  EarlyStopping(monitor='val_categorical_accuracy',patience=130)

training = model.fit(X_train, y_train,  batch_size=512, epochs=600,validation_data=(X_val,y_val),
          callbacks=[checkpoint,earlystoppingmonitor,reduce_lr,tb_callback])

Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600
Epoch 11/600
Epoch 12/600
Epoch 13/600
Epoch 14/600
Epoch 15/600
Epoch 16/600
Epoch 17/600
Epoch 18/600
Epoch 19/600
Epoch 20/600
Epoch 21/600
Epoch 22/600
Epoch 23/600
Epoch 24/600
Epoch 25/600
Epoch 26/600
Epoch 27/600
Epoch 28/600
Epoch 29/600
Epoch 30/600
Epoch 31/600
Epoch 32/600
Epoch 33/600
Epoch 34/600
Epoch 35/600
Epoch 36/600
Epoch 37/600
Epoch 38/600
Epoch 39/600
Epoch 40/600
Epoch 41/600
Epoch 42/600
Epoch 43/600
Epoch 44/600
Epoch 45/600
Epoch 46/600
Epoch 47/600
Epoch 48/600
Epoch 49/600
Epoch 50/600
Epoch 51/600
Epoch 52/600
Epoch 53/600
Epoch 54/600
Epoch 55/600
Epoch 56/600
Epoch 57/600
Epoch 58/600
Epoch 59/600
Epoch 60/600
Epoch 61/600
Epoch 62/600
Epoch 63/600
Epoch 64/600
Epoch 65/600
Epoch 66/600
Epoch 67/600
Epoch 68/600
Epoch 69/600
Epoch 70/600
Epoch 71/600
Epoch 72/600
Epoch 73/600
Epoch 74/600
Epoch 75/600
Epoch 76/600
Epoch 77/600
Epoch 78

In [10]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 20, 256)          396288    
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 20, 256)           0         
                                                                 
 batch_normalization (BatchN  (None, 20, 256)          1024      
 ormalization)                                                   
                                                                 
 bidirectional_1 (Bidirectio  (None, 20, 256)          394240    
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 20, 256)           0         
                                                      

# 7. Testing and Evaluating

In [43]:
from tensorflow.keras.models import load_model
from sklearn.metrics import  accuracy_score

#using the model checkpoint
best = load_model('weights.hdf5')
yhat = best.predict(X_test)
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
print("Testing Accuracy (Checkpoint): ",accuracy_score(ytrue, yhat))

#using the full trained model (till last executed epoch)
yhat = model.predict(X_test)
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
print("Testing Accuracy: ",accuracy_score(ytrue, yhat))

Testing Accuracy (Checkpoint):  0.9373177842565598
Testing Accuracy:  0.9358600583090378


In [44]:
#using the model checkpoint
best = load_model('weights.hdf5')
yhat = best.predict(X_val)
ytrue = np.argmax(y_val, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
print("Val Accuracy (Checkpoint): ",accuracy_score(ytrue, yhat))

yhat = best.predict(X_train)
ytrue = np.argmax(y_train, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
print("Train Accuracy (Checkpoint): ",accuracy_score(ytrue, yhat))

Val Accuracy (Checkpoint):  0.9497568881685575
Train Accuracy (Checkpoint):  1.0


# 8. Save Model

In [16]:
model.save('final.h5')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp /content/final.h5 /content/drive/MyDrive/SIGNXv3.3_94.8/final.h5
!cp /content/weights.hdf5 /content/drive/MyDrive/SIGNXv3.3_94.8/weights.hdf5