# Preliminary Notebook

## 1. Install and Import Dependencies

### Install Dependencies

In [27]:
%pip install tensorflow-macos opencv-python mediapipe-silicon sklearn matplotlib
#!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib # original code line from tutorial (he used a Windows system)

Note: you may need to restart the kernel to use updated packages.


### Import Dependencies

In [28]:
# general 
import numpy as np
import pandas as pd
import os # easier file path handling

# for device camera feed
import cv2 # opencv
from matplotlib import pyplot as plt # imshow for easy visualization
import time # to insert breaks / "sleep" in between frames
import mediapipe as mp # for accessing and reading from device camera

# for data pre-processing
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# for model evaluation
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

## 2. Preprocess Data and Create Labels and Features

### Some global setup (edit to your needs)

In [29]:
# path for input data
DATA_PATH = os.path.join('MP_Data_test')
#DATA_PATH = os.path.join('MP_Data')

# actions to detect (get later from our .json file)
actions = np.array(['hello', 'thanks', 'iloveyou'])

# 30 videos per sequence / word / sign (get it later from input file)
no_sequences = 30

# each video with 30 frames (get it later from input file)
sequence_length = 30

In [30]:
# create label map (dict, later our .json file)
label_map = {label:num for num, label in enumerate(actions)}

### Here, we could feed in our Kaggle data

### Loading Data

In [31]:
sequences, labels = [], [] # sequences will be x data, labels will be y data
# loop over all actions (words)
for action in actions: 
    # loop over all sequences (videos)
    for sequence in range(no_sequences): 
        window = [] # represents all frames of particular sequence (video)
        # loop through each frame
        for frame_num in range(sequence_length): 
            # load up current frame (frame_num)
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res) # append to one video
        sequences.append(window) # append all videos to sequence (for a word)
        labels.append(label_map[action])

In [32]:
X = np.array(sequences) # shape: (90, 30, 1662)
y = to_categorical(labels).astype(int) # one-hot encoded labels (words), shape: (90, 3)

### Splitting Train and Test Data

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

## Build and Train LSTM Neural Network

### Setup

In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [35]:
# for tensorflow callbacks
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

### Model building

In [36]:
# setup sequential model
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

# compile the model
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# categorical_crossentropy must be used for multiclass classification model! 
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 30, 64)            442112    
                                                                 
 lstm_7 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_8 (LSTM)               (None, 64)                49408     
                                                                 
 dense_6 (Dense)             (None, 64)                4160      
                                                                 
 dense_7 (Dense)             (None, 32)                2080      
                                                                 
 dense_8 (Dense)             (None, 3)                 99        
                                                                 
Total params: 596,675
Trainable params: 596,675
Non-tr

### Model fitting

In [37]:
model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])
# advantage of using mediapipe holistic model is you don't need additional data generator to build up a pipeline of data. Training data fits all into memory.

Epoch 1/2000

### Some Tensorboard stuff I didn't figure out (not important)

In [None]:
# %load_ext tensorboard

In [None]:
# to run it in the notebook: 
# %tensorboard --logdir=./Logs/train --port=8008

# to run it in terminal: python3 -m tensorboard --logdir=./Logs/train --port=8008
# then copy+paste this into your internet browser: localhost:8008

## Prediction

In [None]:
y_pred = model.predict(X_test)



Looking at some single example predictions :D

In [None]:
actions[np.argmax(y_pred[0])] # prediction

'hello'

In [None]:
actions[np.argmax(y_test[0])] # actual

'hello'

## Evaluation using Confusion Matrix and Accuracy

In [None]:
# convert one-hot encoded categories back to labels, 
# e.g. 0, 1 and 2 instead of [1,0,0], [0,1,0], [0,0,1]
y_test_original = y_test.copy()
y_pred_original = y_pred.copy()
y_test = np.argmax(y_test, axis=1).tolist()
y_pred = np.argmax(y_pred, axis=1).tolist()

In [None]:
# multilabel confusion matrix 
multilabel_confusion_matrix(y_test, y_pred)

array([[[3, 1],
        [0, 1]],

       [[1, 3],
        [1, 0]],

       [[2, 0],
        [3, 0]]])

In [None]:
# accuracy score
accuracy_score(y_test, y_pred)

0.2

## Save Model Weights

In [None]:
# save model
model_name = 'first_model_whoop_whoop.h5'
model.save(model_name)