## Evaluating and testing the model in real time

In [2]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import os
import keras
import tensorflow as tf
from tensorflow import keras
import cv2
import mediapipe as mp
import numpy as np 

# Short aliases for the MediaPipe modules used below: drawing helpers,
# default drawing styles, the Hands solution and the Face Mesh solution.

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
mp_face_mesh = mp.solutions.face_mesh

# Paths read by later cells: the saved Keras model, the directory holding
# X_test.npy / y_test.npy, and the TFLite export of the same model.
model_path = 'Models/hand_signs_2'
data_path = 'Dataset_Processed'
tflite_path = 'Models/hand_signs_2.tflite'

In [29]:
#Utility functions

def _landmark_xy(hand_landmark, landmark_id):
    """Return ``[x, y]`` for the entry ``landmark_id`` of ``hand_landmark.landmark``.

    Shared implementation behind all ``get_*`` accessors below; each accessor
    only chooses which ``mp_hands.HandLandmark`` member to look up.
    """
    point = hand_landmark.landmark[landmark_id]
    return [point.x, point.y]


def get_Wrist(hand_landmark):
    """Return the WRIST landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.WRIST)


def get_Thumb_CMC(hand_landmark):
    """Return the THUMB_CMC landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.THUMB_CMC)


def get_Thumb_MCP(hand_landmark):
    """Return the THUMB_MCP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.THUMB_MCP)


def get_Thumb_IP(hand_landmark):
    """Return the THUMB_IP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.THUMB_IP)


def get_Thumb_TIP(hand_landmark):
    """Return the THUMB_TIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.THUMB_TIP)


def get_Index_MCP(hand_landmark):
    """Return the INDEX_FINGER_MCP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.INDEX_FINGER_MCP)


def get_Index_PIP(hand_landmark):
    """Return the INDEX_FINGER_PIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.INDEX_FINGER_PIP)


def get_Index_DIP(hand_landmark):
    """Return the INDEX_FINGER_DIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.INDEX_FINGER_DIP)


def get_Index_TIP(hand_landmark):
    """Return the INDEX_FINGER_TIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.INDEX_FINGER_TIP)


def get_Middle_MCP(hand_landmark):
    """Return the MIDDLE_FINGER_MCP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.MIDDLE_FINGER_MCP)


def get_Middle_PIP(hand_landmark):
    """Return the MIDDLE_FINGER_PIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.MIDDLE_FINGER_PIP)


def get_Middle_DIP(hand_landmark):
    """Return the MIDDLE_FINGER_DIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.MIDDLE_FINGER_DIP)


def get_Middle_TIP(hand_landmark):
    """Return the MIDDLE_FINGER_TIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.MIDDLE_FINGER_TIP)


def get_Ring_MCP(hand_landmark):
    """Return the RING_FINGER_MCP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.RING_FINGER_MCP)


def get_Ring_PIP(hand_landmark):
    """Return the RING_FINGER_PIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.RING_FINGER_PIP)


def get_Ring_DIP(hand_landmark):
    """Return the RING_FINGER_DIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.RING_FINGER_DIP)


def get_Ring_TIP(hand_landmark):
    """Return the RING_FINGER_TIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.RING_FINGER_TIP)


def get_Pinky_MCP(hand_landmark):
    """Return the PINKY_MCP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.PINKY_MCP)


def get_Pinky_PIP(hand_landmark):
    """Return the PINKY_PIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.PINKY_PIP)


def get_Pinky_DIP(hand_landmark):
    """Return the PINKY_DIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.PINKY_DIP)


def get_Pinky_TIP(hand_landmark):
    """Return the PINKY_TIP landmark of ``hand_landmark`` as ``[x, y]``."""
    return _landmark_xy(hand_landmark, mp_hands.HandLandmark.PINKY_TIP)


def checkHands(results):
    """Return the handedness label of every entry in ``results.multi_handedness``.

    For each entry the label of its first classification is taken, and the
    labels are returned in the same order as the entries.
    """
    return [entry.classification[0].label for entry in results.multi_handedness]

# Getting and arranging hands data by multi_handedness
def arrangeHands(all_arr):
    """Order the flattened landmarks as left hand followed by right hand.

    Reads the module-level ``hands_res`` list: with exactly one label the work
    is delegated to ``replace_Hand``, otherwise to ``org_Hand``.
    """
    arrange = replace_Hand if len(hands_res) == 1 else org_Hand
    return arrange(all_arr)
        
def replace_Hand(all_arr):
    """Pad a single hand's landmarks with zeros for the hand that is absent.

    Reads the module-level ``hands_res``; if its first label is ``"Left"`` the
    given values go first and 42 zeros follow, otherwise the zeros go first.
    """
    missing_hand = np.zeros(21*2)
    if hands_res[0] == "Left":
        return np.concatenate([all_arr, missing_hand])
    return np.concatenate([missing_hand, all_arr])

def org_Hand(all_arr):
    """Return two hands' landmarks ordered as left hand then right hand.

    ``all_arr`` is split at index 42. Reads the module-level ``hands_res``: when
    its first label is ``"Left"`` the halves keep their order, otherwise they
    are swapped.
    """
    first_half, second_half = all_arr[:42], all_arr[42:]
    if hands_res[0] == "Left":
        return np.concatenate([first_half, second_half])
    return np.concatenate([second_half, first_half])

#Collect all landmarks
def getHands(results):
    """Flatten the x/y values of every hand in ``results.multi_hand_landmarks``.

    For each hand the accessors below are applied in the listed order (wrist,
    thumb, index, middle, ring, pinky), and the resulting ``[x, y]`` pairs are
    flattened into a single 1-D NumPy array.
    """
    extractors = (
        get_Wrist,
        get_Thumb_CMC, get_Thumb_MCP, get_Thumb_IP, get_Thumb_TIP,
        get_Index_MCP, get_Index_PIP, get_Index_DIP, get_Index_TIP,
        get_Middle_MCP, get_Middle_PIP, get_Middle_DIP, get_Middle_TIP,
        get_Ring_MCP, get_Ring_PIP, get_Ring_DIP, get_Ring_TIP,
        get_Pinky_MCP, get_Pinky_PIP, get_Pinky_DIP, get_Pinky_TIP,
    )

    pairs = []
    for hand in results.multi_hand_landmarks:
        for extract in extractors:
            pairs.append(extract(hand))

    # Flattened landmarks
    return np.array(pairs).flatten()

def drawLandmarks():
    """Draw every hand in the global ``results`` onto the global ``image``.

    Uses MediaPipe's hand connection set together with its default landmark
    and connection drawing styles.
    """
    for detected_hand in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=detected_hand,
            connections=mp_hands.HAND_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_hand_landmarks_style(),
            connection_drawing_spec=mp_drawing_styles.get_default_hand_connections_style())
        
def get_labels(file_name):
    """Read ``file_name`` and return its lines as a NumPy array of labels."""
    with open(file_name) as label_file:
        return np.array(label_file.read().splitlines())


def data_collection():
    """Overlay the recording status on the global ``image`` and show it.

    Relies on the module-level ``frame_num``, ``act`` and ``sequence``. On the
    first frame (``frame_num == 0``) a start banner is drawn as well and the
    window is held for 2000 ms; on any other frame only the status line is
    drawn.
    """
    is_first_frame = frame_num == 0

    if is_first_frame:
        cv2.putText(image, 'STARTING COLLECTION', (200, 200),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
        status = 'Collecting frames for {} Video number {}'.format(act, sequence)
        cv2.putText(image, status, (15, 12),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    else:
        status = 'Collecting frames for {}st video' .format(sequence)
        cv2.putText(image, status, (15, 12),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 4, cv2.LINE_AA)

    # Display frames
    cv2.imshow("MediaPipe Hands", image)
    if is_first_frame:
        cv2.waitKey(2000)
        
        
def draw_Face():
    """Overlay the face mesh from the global ``results2`` on the global ``image``.

    Does nothing when ``results2.multi_face_landmarks`` is empty or ``None``.
    Otherwise each face is drawn three times, in order: tesselation, contours
    and irises, each with MediaPipe's default connection style and no landmark
    dots.
    """
    detected_faces = results2.multi_face_landmarks
    if not detected_faces:
        return

    # (connection set, factory for its default connection style), in draw order
    layers = (
        (mp_face_mesh.FACEMESH_TESSELATION,
         mp_drawing_styles.get_default_face_mesh_tesselation_style),
        (mp_face_mesh.FACEMESH_CONTOURS,
         mp_drawing_styles.get_default_face_mesh_contours_style),
        (mp_face_mesh.FACEMESH_IRISES,
         mp_drawing_styles.get_default_face_mesh_iris_connections_style),
    )

    for face in detected_faces:
        for connection_set, make_style in layers:
            mp_drawing.draw_landmarks(
                image=image,
                landmark_list=face,
                connections=connection_set,
                landmark_drawing_spec=None,
                connection_drawing_spec=make_style())
            

def get_face_landmarks():
    """Return the x/y values of every face landmark in the global ``results2``.

    Returns:
        A flat list ``[x0, y0, x1, y1, ...]`` covering each face in
        ``results2.multi_face_landmarks`` in order, or an empty list when that
        attribute is empty or ``None``.
    """
    coordinates = []
    # draw_Face and the webcam loop both truth-test multi_face_landmarks before
    # using it, so a falsy value is treated here as "no faces" instead of
    # raising a TypeError when iterated.
    for face in results2.multi_face_landmarks or ():
        for point in face.landmark:
            coordinates.append(point.x)
            coordinates.append(point.y)
    return coordinates

In [6]:
# Class labels, one per line of labels.txt; the prediction cells index this
# array with the argmax of the model output.
actions = get_labels("labels.txt")

# Number of consecutive frames that make up one input sequence
# (the model input shape printed further down is [1 40 84]).
sequence_length = 40

In [7]:
# Load the trained Keras model stored at `model_path`
model = keras.models.load_model(model_path)

In [None]:
# Opening webcam: run live recognition until "q" is pressed.
sequences = []   # per-frame landmark vectors for the window being collected
predicant = []   # not used in this cell; kept in case other cells read it
sentence = []    # most recent distinct predictions shown in the banner
threshold = 0.4  # minimum top score for a prediction to be displayed

cap = cv2.VideoCapture(0)

# try/finally so the camera and the window are released even when something
# inside the loop raises (otherwise the device stays locked until the kernel
# is restarted).
try:
    with mp_face_mesh.FaceMesh(max_num_faces=1, refine_landmarks=True, min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
        with mp_hands.Hands(model_complexity=0, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
            while cap.isOpened():
                success, image = cap.read()

                if not success:
                    print('Ignoring empty camera frame!')
                    continue

                # Mirror the frame, then hand MediaPipe a read-only RGB copy.
                # To improve performance, optionally mark the image as not
                # writeable to pass by reference.
                image = cv2.flip(image, 1)
                image.flags.writeable = False
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                results = hands.process(image)

                # For facemesh
                results2 = face_mesh.process(image)

                # Back to a writeable BGR frame for the OpenCV overlays.
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Only frames with both a hand and a face feed the sequence buffer.
                if results.multi_hand_landmarks and results2.multi_face_landmarks:

                    # Handedness labels; arrangeHands reads this module-level name.
                    hands_res = checkHands(results)

                    # Collect all landmarks
                    all_arr = getHands(results)

                    # Arrange hands (left first, then right)
                    arranged = arrangeHands(all_arr)

                    sequences.append(arranged)

                    # A full window is ready: classify it, then start a new one.
                    if len(sequences) >= sequence_length:
                        # `x` stays a module-level name: later cells reuse it.
                        x = np.expand_dims(sequences, axis=0)
                        res = model.predict(x)
                        best = np.argmax(res)
                        print(actions[best])
                        sequences.clear()

                        # Visualize confident predictions, skipping immediate repeats.
                        if res.flatten()[best] > threshold:
                            if not sentence or actions[best] != sentence[-1]:
                                sentence.append(actions[best])

                    # Keep only the three most recent words in the banner.
                    if len(sentence) > 3:
                        sentence = sentence[-3:]

                    cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
                    cv2.putText(image, ' '.join(sentence), (30, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # Optional overlays (disabled):
                    #drawLandmarks()
                    #draw_Face()

                # Show frame display (with or without the banner).
                cv2.imshow('MediaPipe Hands', image)

                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()

In [6]:
# Shape of the last window sent to the model by the webcam cell.
# NOTE: `x` is only defined once that cell has produced at least one prediction.
x.shape

(1, 40, 84)

In [None]:
# Converting the saved model to TFLite (disabled: uncomment to regenerate the file at `tflite_path`)

# Convert the model
#converter = tf.lite.TFLiteConverter.from_saved_model(model_path) # path to the SavedModel directory
#tflite_model = converter.convert()

# Save the model.
#with open(tflite_path, 'wb') as f:   
    #f.write(tflite_model)

In [7]:
# Testing the TFLite model
# Load the TFLite model from `tflite_path` and allocate tensors.
interpreter = tf.lite.Interpreter(model_path=tflite_path)
interpreter.allocate_tensors()

# Get input and output tensor descriptions; later cells use their 'index'
# entries with set_tensor / get_tensor.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print('Input shape: ', input_details[0]['shape'])
print('Output shape: ', output_details[0]['shape'])

Input shape:  [ 1 40 84]
Output shape:  [ 1 23]


## Comparing inference time

In [13]:
from timeit import timeit
import time
from sklearn.metrics import confusion_matrix

In [9]:
# Time one inference of each model on the last webcam window `x`
# (requires the webcam cell to have produced at least one prediction).
input_shape = input_details[0]['shape']
input_data = x.astype(np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)

# perf_counter is a monotonic, high-resolution clock meant for measuring short
# intervals; time.time() is a wall clock and can be too coarse for ~ms timings.
start = time.perf_counter()
interpreter.invoke()

# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
output_data = interpreter.get_tensor(output_details[0]['index'])
end = time.perf_counter()
inf_time = end - start
print('Tflite inference time: ' +  str(inf_time) + ' seconds')


# NOTE(review): both figures come from a single run, so they are indicative
# only; repeat the measurement (e.g. with %timeit) before quoting a speed-up.
now = time.perf_counter()
preds = model.predict(x)
then = time.perf_counter()

h5_time = then - now
print('Keras model inference time: ' + str(h5_time) + ' seconds')

Tflite inference time: 0.009178638458251953 seconds
Keras model inference time: 0.5794308185577393 seconds


In [11]:
# Label with the highest score in the most recent TFLite output
actions[np.argmax(output_data)]

'thankyou'

In [12]:
# Record the TensorFlow version this notebook was run with
print(tf.__version__)

2.6.0


In [13]:
# Layer-by-layer overview of the loaded Keras model
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_14 (LSTM)               (None, 40, 100)           74000     
_________________________________________________________________
dropout_23 (Dropout)         (None, 40, 100)           0         
_________________________________________________________________
lstm_15 (LSTM)               (None, 150)               150600    
_________________________________________________________________
dropout_24 (Dropout)         (None, 150)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 100)               15100     
_________________________________________________________________
dropout_25 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 100)              

# Evaluating Models

### Evaluating the TFLite model

In [10]:
# Load the test split stored under `data_path`.
X_data = np.load(data_path + '/X_test.npy')
# float32 copy of the features; this is the array fed to the TFLite interpreter.
x_data = X_data.astype(np.float32)
y_truth = np.load(data_path + '/y_test.npy')

# Every sequence needs exactly one label row; fail here, not in the metrics
# cells, if the two files are out of sync.
assert len(x_data) == len(y_truth), "X_test.npy and y_test.npy differ in length"

# Shape of a single test sequence
x_data[0].shape

(40, 84)

In [11]:
# Run each test sequence through the TFLite interpreter as a batch of one and
# keep the index of the highest-scoring class.
y_pred = []
for sample in x_data:
    batch = np.expand_dims(sample, axis=0)
    interpreter.set_tensor(input_details[0]['index'], batch)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    y_pred.append(np.argmax(output_data))

# Ground-truth class index for every label row
y_true = [np.argmax(label_row) for label_row in y_truth]

In [12]:
# NOTE(review): with average='micro' and one label per sample, recall,
# precision and F1 all equal accuracy (the four printed values are identical);
# average='macro' would give per-class-balanced figures.
print('Accuracy : ', accuracy_score(y_true, y_pred))
for caption, scorer in (("Recall score: ", recall_score),
                        ("Precision score: ", precision_score),
                        ("F1 score: ", f1_score)):
    print(caption, scorer(y_true, y_pred, average='micro'))

Accuracy :  0.8347826086956521
Recall score:  0.8347826086956521
Precision score:  0.8347826086956521
F1 score:  0.8347826086956521


In [14]:
# Per-class breakdown of the TFLite predictions (true labels passed first,
# predictions second)
confusion_matrix(y_true, y_pred)

array([[12,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  5,  8,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 16,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  1,  0,  0,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  4,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  4,  2,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  3,  0,  0,  0],
       [ 0,  0,  0,  2,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  1,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,

### Evaluating the Keras model

In [10]:
# Predicted and true class indices for the whole test split, as plain lists
yhat = np.argmax(model.predict(X_data), axis=1).tolist()
ytrue = y_truth.argmax(axis=1).tolist()

# NOTE(review): micro-averaged recall/precision/F1 coincide with accuracy for
# single-label data, as the identical printed values show.
print('Accuracy : ', accuracy_score(ytrue, yhat))
for caption, scorer in (("Recall score: ", recall_score),
                        ("Precision score: ", precision_score),
                        ("F1 score: ", f1_score)):
    print(caption, scorer(ytrue, yhat, average='micro'))

Accuracy :  0.8347826086956521
Recall score:  0.8347826086956521
Precision score:  0.8347826086956521
F1 score:  0.8347826086956521
