In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from xlrd import open_workbook
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import csv
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score # Accuracy metrics 

# Functions for Mediapipe and Extracting Keypoints

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections
    
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 
    
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

# Extract Keypoint Values

In [59]:
wb = open_workbook(r'F:\Signlanguage\Jupyter\DichNNKH\VideoFileTranslatorList_3_checked_test_2words.xls')
sheet = wb.sheet_by_index(0)
sheet.cell_value(0, 0)
column_index = 0
column = sheet.cell_value(0, column_index)

#n = 0

#Test_Arr = np.array(['A', 'B', 'C', 'D'])

# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

for row in range(3, sheet.nrows):
    
    Vid_Link = sheet.cell_value(row, column_index + 3)
    Letter = sheet.cell_value(row, column_index + 5)
    
    File_No = str(sheet.cell_value(row, column_index))
    #Test = str(Test)
    
#     disallowed_characters = ".?><"
#     for character in disallowed_characters:
#         Letter = Letter.replace(character, "")

    # One video worth of data
    no_sequences = 30

    for sequence in range(no_sequences): #Loop through Videos
            
        npy_path = os.path.join(DATA_PATH, Letter, str(sequence))
        os.makedirs(npy_path)

        cap = cv2.VideoCapture(Vid_Link)

        # Set mediapipe model 
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:


            Frame_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            print(Frame_length)
            print(Letter)

            # Loop through video length aka sequence length
            for frame_num in range(30):

                # Read feed
                ret, frame = cap.read()
                

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # NEW Export keypoints
                keypoints = extract_keypoints(results)
                

                #npy_path = os.path.join(DATA_PATH, File_No, str(sequence), str(frame_num))
                npy_path2 = os.path.join(npy_path, str(frame_num))
                np.save(npy_path2, keypoints)
        
                
        cap.release()
        cv2.destroyAllWindows()
#         Test = int(Test)
#         Test += 1
        print("Done")

130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
130
địa chỉ
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
147
Tỉnh
Done
125
Tiếp Tân
Done
125
Tiếp Tân
Done
125
Tiếp Tân
Done
125
Tiếp Tân
Don

# Preprocess Data and Create Labels and Features

In [60]:
Let_List = []
wb = open_workbook(r'F:\Signlanguage\Jupyter\DichNNKH\VideoFileTranslatorList_3_checked_test_2words.xls')
sheet = wb.sheet_by_index(0)
sheet.cell_value(0, 0)
column_index = 0
column = sheet.cell_value(0, column_index)
no_sequences = 30

for row in range(3, sheet.nrows):
    Let_Link = sheet.cell_value(row, column_index+5)
    Let_List.append(Let_Link)
Let_Arr = np.array(Let_List)

label_map = {label: num for num, label in enumerate(Let_Arr)}

In [61]:
Let_Arr

array(['địa chỉ', 'Tỉnh', 'Tiếp Tân', 'nhân viên'], dtype='<U9')

In [62]:
#This set of array is required to keep the Numpy array in shape

Null_List = []
Null_Arr = np.array(Null_List)

for n in range(1662): 
    Null_Arr = np.append(Null_Arr, [0])
    n+=1

In [63]:
sequences, labels = [], []

DATA_PATH = os.path.join('MP_Data')

sheet = wb.sheet_by_index(0)
sheet.cell_value(0, 0)
column_index = 0
column = sheet.cell_value(0, column_index)

for row in range(3, sheet.nrows):
    Vid_Link = sheet.cell_value(row, column_index+3)
    Letter = sheet.cell_value(row, column_index+5)
    File_No = str(sheet.cell_value(row, column_index))


    for sequence in range(no_sequences):
        cap = cv2.VideoCapture(Vid_Link)
        window = []
        Frame_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        for frame_num in range(30):
            try:
                res = np.load(os.path.join(DATA_PATH, Letter, str(sequence), "{}.npy".format(frame_num)))
                
                #print(res)
                #print(len(res))
                window.append(res)
                
            except:
                window.append(res)
                
        sequences.append(window)
        labels.append(label_map[Letter])
        #print(sequences,'1')
        #print(labels, '2')
    cap.release()
    cv2.destroyAllWindows()

    
        


In [64]:
X = np.array(sequences)
y = to_categorical(labels).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [65]:
labels

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3]

In [66]:
X_train.shape

(114, 30, 1662)

In [67]:
np.array(sequences).shape

(120, 30, 1662)

In [68]:
#res = np.array(res)

# Build and Train LSTM Neural Network

In [69]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf
from tensorflow import keras

log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(Let_Arr.shape[0], activation='softmax'))

#res = [.7, 0.2, 0.1]

opt = tf.keras.optimizers.SGD(learning_rate = 0.25)#learning_rate = 0.01

model.compile(optimizer= 'Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])



In [70]:
Let_Arr

array(['địa chỉ', 'Tỉnh', 'Tiếp Tân', 'nhân viên'], dtype='<U9')

In [71]:
model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000

KeyboardInterrupt: 

In [24]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 30, 64)            442112    
                                                                 
 lstm_7 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_8 (LSTM)               (None, 64)                49408     
                                                                 
 dense_6 (Dense)             (None, 64)                4160      
                                                                 
 dense_7 (Dense)             (None, 32)                2080      
                                                                 
 dense_8 (Dense)             (None, 4)                 132       
                                                                 
Total params: 596,708
Trainable params: 596,708
Non-tr

In [25]:
res = model.predict(X_test)



In [26]:
np.sum(res)

1.0

In [29]:
Let_Arr[np.argmax(res[1])]

IndexError: index 1 is out of bounds for axis 0 with size 1

In [30]:
Let_Arr[np.argmax(y_test[1])]

IndexError: index 1 is out of bounds for axis 0 with size 1

In [22]:
model.save('action.h5') # Save weights

In [23]:
model.load_weights('action.h5')

In [24]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

yhat = model.predict(X_test)
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()




In [25]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[0., 1.],
        [0., 0.]],

       [[0., 0.],
        [1., 0.]]])

In [26]:
accuracy_score(ytrue, yhat)

0.0