# ISNARTECH POSE ESTIMATION ASSIGNMENT, TRISHIT

## importing and installing dependencies

In [70]:
!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib



In [71]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

## keypoints using MediaPipe Holistic

In [72]:
# Drawing helpers used to render detected landmarks onto frames
mp_drawing = mp.solutions.drawing_utils

# Holistic solution module used to make the landmark predictions
mp_holistic = mp.solutions.holistic

In [73]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back the frame plus the results.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before being processed).
        model: Object exposing ``process(rgb_image)``; the callers in this
            notebook pass an ``mp_holistic.Holistic`` instance.

    Returns:
        Tuple ``(image, results)``: the frame converted back to BGR, and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The buffer is marked read-only only for the duration of the model call,
    # then unlocked again before the frame is converted back.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [74]:
def draw_landmarks(image, results):
    """Draw face, pose and hand landmarks from ``results`` onto ``image``.

    Args:
        image: Frame passed straight through to ``mp_drawing.draw_landmarks``
            (callers rely on it being drawn on in place; no value is returned).
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # One row per landmark group:
    # (landmarks, connection set, landmark colour/thickness/radius,
    #  connection colour/thickness/radius)
    layers = (
        # Face mesh. The connection colour previously used 256 for one
        # channel, which is outside the 0-255 range; 255 is the intended max.
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         ((80, 110, 10), 1, 1), ((80, 255, 121), 1, 1)),
        # Body pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         ((80, 22, 10), 2, 4), ((80, 44, 121), 2, 2)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         ((121, 22, 76), 2, 4), ((121, 44, 250), 2, 2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         ((245, 117, 66), 2, 4), ((245, 66, 230), 2, 2)),
    )

    for landmarks, connections, point_style, line_style in layers:
        point_color, point_thickness, point_radius = point_style
        line_color, line_thickness, line_radius = line_style
        mp_drawing.draw_landmarks(
            image,
            landmarks,
            connections,
            mp_drawing.DrawingSpec(color=point_color, thickness=point_thickness,
                                   circle_radius=point_radius),
            mp_drawing.DrawingSpec(color=line_color, thickness=line_thickness,
                                   circle_radius=line_radius),
        )

In [76]:
# Live preview: read webcam frames, run the holistic model on each one and
# show the annotated frame until 'q' is pressed.
cap = cv2.VideoCapture(0)

try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; passing it on would make the
                # colour conversion fail, so stop the preview instead.
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            print(results)

            # Draw landmarks
            draw_landmarks(image, results)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always give the camera and the window back, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>


## extracting keypoint values

In [77]:
def extract_keypoints(results):
    """Flatten the landmarks in ``results`` into one 1-D feature vector.

    The vector is the concatenation, in this order, of pose
    (x, y, z, visibility per landmark), face, left hand and right hand
    (x, y, z per landmark). A group whose landmarks attribute is falsy is
    replaced by zeros of size 33*4, 468*3, 21*3 and 21*3 respectively.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` iterable.

    Returns:
        1-D ``numpy`` array holding all four groups back to back.
    """
    def _flatten(group, count, fields):
        # Zero placeholder keeps the layout fixed when a group is missing.
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flatten(results.pose_landmarks, 33, xyz + ('visibility',)),
        _flatten(results.face_landmarks, 468, xyz),
        _flatten(results.left_hand_landmarks, 21, xyz),
        _flatten(results.right_hand_landmarks, 21, xyz),
    ]
    return np.concatenate(parts)

## setup folders for collection

In [78]:
# Root folder for the saved keypoint arrays
DATA_PATH = 'MP_DATA_2'

# Action labels to collect and classify
actions = np.array([
    'sitting actively',
    'feeling sleepy',
    'smoking cigarette',
    'eating food',
])

# 30 recorded videos per action, 30 frames per video
no_sequences = 30
sequence_length = 30

In [79]:
# Create DATA_PATH/<action>/<sequence> for every action and sequence index.
# exist_ok=True makes re-running the cell harmless, while real failures
# (e.g. a permission error) are no longer silently swallowed.
for action in actions:
    for sequence in range(no_sequences):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

## collecting keypoint values for training and testing

In [80]:
# Record training data: for every action, capture `no_sequences` videos of
# `sequence_length` frames each and save the keypoints of every frame under
# DATA_PATH/<action>/<sequence>/<frame_num> (np.save adds the .npy suffix).
cap = cv2.VideoCapture(0)

# Set when the user presses 'q'; checked by every loop level so that the whole
# collection stops instead of just the current video.
stop_requested = False

try:
    # setting mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # loop through the actions
        for action in actions:

            # loop through videos
            for sequence in range(no_sequences):

                # loop through the frames
                for frame_num in range(sequence_length):

                    ret, frame = cap.read()
                    if not ret:
                        # Without a frame the detection step cannot run, and
                        # skipping it would leave a gap in the saved sequence.
                        raise RuntimeError(
                            'Could not read a frame from the camera while collecting '
                            '{} video {} frame {}'.format(action, sequence, frame_num)
                        )

                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # Draw landmarks
                    draw_landmarks(image, results)

                    # waiting logic: the first frame of each video shows a
                    # banner and pauses so the subject can get into position
                    first_frame = frame_num == 0

                    if first_frame:
                        cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                   cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)

                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)

                    if first_frame:
                        cv2.waitKey(2000)

                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break

                if stop_requested:
                    break

            if stop_requested:
                break
finally:
    # Always give the camera and the window back, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()

## preprocess data and create feature labels

In [81]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [82]:
# Map each action label to its position in `actions`
label_map = dict(zip(actions, range(len(actions))))

In [83]:
# Rebuild the dataset from disk: one window (list of per-frame arrays) per
# recorded video, with the numeric label of its action alongside.
sequences, labels = [], []

for action in actions:
    for sequence in range(no_sequences):
        video_dir = os.path.join(DATA_PATH, action, str(sequence))
        window = [
            np.load(os.path.join(video_dir, "{}.npy".format(frame_num)))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])

In [84]:
# Stack the loaded windows into a single array of features
X = np.asarray(sequences)

In [85]:
# One-hot encode the numeric labels and cast the result to integers
y = to_categorical(labels).astype(int)

In [86]:
# Hold out 5% of the samples for testing.
# NOTE(review): no random_state is passed, so the split presumably differs
# between runs - confirm whether reproducibility is needed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

## Build and Train LSTM Neural Network

In [87]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [88]:
# TensorBoard callback writing its logs to the 'logs' directory
log_dir = 'logs'
tb_callback = TensorBoard(log_dir=log_dir)

In [89]:
# Three stacked LSTM layers followed by dense layers, ending in a softmax
# with one output per action.
# The input shape is derived from the collection settings and the loaded data
# instead of repeating the literals (30, 1662), so it stays correct if
# sequence_length or the keypoint layout changes.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu',
         input_shape=(sequence_length, X.shape[2])),
    LSTM(128, return_sequences=True, activation='relu'),
    # Last recurrent layer returns only its final output for the dense head
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

In [90]:
# Multi-class setup: categorical cross-entropy loss, categorical accuracy metric
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [91]:
# Train for 170 epochs, logging through the TensorBoard callback
model.fit(
    X_train,
    y_train,
    epochs=170,
    callbacks=[tb_callback],
)

Epoch 1/170
Epoch 2/170
Epoch 3/170
Epoch 4/170
Epoch 5/170
Epoch 6/170
Epoch 7/170
Epoch 8/170
Epoch 9/170
Epoch 10/170
Epoch 11/170
Epoch 12/170
Epoch 13/170
Epoch 14/170
Epoch 15/170
Epoch 16/170
Epoch 17/170
Epoch 18/170
Epoch 19/170
Epoch 20/170
Epoch 21/170
Epoch 22/170
Epoch 23/170
Epoch 24/170
Epoch 25/170
Epoch 26/170
Epoch 27/170
Epoch 28/170
Epoch 29/170
Epoch 30/170
Epoch 31/170
Epoch 32/170
Epoch 33/170
Epoch 34/170
Epoch 35/170
Epoch 36/170
Epoch 37/170
Epoch 38/170
Epoch 39/170
Epoch 40/170
Epoch 41/170
Epoch 42/170
Epoch 43/170
Epoch 44/170
Epoch 45/170
Epoch 46/170
Epoch 47/170
Epoch 48/170
Epoch 49/170
Epoch 50/170
Epoch 51/170
Epoch 52/170
Epoch 53/170
Epoch 54/170
Epoch 55/170
Epoch 56/170
Epoch 57/170
Epoch 58/170
Epoch 59/170
Epoch 60/170
Epoch 61/170
Epoch 62/170
Epoch 63/170
Epoch 64/170
Epoch 65/170
Epoch 66/170
Epoch 67/170
Epoch 68/170
Epoch 69/170
Epoch 70/170
Epoch 71/170
Epoch 72/170
Epoch 73/170
Epoch 74/170


Epoch 75/170
Epoch 76/170
Epoch 77/170
Epoch 78/170
Epoch 79/170
Epoch 80/170
Epoch 81/170
Epoch 82/170
Epoch 83/170
Epoch 84/170
Epoch 85/170
Epoch 86/170
Epoch 87/170
Epoch 88/170
Epoch 89/170
Epoch 90/170
Epoch 91/170
Epoch 92/170
Epoch 93/170
Epoch 94/170
Epoch 95/170
Epoch 96/170
Epoch 97/170
Epoch 98/170
Epoch 99/170
Epoch 100/170
Epoch 101/170
Epoch 102/170
Epoch 103/170
Epoch 104/170
Epoch 105/170
Epoch 106/170
Epoch 107/170
Epoch 108/170
Epoch 109/170
Epoch 110/170
Epoch 111/170
Epoch 112/170
Epoch 113/170
Epoch 114/170
Epoch 115/170
Epoch 116/170
Epoch 117/170
Epoch 118/170
Epoch 119/170
Epoch 120/170
Epoch 121/170
Epoch 122/170
Epoch 123/170
Epoch 124/170
Epoch 125/170
Epoch 126/170
Epoch 127/170
Epoch 128/170
Epoch 129/170
Epoch 130/170
Epoch 131/170
Epoch 132/170
Epoch 133/170
Epoch 134/170
Epoch 135/170
Epoch 136/170
Epoch 137/170
Epoch 138/170
Epoch 139/170
Epoch 140/170
Epoch 141/170
Epoch 142/170
Epoch 143/170
Epoch 144/170
Epoch 145/170
Epoch 146/170
Epoch 147/170


Epoch 148/170
Epoch 149/170
Epoch 150/170
Epoch 151/170
Epoch 152/170
Epoch 153/170
Epoch 154/170
Epoch 155/170
Epoch 156/170
Epoch 157/170
Epoch 158/170
Epoch 159/170
Epoch 160/170
Epoch 161/170
Epoch 162/170
Epoch 163/170
Epoch 164/170
Epoch 165/170
Epoch 166/170
Epoch 167/170
Epoch 168/170
Epoch 169/170
Epoch 170/170


<tensorflow.python.keras.callbacks.History at 0x1f91cc0f550>

## real-time prediction

In [92]:
# One bar colour per action, indexed by class position
colors = [
    (245, 117, 16),
    (117, 245, 16),
    (16, 117, 245),
    (240, 128, 128),
]

def prob_viz(res, actions, input_frame, colors):
    """Draw one labelled probability bar per class on a copy of a frame.

    Args:
        res: Iterable of per-class probabilities; entry ``i`` sets the width
            of bar ``i`` to ``int(prob * 100)`` pixels.
        actions: Indexable of labels; ``actions[i]`` is written over bar ``i``.
        input_frame: Frame to annotate. It is copied, not drawn on.
        colors: Indexable of fill colours; ``colors[i]`` fills bar ``i``.

    Returns:
        The annotated copy of ``input_frame``.
    """
    canvas = input_frame.copy()

    for num, prob in enumerate(res):
        # Bars are stacked vertically, 40 pixels apart
        offset = num * 40
        top_left = (0, 60 + offset)
        bottom_right = (int(prob * 100), 90 + offset)

        cv2.rectangle(canvas, top_left, bottom_right, colors[num], -1)
        cv2.putText(canvas, actions[num], (0, 85 + offset),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return canvas

In [93]:
# Real-time inference: keep a rolling window of the latest keypoint vectors,
# classify it once it is full, and overlay the class probabilities and the
# recently accepted actions on the webcam feed.
sequence = []       # rolling window of per-frame keypoint vectors
sentence = []       # accepted action labels, most recent last
predictions = []    # class indices of the most recent predictions
threshold = 0.5     # minimum probability for a prediction to be accepted

cap = cv2.VideoCapture(0)

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():

            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; passing it on would make the
                # colour conversion fail, so stop instead.
                break

            image, results = mediapipe_detection(frame, holistic)
            draw_landmarks(image, results)

            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-sequence_length:]

            if len(sequence) == sequence_length:
                res = model.predict(np.expand_dims(sequence, axis = 0))[0]
                predicted = int(np.argmax(res))
                print(actions[predicted])

                # Only the last 10 predictions are ever inspected, so nothing
                # older needs to be kept.
                predictions.append(predicted)
                predictions = predictions[-10:]

                # Accept a label only if every recent prediction agrees with
                # the current one. The earlier np.unique(...)[0] check compared
                # against the smallest recent class index, which is not a
                # stability test.
                stable = all(p == predicted for p in predictions)

                if stable and res[predicted] > threshold:
                    label = actions[predicted]
                    # Avoid repeating the label that is already shown last
                    if not sentence or sentence[-1] != label:
                        sentence.append(label)

                # Keep only the five most recent labels
                sentence = sentence[-5:]

                image = prob_viz(res, actions, image, colors)

            # Banner and display run on every frame, so the feed is visible
            # and 'q' is honoured while the first window is still filling up.
            cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3,30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('openCV Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always give the camera and the window back, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()

eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
eating food
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
feeling sleepy
feeling sleepy
sitting actively
sitting actively
sitting actively
sitting activ

eating food
eating food
eating food
eating food
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
smoking cigarette
eating food
smoking cigarette
smoking cigarette
feeling sleepy
feeling sleepy
feeling sl

sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
eating food
eating food
eating food
eating food
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
eating food
eating food
eating food
sitting actively
sitting activel

sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting active

sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting actively
sitting active