# Preliminary Notebook

## 1. Import and Install Dependencies

In [31]:
%pip install tensorflow-macos opencv-python mediapipe-silicon sklearn matplotlib
#!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib

Note: you may need to restart the kernel to use updated packages.


In [32]:
import cv2 # opencv
import numpy as np
import os # easier file path handling
from matplotlib import pyplot as plt # imshow for easy visualization
import time # to insert "sleep" in between frames
import mediapipe as mp # holistic model and drawing utilities (the webcam itself is read with cv2)

## 2. Keypoints using MP Holistic

In [33]:
# Short aliases for the two MediaPipe solution modules used throughout the notebook.
mp_drawing = mp.solutions.drawing_utils  # landmark drawing helpers
mp_holistic = mp.solutions.holistic      # holistic model and its connection sets

In [34]:
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and hand back the frame plus the model output.

    Args:
        image: frame in BGR channel order.
        model: object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(image, results)``: the frame converted back to BGR, and whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # the model is fed RGB
    rgb_frame.flags.writeable = False                   # read-only while the model runs
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True                    # writeable again afterwards
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results


In [35]:
def draw_landmarks(image, results):
    """Draw the face, pose, left-hand and right-hand landmarks on `image` with default styling.

    Args:
        image: frame that the landmarks are drawn onto.
        results: detection output exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    overlays = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION),   # face connections
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),       # pose connections
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),  # left hand connections
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS), # right hand connections
    )
    for landmark_list, connections in overlays:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [36]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks on `image`, each group with its own colours.

    Args:
        image: frame that the landmarks are drawn onto.
        results: detection output exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Every colour channel is kept inside the valid 8-bit range 0-255 (the previous
    connection colour used 256 for one channel).
    """
    spec = mp_drawing.DrawingSpec
    styled_layers = (
        # (landmarks, connection set, landmark style, connection style)
        # face
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 255, 121), thickness=2, circle_radius=2)),
        # left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )
    for landmarks, connections, landmark_spec, connection_spec in styled_layers:
        mp_drawing.draw_landmarks(image, landmarks, connections, landmark_spec, connection_spec)

In [37]:
# Old preview loop that only displays the landmarks and does not save landmark data.
# It is wrapped in a triple-quoted string so none of it executes; because the string is a
# bare expression, the notebook simply echoes its text as this cell's output.
"""
cap = cv2.VideoCapture(0) # grabbing webcam
# set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic: 
    while cap.isOpened(): # loop through all frames 

        # read feed
        ret, frame = cap.read()

        # make detections 
        image, results = mediapipe_detection(frame, holistic)
        #print(results)

        # draw landmarks
        #draw_landmarks(image, results)
        draw_styled_landmarks(image, results)

        # show to screen
        cv2.imshow("OpenCV Feed", image)

        # break gracefully 
        if cv2.waitKey(10) & 0xFF == ord('q'): 
            break 
    cap.release()
    cv2.destroyAllWindows() 
"""

'\ncap = cv2.VideoCapture(0) # grabbing webcam\n# set mediapipe model\nwith mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic: \n    while cap.isOpened(): # loop through all frames \n\n        # read feed\n        ret, frame = cap.read()\n\n        # make detections \n        image, results = mediapipe_detection(frame, holistic)\n        #print(results)\n\n        # draw landmarks\n        #draw_landmarks(image, results)\n        draw_styled_landmarks(image, results)\n\n        # show to screen\n        cv2.imshow("OpenCV Feed", image)\n\n        # break gracefully \n        if cv2.waitKey(10) & 0xFF == ord(\'q\'): \n            break \n    cap.release()\n    cv2.destroyAllWindows() \n'

In [38]:
# plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) # plots single frame with landmarks

## 3. Extract Keypoint Values

In [40]:
def extract_keypoints(results):
    """Flatten all landmark coordinates of one detection into a single 1-D array.

    The output is the concatenation, in this order, of:
      * pose:       33 landmarks x (x, y, z, visibility)
      * face:       468 landmarks x (x, y, z)
      * left hand:  21 landmarks x (x, y, z)
      * right hand: 21 landmarks x (x, y, z)

    A group whose landmarks attribute is falsy is replaced by zeros of the size
    listed above.

    Args:
        results: object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is either
            falsy or has a ``landmark`` iterable of points.

    Returns:
        1-D numpy array holding the flattened pose, face, left-hand and right-hand values.
    """
    def _flat(group, fields, count):
        # Detected group -> flattened coordinate rows; otherwise a zero placeholder.
        if group:
            rows = [[getattr(point, field) for field in fields] for point in group.landmark]
            return np.array(rows).flatten()
        return np.zeros(count * len(fields))

    xyz = ('x', 'y', 'z')
    parts = [
        _flat(results.pose_landmarks, xyz + ('visibility',), 33),
        _flat(results.face_landmarks, xyz, 468),
        _flat(results.left_hand_landmarks, xyz, 21),
        _flat(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

## 4. Setup Folders for Collection

In [41]:
# Root folder that receives the exported keypoint arrays (numpy files)
DATA_PATH = 'MP_Data'

# Labels of the actions to detect
actions = np.array(['hello', 'thanks', 'iloveyou'])

# 30 videos are recorded per action, and every video holds 30 frames
no_sequences = sequence_length = 30

In [42]:
# folder structure: 

# hello
## 0
## 1
## 2
## ...
## 29

# thanks

# iloveyou

In [43]:
# Create one folder per (action, sequence) pair: DATA_PATH/<action>/<sequence>.
# exist_ok=True makes the cell safe to re-run when the folders already exist, while
# real failures (e.g. a permission error) are no longer silently swallowed.
for action in actions:
    for sequence in range(no_sequences):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

## 5. Collect Keypoint Values for Training and Testing

In [44]:
# Record `sequence_length` frames of keypoints for every (action, sequence) pair and
# save each frame as DATA_PATH/<action>/<sequence>/<frame_num>.npy.
# Pressing 'q' in the preview window aborts the whole collection run.
START_PAUSE_MS = 2000  # how long the 'STARTING COLLECTION' banner stays up on frame 0
FRAME_DELAY_MS = 10    # key-poll wait between ordinary frames

cap = cv2.VideoCapture(0)  # grabbing webcam
stop_requested = False
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device index 0)')

    # set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        # loop through actions (words)
        for action in actions:
            # loop through sequences (videos)
            for sequence in range(no_sequences):
                # loop through the frames of one video
                for frame_num in range(sequence_length):

                    # read feed; fail loudly instead of passing a None frame to cvtColor
                    ret, frame = cap.read()
                    if not ret:
                        raise RuntimeError(
                            'Webcam frame grab failed for {} video {} frame {}'.format(
                                action, sequence, frame_num))

                    # make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # draw landmarks
                    draw_styled_landmarks(image, results)

                    # overlay status text; the first frame of a sequence also gets the banner
                    if frame_num == 0:
                        cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                    # export keypoints (np.save appends the .npy extension)
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # show to screen BEFORE waiting, so the banner is visible during the pause
                    cv2.imshow("OpenCV Feed", image)

                    # break gracefully; a key pressed during the start pause counts as well
                    delay_ms = START_PAUSE_MS if frame_num == 0 else FRAME_DELAY_MS
                    if cv2.waitKey(delay_ms) & 0xFF == ord('q'):
                        stop_requested = True
                        break
                # propagate the quit request out of the nested loops
                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # always free the camera and close the preview window, even after an error
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
