# Example Data Generation

## Reference

The code in this notebook is adapted and modified from the following Youtube tutorial: https://www.youtube.com/watch?v=doDUihpj6ro 

## Usage

With this notebook, new data can be generated using your webcam and the mediapipe holistic model. 

The output data is save in `DATA_PATH = os.path.join('MP_Data')`, but you can change this as you want. 

Press "Q" on your keyboard to interrupt recording. 

## 1. Install and Import Dependencies

### Install Dependencies

In [10]:
%pip install tensorflow-macos opencv-python mediapipe-silicon sklearn matplotlib
#!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib

Note: you may need to restart the kernel to use updated packages.


### Import Dependencies

In [11]:
import cv2 # opencv
import numpy as np
import os # easier file path handling
from matplotlib import pyplot as plt # im.show for easy visualization
import time # to insert "sleep" in between frames
import mediapipe as mp # for accessing and reading from webcam

## 2. Keypoints using MP Holistic

### Initialize Mediapipe Holistic Model

In [12]:
mp_holistic = mp.solutions.holistic # holistic model
mp_drawing = mp.solutions.drawing_utils # drawing utilities

### Define Functions (later they all go into a python module for multiple use)

In [13]:
# function to detect MP Holistic landmarks from an image, e.g. from a frame of your camera feed
def mediapipe_detection(image, model): 
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # color conversion BGR to RGB
    image.flags.writeable = False                   # image no longer writeable
    results = model.process(image)                  # make prediction
    image.flags.writeable = True                    # image is writeable again
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # color conversion back to original
    return image, results


In [14]:
# function to draw landmarks points and connecting lines on top of an image, e.g. on top of your camera feed
def draw_styled_landmarks(image, results): 
    # draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                              mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                              mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
    # draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                              mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                              mp_drawing.DrawingSpec(color=(80,256,121), thickness=2, circle_radius=2)) 
    # draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                              mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                              mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)) 
    # draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                              mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                              mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)) 

In [15]:
# function to extract coordinates (+visibility) of all landmarks --> keypoints
# and concatenates everything into a flattened list 
def extract_keypoints(results): 
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4) # x, y, z and extra value visibility
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])
    # a flattened list with list of all pose, face, lh, rh landmark x, y, z, (+visibility) coordinates

## 3. Setup Folders and Objects for Collection

### Define Path and Data Acquisition Parameters

In [16]:
DATA_PATH = os.path.join('MP_Data') # path for exported data (numpy arrays)
actions = np.array(['hello', 'thanks', 'iloveyou']) # actions to detect
no_sequences = 30 # 30 videos of data
sequence_length = 30 # each video with 30 frames

### Create all needed folders

In [17]:
# loop over all actions (signs / words)
for action in actions: 
    # loop over all sequences (videos)
    for sequence in range(no_sequences): 
        # create new folder, if possible
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except: 
            pass

## 4. Collect Keypoint Values

In [18]:
cap = cv2.VideoCapture(0) # grabbing webcam
# set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic: 

    # NEW loop
    # loop through actions (words) # = 3
    for action in actions: 
        # loop through sequences (videos) # = 30
        for sequence in range(no_sequences): 
            # loop through sequence length (number of frames per video aka "word" # = 30)
            for frame_num in range(sequence_length):

                # read feed
                ret, frame = cap.read()

                # make detections 
                image, results = mediapipe_detection(frame, holistic)
                #print(results)

                # draw landmarks
                #draw_landmarks(image, results)
                draw_styled_landmarks(image, results)

                # NEW: apply wait logic
                if frame_num == 0: 
                    cv2.waitKey(2000)
                    cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                # create keypoint data list as a flattened list of all pose, face, lh, rh landmark x, y, z, (+visibility) coordinates
                keypoints = extract_keypoints(results)

                # save keypoint data list
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # show to screen
                cv2.imshow("OpenCV Feed", image)

                # break gracefully 
                if cv2.waitKey(10) & 0xFF == ord('q'): 
                    # release camera and close feed window     
                    cap.release()
                    cv2.waitKey(1) # some workaround to fix the bug, that window doesn't close
                    cv2.destroyAllWindows() 
                    cv2.waitKey(1) # some workaround to fix the bug, that window doesn't close
                    break

    


error: OpenCV(4.7.0) /Users/xperience/GHA-OCV-Python/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'
