In [2]:
# %pip install numpy
# %pip install -q mediapipe
# %pip install opencv-camera
# %pip install selenium
# %pip install chromedriver-py
# %pip install webdriver-manager

In [24]:
%%html --isolated
<h2>TODO:</h2>
<ul>
    <li>Transition selenium to headless version</li>
    <li>Bug fixes</li>
    <li>Create importer class to allow user to determine gestures</li>
    <li>Clean up code, verify commentation</li>
</ul>

In [7]:
import numpy as np
import cv2
import mediapipe as mp 
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2

import sp_controller
import time

In [15]:
finger_tips = [4,8,12,16,20] 

In [10]:
def draw_landmarks_on_image(rgb_image, hand_landmarks_list: list):
    if hand_landmarks_list == []:
        print("No landmarks to draw")
        return rgb_image
    else:
        annotated_image = np.copy(rgb_image)

        # Loop through the detected hands to visualize.
        for idx in range(len(hand_landmarks_list)):
            hand_landmarks = hand_landmarks_list[idx]
        
            # Draw the hand landmarks.
            hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
            hand_landmarks_proto.landmark.extend(
                [landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks]
            )
            mp.solutions.drawing_utils.draw_landmarks(
                annotated_image,
                hand_landmarks_proto,
                mp.solutions.hands.HAND_CONNECTIONS,
                mp.solutions.drawing_styles.get_default_hand_landmarks_style(),
                mp.solutions.drawing_styles.get_default_hand_connections_style()
            )
        return annotated_image

In [11]:
# wrapper class to make accessing the landmarks-model and its results easier
class landmarker_and_result():
    def createLandmarker(self):
        def update_result(result: vision.HandLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
            self.result = result
            
        options = mp.tasks.vision.HandLandmarkerOptions( 
            base_options = mp.tasks.BaseOptions(model_asset_path="./hand_landmarker.task"), # path to model
            running_mode = mp.tasks.vision.RunningMode.LIVE_STREAM, # running on a live stream
            num_hands = 1, # track one hand
            min_hand_detection_confidence = 0.5, # lower the value to get predictions more often
            min_hand_presence_confidence = 0.5, # lower the value to get predictions more often
            min_tracking_confidence = 0.3, # lower the value to get predictions more often
            result_callback=update_result)
        # initialize landmarker
        self.landmarker = self.landmarker.create_from_options(options)
    
    def detect_async(self, frame):
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data = frame)
        self.landmarker.detect_async(image = mp_image, timestamp_ms = int(time.time() * 1000))
    
    def close(self):
        self.landmarker.close()

    def __init__(self):
        self.result = mp.tasks.vision.HandLandmarkerResult
        self.landmarker = mp.tasks.vision.HandLandmarker
        self.createLandmarker()

In [13]:
def count_up_down(hand_landmarks_list: list):    
    if(hand_landmarks_list == []):
        print("No landmarks preset")
        return [-1,-1]
        
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend(
        [landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks_list[0]]
    )
    try:
        normal_landmarks_list = hand_landmarks_proto.landmark
        up = 0
        down = 0
        #skipping thumb since this method doesn't work well with it
        for pos in range(8,21,4):
            if(normal_landmarks_list[pos].y < normal_landmarks_list[pos-3].y):
                up = up + 1
            else:
                down = down + 1
        return [up,down]
    except Exception as e:
        print(e)
    return [-1,-1]

In [20]:
un = input("Enter your username, then password: ")
pw = input()
controller = sp_controller.SPController(un, pw)

#map these to actually useful functions when we implement spotify controller
gesture_to_action = {
    "Unknown"       : lambda : print("Unknown"),
    "None"          : lambda : print("None"),
    "Closed_Fist"   : controller.pause_play,
    "Open_Palm"     : lambda : print("Open_Palm"),
    "Pointing_Up"   : lambda : print("Pointing_Up"),
    "Thumb_Down"    : controller.rewind,
    "Thumb_Up"      : controller.skip,
    "Victory"       : lambda : print("Victory"),
    "ILoveYou"      : lambda : print("ILoveYou")
}

In [None]:
class recognizer_and_result():
    def create_recognizer(self):
        def update_result(result: vision.GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
            self.result = result
        
        options = vision.GestureRecognizerOptions( 
            base_options = mp.tasks.BaseOptions(model_asset_path="./gesture_recognizer.task"), # path to model
            running_mode = vision.RunningMode.LIVE_STREAM, # running on a live stream
            num_hands = 1, # track one hand
            min_hand_detection_confidence = 0.5, # lower the value to get predictions more often
            min_hand_presence_confidence = 0.5, # lower the value to get predictions more often
            min_tracking_confidence = 0.3, # lower the value to get predictions more often
            result_callback=update_result)
        # initialize landmarker
        self.gesture_recognizer = vision.GestureRecognizer.create_from_options(options)
    
    def detect_async(self, frame):
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data = frame)
        self.gesture_recognizer.recognize_async(image = mp_image, timestamp_ms = int(time.time() * 1000))

    def gesture_to_action(self):
        if(not self.result):
            return
        try:
            gesture = self.result.gestures
            # print("Gesture is of type: ", type(gesture), " with length: ", len(gesture), "\n", gesture)
            # print("Gesture[0] is of type: ", type(gesture[0]), " with fields ", dir(gesture[0]) )
            name = gesture[0][0].category_name
            
            #prevent repeat calls
            #BUG: still repeating. Look into how results are updated with async callbacks
            if(name == self.prev_gesture):
                return
            self.prev_gesture = name
            
            gesture_to_action[name]()
        except Exception as e:
            print("Error recognizing gesture: \n", e)
    
    def close(self):
        self.gesture_recognizer.close()

    def __init__(self):
        self.result = vision.GestureRecognizerResult
        self.gesture_recognizer = vision.GestureRecognizer
        self.create_recognizer()
        self.prev_gesture = "None"

In [22]:
prev_gesture = "None"
try:
    cv2.namedWindow("preview")
    vc = cv2.VideoCapture(0)

    if vc.isOpened():
        rval, frame = vc.read()
    else: 
        print("Could not open camera")
        rval = False

    recognizer = recognizer_and_result()
    recognizer.detect_async(frame)

    #BUG
    #hand_landmarks field is not always present
    #I believe that the asynchronous calling of the recognizer means that the 
    #hand_landmarks field is not present at the time when it would first be accessed by the system
    #this is in spite of the fact that the result field is present on the recognizer
    #I am quite sure there is a better solution, but this works for now
    landmarks_present = False
    while(not landmarks_present):
        try:
            recognizer.result.hand_landmarks
            landmarks_present = True
        except AttributeError as e:
            print("landmarks not present on recognizer result")
            time.sleep(.5)

    while rval:
        ret, frame = vc.read()
        if(not ret): continue
        
        frame = cv2.flip(frame, 1)
        # hand_landmarker.detect_async(frame)
        recognizer.detect_async(frame)
        # print("Attributes of recognizer results object: ", dir(recognizer))
           
        frame = draw_landmarks_on_image(frame, recognizer.result.hand_landmarks)
        # print(count_up_down(recognizer.result.hand_landmarks))
        
        #TODO: second attempt at stopping rapid repeat calls
        #it takes time for a hand to change gestures, so we can wait for that change in gesture without sleeping
        #init and gesture_to_action method are modified to accomplish this
        recognizer.gesture_to_action()
        
        cv2.imshow("preview", frame)

        #TODO: first attempt to limit repeat function calls by simply waiting
        #Not great, but better than skipping 20 times a second or similar
        # time.sleep(1)

        key = cv2.waitKey(1)
        if key == 27:
            break
except Exception as e:
    print(e)
    print("closing out...")
    
# hand_landmarker.close()
recognizer.close()
vc.release()
cv2.destroyWindow("preview")



landmarks not present on recognizer result
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of range
No landmarks to draw
Error recognizing gesture: 
 list index out of r