### Hand Detection Testing with MediaPipe

This notebook tests the capabilities of MediaPipe Hands, a machine-learning solution from Google that detects hand landmarks. Additional code uses the landmarks returned by MediaPipe to draw a custom bounding box around each detected hand, padded by a fixed number of pixels on every side to leave an appropriate buffer.

In [2]:
import cv2
import mediapipe as mp
from google.protobuf.json_format import MessageToDict
import numpy as np

from tensorflow.keras.models import load_model
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import time

In [3]:
# Build the MediaPipe Hands detector that the capture loop calls via
# `hands.process(...)`.
mpHands = mp.solutions.hands
hands = mpHands.Hands(
    max_num_hands=2,                 # report at most two hands per frame
    model_complexity=1,
    static_image_mode=False,         # input is a stream of webcam frames, not unrelated stills
    min_detection_confidence=0.75,   # detection confidence threshold
    min_tracking_confidence=0.75,    # tracking confidence threshold
)

In [4]:
# References:
# https://www.geeksforgeeks.org/right-and-left-hand-detection-using-python/
# https://github.com/google/mediapipe/issues/1390#issuecomment-749333655

In [6]:
# Live webcam loop: run MediaPipe Hands on every frame and draw a padded
# bounding box plus a centre marker around each detected hand.
# Press 'q' in the preview window to stop.

BOX_PADDING_PX = 23            # pixels added on every side of the landmark extent
OVERLAY_COLOR = (36, 238, 42)  # colour used for both the box and the centre marker

cap = cv2.VideoCapture(0)
# NOTE(review): `letters` is not used in this cell; presumably class labels for
# a classifier used elsewhere in the notebook -- confirm before removing.
letters = np.array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'])

try:
    while True:

        # Read video frame by frame
        success, img = cap.read()
        if not success:
            # Without this guard a failed read hands `None` to cv2.flip,
            # which raises instead of ending the loop cleanly.
            print('Could not read a frame from the camera; stopping.')
            break

        # Flip the image(frame) horizontally
        img = cv2.flip(img, 1)

        # Convert BGR image to RGB image
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Process the RGB image
        results = hands.process(imgRGB)

        # If hands are present in image(frame)
        if results.multi_hand_landmarks:
            # The frame size is the same for every hand in this frame.
            img_height, img_width = img.shape[:2]

            for hand_landmark in results.multi_hand_landmarks:
                x = [landmark.x for landmark in hand_landmark.landmark]
                y = [landmark.y for landmark in hand_landmark.landmark]

                # Landmark coordinates are scaled by the frame size to get
                # pixels, then the box is grown by the padding on each side.
                xmin = int(min(x) * img_width) - BOX_PADDING_PX
                xmax = int(max(x) * img_width) + BOX_PADDING_PX
                ymin = int(min(y) * img_height) - BOX_PADDING_PX
                ymax = int(max(y) * img_height) + BOX_PADDING_PX

                # Median landmark position, as plain Python ints: some OpenCV
                # builds reject numpy integer scalars for point arguments.
                center = (int(np.median(x) * img_width), int(np.median(y) * img_height))
                cv2.circle(img, center, 10, OVERLAY_COLOR, 1)  # for checking the center
                cv2.rectangle(img=img, pt1=(xmin, ymin), pt2=(xmax, ymax), color=OVERLAY_COLOR, thickness=2)

        # Display Video and when 'q' is entered, destroy the window
        cv2.imshow('Image', img)
        if cv2.waitKey(1) & 0xff == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even when the loop
    # raises or the kernel is interrupted; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()

In [23]:
# NOTE(review): `pred_array` is not defined in any cell visible in this notebook,
# and this cell's execution count (23) is out of sequence with the cells above.
# It looks like a leftover from a deleted prediction cell -- confirm where
# `pred_array` comes from, or remove this cell so Restart & Run All succeeds.
pred_array[0][3]

0.25912893