Install the Python libraries needed to run the program.

In [2]:
!pip install mediapipe opencv-python numpy 

Collecting mediapipe
  Using cached mediapipe-0.10.14-cp312-cp312-win_amd64.whl.metadata (9.9 kB)
Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting numpy
  Using cached numpy-2.1.2-cp312-cp312-win_amd64.whl.metadata (59 kB)
Collecting absl-py (from mediapipe)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting attrs>=19.1.0 (from mediapipe)
  Using cached attrs-24.2.0-py3-none-any.whl.metadata (11 kB)
Collecting flatbuffers>=2.0 (from mediapipe)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting jax (from mediapipe)
  Using cached jax-0.4.34-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Using cached jaxlib-0.4.34-cp312-cp312-win_amd64.whl.metadata (1.0 kB)
Collecting matplotlib (from mediapipe)
  Using cached matplotlib-3.9.2-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting opencv-contrib-python (from mediapipe)
  Using cached openc

In [3]:
import mediapipe as mp
import cv2
import numpy as np

The following are all part of MediaPipe from Google. It is a framework that includes a Convolutional Neural Network trained to recognize 21 landmarks on a hand in order to track hand movement. Luckily for us, most of the time we can just use someone else's work.

In [4]:
#Short aliases for the two MediaPipe solution modules used below:
#drawing_utils draws the detected landmarks onto an image, hands is the hand tracking solution itself
mp_drawing, mp_hands = mp.solutions.drawing_utils, mp.solutions.hands

In [5]:
#Define our camera to use from our computer, we will use "0" for our front facing laptop camera
cap = cv2.VideoCapture(0)

#try/finally guarantees the camera and the window are released even if something inside the loop fails
try:
    #Define how to track our hands in the image
    with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:

        while cap.isOpened():

            #Frame is the image we are working with. Ret is a boolean to check if an image was taken.
            ret, frame = cap.read()

            #If no image was taken, frame is None and cannot be processed, so stop instead of crashing
            if not ret:
                print("Could not read a frame from the camera, stopping.")
                break

            image_height, image_width, _ = frame.shape

            #OpenCV gives us the image in BGR color order, but MediaPipe expects RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            #Marking the image as not writeable lets MediaPipe use it without copying it
            rgb_frame.flags.writeable = False

            #pass our image through the Neural Network to let it mark our hands with points
            results = hands.process(rgb_frame)

            #We draw on (and display) the original BGR frame, since that is what cv2.imshow expects
            image = frame

            #If there were points labeled on our hand
            if results.multi_hand_landmarks:
                for hand in results.multi_hand_landmarks:
                    #Landmark coordinates are normalized to 0..1, so scale them to pixels
                    tip = hand.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                    print(
                        f'Index finger tip coordinates: (',
                        f'{tip.x * image_width}, '
                        f'{tip.y * image_height})'
                    )
                    #Draw the points onto the image
                    mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS)

            #What is displayed: the camera image with the hand markups drawn on top of it
            cv2.imshow("hand tracking", image)

            #Press q to break out of the loop
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    #Make sure to return all resources to the computer
    cap.release()
    cv2.destroyAllWindows()



Index finger tip coordinates: ( 279.7473907470703, 182.77421951293945)
Index finger tip coordinates: ( 300.02227783203125, 167.18595027923584)
Index finger tip coordinates: ( 334.74422454833984, 138.34805488586426)
Index finger tip coordinates: ( 336.2134552001953, 138.84060859680176)
Index finger tip coordinates: ( 337.8125762939453, 137.17371940612793)
Index finger tip coordinates: ( 337.03086853027344, 134.6055793762207)
Index finger tip coordinates: ( 338.4282684326172, 134.6791648864746)
Index finger tip coordinates: ( 341.0719299316406, 140.1382541656494)
Index finger tip coordinates: ( 339.98680114746094, 141.4854383468628)
Index finger tip coordinates: ( 314.2742919921875, 156.67791366577148)
Index finger tip coordinates: ( 299.88367080688477, 157.68731117248535)
Index finger tip coordinates: ( 334.2617416381836, 160.4954481124878)
Index finger tip coordinates: ( 339.68170166015625, 172.22505569458008)
Index finger tip coordinates: ( 313.49565505981445, 157.67073154449463)
Inde

In [6]:
#Define our camera to use from our computer, we will use "0" for our front facing laptop camera
cap = cv2.VideoCapture(0)

#How close (in pixels, on each axis) the index finger tip has to get to the dot to count as a touch
TOUCH_DISTANCE = 20
#Radius (in pixels) and color (in BGR order, so this is green) of the dot
DOT_RADIUS = 5
DOT_COLOR = (0, 255, 0)

#False whenever a new dot position has to be picked: at the start and after every touch
generatedDot = False

#try/finally guarantees the camera and the window are released even if something inside the loop fails
try:
    #Define how to track our hands in the image
    with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:

        while cap.isOpened():

            #Frame is the image we are working with. Ret is a boolean to check if an image was taken.
            ret, frame = cap.read()

            #If no image was taken, frame is None and cannot be processed, so stop instead of crashing
            if not ret:
                print("Could not read a frame from the camera, stopping.")
                break

            image_height, image_width, _ = frame.shape

            #OpenCV gives us the image in BGR color order, but MediaPipe expects RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            #Marking the image as not writeable lets MediaPipe use it without copying it
            rgb_frame.flags.writeable = False

            #pass our image through the Neural Network to let it mark our hands with points
            results = hands.process(rgb_frame)

            #We draw on (and display) the original BGR frame, since that is what cv2.imshow expects
            image = frame

            #Pick a new random position for the dot; height is its y coordinate, width its x coordinate
            if not generatedDot:
                #int() turns the numpy integers into plain Python ints for cv2.circle
                height = int(np.random.randint(0, high=image_height))
                width = int(np.random.randint(0, high=image_width))
                generatedDot = True
            #A thickness of -1 draws a filled circle
            cv2.circle(image, (width, height), DOT_RADIUS, DOT_COLOR, -1)

            #If there were points labeled on our hand
            if results.multi_hand_landmarks:
                for hand in results.multi_hand_landmarks:
                    #Landmark coordinates are normalized to 0..1, so scale them to pixels
                    tip = hand.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                    touched_x = abs(width - tip.x * image_width) < TOUCH_DISTANCE
                    touched_y = abs(height - tip.y * image_height) < TOUCH_DISTANCE
                    #The finger touched the dot, so a new one is generated on the next frame
                    if touched_x and touched_y:
                        generatedDot = False
                    #Draw the points onto the image
                    mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS)

            #What is displayed: the camera image with the dot and the hand markups drawn on top of it
            cv2.imshow("hand tracking", image)

            #Press q to break out of the loop
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    #Make sure to return all resources to the computer
    cap.release()
    cv2.destroyAllWindows()