In [1]:
import cv2
import numpy as np
import mediapipe as mp
from collections import deque

class HandDetector:
    """Small wrapper around the MediaPipe Hands solution.

    ``findHands`` runs detection on a BGR frame and stores the raw result in
    ``self.results``; ``findPosition`` converts the landmarks of one detected
    hand from that stored result into pixel coordinates.
    """

    def __init__(self, max_num_hands=1, min_detection_confidence=0.7, min_tracking_confidence=0.7):
        """Create the underlying MediaPipe ``Hands`` object.

        Args:
            max_num_hands: Forwarded to ``Hands(max_num_hands=...)``.
            min_detection_confidence: Forwarded to ``Hands(...)``.
            min_tracking_confidence: Forwarded to ``Hands(...)``.
        """
        self.max_num_hands = max_num_hands
        self.min_detection_confidence = min_detection_confidence
        self.min_tracking_confidence = min_tracking_confidence

        # MediaPipe Hands module and the configured detector instance
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(max_num_hands=self.max_num_hands,
                                         min_detection_confidence=self.min_detection_confidence,
                                         min_tracking_confidence=self.min_tracking_confidence)

        # Utility for drawing landmarks and connections on the image
        self.mp_drawing = mp.solutions.drawing_utils

        # Result of the most recent findHands() call; None until a frame has
        # been processed so findPosition() can be called safely at any time.
        self.results = None

    def findHands(self, image, draw=True):
        """Detect hands in a BGR frame and optionally draw them onto it.

        Args:
            image: BGR frame; it is converted to RGB before being processed.
            draw: When true, landmarks and connections of every detected hand
                are drawn onto ``image`` in place.

        Returns:
            The same ``image`` object that was passed in.
        """
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(image_rgb)

        if draw and self.results.multi_hand_landmarks:
            # One spec is shared by landmarks and connections for every hand,
            # so build it once instead of once per hand.
            spec = self.mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=2, circle_radius=2)
            for hand_landmarks in self.results.multi_hand_landmarks:
                self.mp_drawing.draw_landmarks(image, hand_landmarks, self.mp_hands.HAND_CONNECTIONS,
                                               spec, spec)

        return image

    def findPosition(self, image, handNo=0, draw=True):
        """Return pixel positions of the landmarks of one detected hand.

        Uses the result stored by the last ``findHands`` call.

        Args:
            image: Frame whose ``shape`` supplies height and width; landmark
                x/y values are multiplied by width/height to get pixels.
            handNo: Index into the detected hands.
            draw: When true, a filled circle is drawn on ``image`` at every
                landmark.

        Returns:
            A list of ``[landmark_index, x_px, y_px, z]`` entries. The list is
            empty when no frame has been processed yet, no hand was detected,
            or ``handNo`` does not refer to a detected hand.
        """
        hand_positions = []

        # getattr keeps this safe even if findHands() has never been called.
        results = getattr(self, "results", None)
        detected = getattr(results, "multi_hand_landmarks", None)
        if not detected:
            return hand_positions

        try:
            my_hand = detected[handNo]
        except IndexError:
            # Fewer hands in this frame than the caller asked for.
            return hand_positions

        # Only height and width are needed; slicing also tolerates 2-D images.
        height, width = image.shape[:2]
        for index, lm in enumerate(my_hand.landmark):
            cx, cy = int(lm.x * width), int(lm.y * height)
            hand_positions.append([index, cx, cy, lm.z])
            if draw:
                cv2.circle(image, (cx, cy), 5, (255, 255, 0), cv2.FILLED)
        return hand_positions

# Per-colour stroke buffers. Each list holds one deque of (x, y) points per
# stroke; the matching *_index below selects the stroke currently being drawn.
bpoints = [deque(maxlen=1024)]
gpoints = [deque(maxlen=1024)]
rpoints = [deque(maxlen=1024)]
ypoints = [deque(maxlen=1024)]

blue_index = 0
green_index = 0
red_index = 0
yellow_index = 0

# Define kernel for morphological operations
kernel = np.ones((5, 5), np.uint8)

# Drawing colours in BGR order: blue, green, red, yellow.
# colorIndex selects the active entry.
colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (0, 255, 255)]
colorIndex = 0

# White canvas for the paint window. It is created as uint8 on purpose:
# cv2.imshow scales floating-point images by 255, so a float canvas filled
# with 255 only looks correct through saturation and ruins anti-aliased edges.
paintWindow = np.full((471, 636, 3), 255, dtype=np.uint8)

# Button outlines along the top edge: CLEAR plus one per colour.
paintWindow = cv2.rectangle(paintWindow, (40, 1), (140, 65), (0, 0, 0), 2)
paintWindow = cv2.rectangle(paintWindow, (160, 1), (255, 65), (255, 0, 0), 2)
paintWindow = cv2.rectangle(paintWindow, (275, 1), (370, 65), (0, 255, 0), 2)
paintWindow = cv2.rectangle(paintWindow, (390, 1), (485, 65), (0, 0, 255), 2)
paintWindow = cv2.rectangle(paintWindow, (505, 1), (600, 65), (0, 255, 255), 2)

# Button labels
cv2.putText(paintWindow, "CLEAR", (49, 33), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_AA)
cv2.putText(paintWindow, "BLUE", (185, 33), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_AA)
cv2.putText(paintWindow, "GREEN", (298, 33), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_AA)
cv2.putText(paintWindow, "RED", (420, 33), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_AA)
cv2.putText(paintWindow, "YELLOW", (520, 33), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_AA)
cv2.namedWindow('Paint', cv2.WINDOW_AUTOSIZE)

def _draw_toolbar(image):
    """Draw the CLEAR and colour-selection buttons along the top of ``image`` in place."""
    # (label, top-left, bottom-right, border colour in BGR, text origin)
    buttons = (
        ("CLEAR", (40, 1), (140, 65), (0, 0, 0), (49, 33)),
        ("BLUE", (160, 1), (255, 65), (255, 0, 0), (185, 33)),
        ("GREEN", (275, 1), (370, 65), (0, 255, 0), (298, 33)),
        ("RED", (390, 1), (485, 65), (0, 0, 255), (420, 33)),
        ("YELLOW", (505, 1), (600, 65), (0, 255, 255), (520, 33)),
    )
    for label, top_left, bottom_right, border, text_origin in buttons:
        cv2.rectangle(image, top_left, bottom_right, border, 2)
        cv2.putText(image, label, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_AA)


def main():
    """Run the webcam air-painting loop until 'q' is pressed or capture fails.

    Each frame is mirrored, the toolbar is drawn, and the hand is located.
    The index fingertip (landmark 8) acts as the pen:

    * thumb tip (landmark 4) less than 30 px below the fingertip -> pen up,
      the next points start a new stroke;
    * fingertip inside the top 65 px -> toolbar hit (clear / pick colour);
    * otherwise the fingertip position is added to the active colour's stroke.

    All strokes are redrawn onto both the camera frame and ``paintWindow``.
    The camera, the MediaPipe detector and the windows are released on exit,
    including when an exception interrupts the loop.
    """
    global bpoints, gpoints, rpoints, ypoints
    global blue_index, green_index, red_index, yellow_index
    global colorIndex

    # Initialize video capture
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

    if not cap.isOpened():
        print("Error: Could not open video capture")
        cap.release()
        # Return instead of exit(): exit() would also terminate a hosting
        # interpreter or notebook kernel.
        return

    detector = None
    try:
        detector = HandDetector()

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture frame from the camera.")
                break

            # Mirror the frame so on-screen motion matches the user's hand
            frame_flipped = cv2.flip(frame, 1)
            _draw_toolbar(frame_flipped)

            # Find hands and draw landmarks on the frame
            frame_flipped = detector.findHands(frame_flipped)

            # Extract hand positions for drawing on the canvas
            hand_positions = detector.findPosition(frame_flipped)

            if hand_positions:
                center = (hand_positions[8][1], hand_positions[8][2])  # index fingertip
                thumb = (hand_positions[4][1], hand_positions[4][2])   # thumb tip
                cv2.circle(frame_flipped, center, 3, (0, 255, 0), -1)

                if thumb[1] - center[1] < 30:
                    # Pen up: close the current strokes so the next points are
                    # not connected to them. Skip this while every current
                    # stroke is still empty; otherwise holding the gesture
                    # would append four empty deques per frame indefinitely.
                    current_strokes = (bpoints[blue_index], gpoints[green_index],
                                       rpoints[red_index], ypoints[yellow_index])
                    if any(current_strokes):
                        bpoints.append(deque(maxlen=512))
                        blue_index += 1
                        gpoints.append(deque(maxlen=512))
                        green_index += 1
                        rpoints.append(deque(maxlen=512))
                        red_index += 1
                        ypoints.append(deque(maxlen=512))
                        yellow_index += 1
                elif center[1] <= 65:
                    # Fingertip is inside the toolbar strip
                    if 40 <= center[0] <= 140:  # Clear Button
                        bpoints = [deque(maxlen=512)]
                        gpoints = [deque(maxlen=512)]
                        rpoints = [deque(maxlen=512)]
                        ypoints = [deque(maxlen=512)]

                        blue_index = 0
                        green_index = 0
                        red_index = 0
                        yellow_index = 0

                        # Wipe everything below the toolbar back to white
                        paintWindow[67:, :, :] = 255
                    elif 160 <= center[0] <= 255:
                        colorIndex = 0  # Blue
                    elif 275 <= center[0] <= 370:
                        colorIndex = 1  # Green
                    elif 390 <= center[0] <= 485:
                        colorIndex = 2  # Red
                    elif 505 <= center[0] <= 600:
                        colorIndex = 3  # Yellow
                else:
                    # Pen down: record the point in the active colour's stroke
                    if colorIndex == 0:
                        bpoints[blue_index].appendleft(center)
                    elif colorIndex == 1:
                        gpoints[green_index].appendleft(center)
                    elif colorIndex == 2:
                        rpoints[red_index].appendleft(center)
                    elif colorIndex == 3:
                        ypoints[yellow_index].appendleft(center)

            # Draw lines of all the colors on the canvas and frame
            for strokes, color in zip((bpoints, gpoints, rpoints, ypoints), colors):
                for stroke in strokes:
                    # Snapshot to a list: indexing into the middle of a deque
                    # is O(n), which made the per-index loop quadratic.
                    pts = list(stroke)
                    for start, end in zip(pts, pts[1:]):
                        if start is None or end is None:
                            continue
                        cv2.line(frame_flipped, start, end, color, 2)
                        cv2.line(paintWindow, start, end, color, 2)

            # Display frames with tracking and painting windows
            cv2.imshow("Tracking", frame_flipped)
            cv2.imshow("Paint", paintWindow)

            # Exit the loop if 'q' key is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera, the MediaPipe graph and the GUI windows
        cap.release()
        if detector is not None:
            detector.hands.close()
        cv2.destroyAllWindows()

# Start the painting loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


I0000 00:00:1720204463.967914 3765129 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1720204463.995986 3765361 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1720204464.004804 3765361 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
