In [None]:
pip install opencv-Python

In [None]:
pip install mediapipe

In [None]:
pip install pyautogui

In [4]:
import cv2
import mediapipe as mp
import pyautogui
import time

In [5]:
# Disable PyAutoGUI fail-safe (optional, use with caution).
# The cursor mapping in the main loop clamps the hand position to the screen
# edges, so the pointer can legitimately end up in a screen corner.
# NOTE(review): per the PyAutoGUI docs, with the fail-safe enabled a corner
# position raises FailSafeException; with it disabled, the only way to stop
# the program is the 'q' key in the preview window or interrupting the kernel.
pyautogui.FAILSAFE = False

In [6]:
# MediaPipe setup: module handles first, then the hand detector itself.
mp_Hands = mp.solutions.hands
mp_Drawing = mp.solutions.drawing_utils  # used to draw landmarks on the preview frame

# Detector configured for a single hand with a 0.7 minimum detection confidence.
hand = mp_Hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.7,
)

In [7]:
# Get screen size for mapping hand coordinates to screen coordinates.
# The main loop multiplies the clamped 0..1 hand position by these values
# to obtain the target cursor position.
screen_width, screen_height = pyautogui.size()

In [8]:
# Cursor smoothing state.
# smoothing_factor is the weight given to the newly measured position each
# frame; the remaining (1 - smoothing_factor) stays with the previous position,
# so a lower value makes the cursor follow the hand more slowly but more steadily.
smoothing_factor = 0.1

# Last cursor position produced by the smoothing step (starts at the origin).
prev_x = prev_y = 0

In [9]:
# --- Click / drag detection: tunable thresholds ---
# A finger counts as "bent" when its tip-to-PIP distance drops below this value.
click_threshold = 0.07
# A bend shorter than this many seconds is recorded as a click; a longer one starts a drag.
click_duration_threshold = 0.7
# Time window (seconds) used when grouping recent bends into double / triple clicks.
double_click_time = 1.0
# Seconds to wait after the last bend ends before the buffered bends are acted on.
action_delay = 0.35

# --- Click / drag detection: runtime state ---
# (start_time, end_time) pairs of recent short bends awaiting processing.
click_buffer = list()
# Timestamp of the most recently executed click action.
last_action_time = 0
# Start time of the bend currently in progress, or None when the index finger is extended.
bend_start_time = None
# True while the left mouse button is being held down for a drag.
is_dragging = False
# Index-finger bent state from the previous frame.
was_bent = False
# Middle-finger bent state from the previous frame (drives the right click).
was_middle_bent = False

In [10]:
# --- Zoom: tunable thresholds ---
# Minimum frame-to-frame change in thumb-index distance that triggers a zoom step.
zoom_threshold = 0.02
# Minimum seconds between two zoom checks.
zoom_debounce = 0.2
# Minimum seconds between successive mode toggles.
tap_debounce = 0.15
# NOTE(review): tap_threshold looks intended for tap-style toggling —
# confirm where it is consumed.
tap_threshold = 0.08

# --- Zoom: runtime state ---
# Whether zoom mode is currently switched on.
zoom_active = False
# Thumb-index distance measured on the previous frame (None until a baseline exists).
prev_thumb_index_distance = None
# Timestamp of the last zoom step that was sent.
last_zoom_time = 0
# NOTE(review): presumably the timestamp of the last zoom-mode tap — confirm usage.
last_tap_time = 0

In [11]:
# --- Scroll: tunable values ---
# Minimum seconds between two scroll events.
scroll_delay = 0.05
# Amount passed to pyautogui.scroll (negated for scrolling down).
scroll_amount = 100
# Compared against (thumb tip y - thumb MCP y): below upper_threshold scrolls up,
# above lower_threshold scrolls down, anything in between does nothing.
upper_threshold, lower_threshold = -0.05, 0.05

# --- Scroll: runtime state ---
# Whether scroll mode is on (cursor movement is suspended while it is).
scroll_active = False
# Timestamp of the last scroll-mode toggle.
last_scroll_tap_time = 0
# Timestamp of the last scroll event that was sent.
last_scroll_time = 0

In [None]:
# Sub-rectangle of landmark coordinates that is stretched over the whole screen.
# Positions outside this range are clamped to the nearest screen edge, so the
# hand only has to move within the central part of the frame to reach every pixel.
hand_range_x_min, hand_range_x_max = 0.25, 0.75
hand_range_y_min, hand_range_y_max = 0.25, 0.75

In [19]:
# Grace period between a hand appearing and gestures becoming active.
gesture_delay = 2.0  # seconds to wait after detection before gestures are processed

# Detection state updated by the main loop.
hand_present = False        # True while a hand is visible in the frame
hand_detected_time = None   # time the current hand first appeared, None when no hand is present

In [20]:
# Start webcam
# Open capture device index 0 (normally the system's default camera).
# The main loop reads frames from this handle and releases it on exit.
web_Cam = cv2.VideoCapture(0)

In [21]:
def _landmark_distance(a, b):
    """Return the Euclidean distance between two landmarks in the image plane.

    Only the ``x`` and ``y`` attributes of ``a`` and ``b`` are read.
    """
    return ((a.x - b.x) ** 2 + (a.y - b.y) ** 2) ** 0.5


# Previous-frame state of the two mode-toggle fingers. Toggling only on the
# extended -> bent transition makes each bend flip its mode exactly once,
# instead of flipping it again on every frame the finger stays bent.
was_pinky_bent = False
was_ring_bent = False

if not web_Cam.isOpened():
    print("Could not open webcam.")

# Main capture / gesture loop. Everything runs inside try/finally so that the
# webcam, the preview window and a held mouse button are always released, even
# when the kernel is interrupted or an exception is raised mid-frame.
try:
    while web_Cam.isOpened():
        succ, frame = web_Cam.read()
        if not succ:
            print("Failed to read from webcam.")
            break

        # Flip frame horizontally for a natural (mirror-like) experience
        frame = cv2.flip(frame, 1)
        # Convert frame to RGB as MediaPipe expects RGB images
        rgb_Frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process frame to detect hand landmarks
        result = hand.process(rgb_Frame)

        current_time = time.time()

        # Track hand appearance / disappearance
        if result.multi_hand_landmarks:
            if not hand_present:
                # Hand just appeared
                hand_detected_time = current_time
                hand_present = True
                prev_thumb_index_distance = None  # Reset zoom baseline
                print("Hand detected, waiting {:g}s to start gestures...".format(gesture_delay))
        else:
            if hand_present:
                # Hand just disappeared: drop every piece of gesture state so
                # nothing stale is acted on when a hand shows up again.
                hand_present = False
                hand_detected_time = None
                prev_thumb_index_distance = None
                zoom_active = False
                scroll_active = False
                if is_dragging:
                    # Release the button, otherwise it stays held down forever
                    pyautogui.mouseUp(button='left')
                    print("Drag ended")
                is_dragging = False
                click_buffer = []
                bend_start_time = None
                was_bent = False
                was_middle_bent = False
                was_pinky_bent = False
                was_ring_bent = False
                print("Hand lost, resetting...")

        # Wait for gesture delay after hand detection
        if hand_detected_time is not None and current_time - hand_detected_time < gesture_delay:
            remaining = gesture_delay - (current_time - hand_detected_time)
            cv2.putText(frame, "Gestures start in {:.1f}s".format(remaining),
                        (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        elif result.multi_hand_landmarks:
            hand_landmarks = result.multi_hand_landmarks[0]
            landmark = hand_landmarks.landmark
            joint = mp_Hands.HandLandmark

            # Extract key landmarks
            middle_mcp = landmark[joint.MIDDLE_FINGER_MCP]
            indx_tip = landmark[joint.INDEX_FINGER_TIP]
            indx_pip = landmark[joint.INDEX_FINGER_PIP]
            middle_tip = landmark[joint.MIDDLE_FINGER_TIP]
            middle_pip = landmark[joint.MIDDLE_FINGER_PIP]
            thumb_tip = landmark[joint.THUMB_TIP]
            thumb_mcp = landmark[joint.THUMB_MCP]
            ring_tip = landmark[joint.RING_FINGER_TIP]
            ring_pip = landmark[joint.RING_FINGER_PIP]
            pinky_tip = landmark[joint.PINKY_TIP]
            pinky_pip = landmark[joint.PINKY_PIP]

            # Distances between landmarks used for gesture detection
            tip_pip_distance = _landmark_distance(indx_tip, indx_pip)
            middle_tip_pip_distance = _landmark_distance(middle_tip, middle_pip)
            thumb_index_distance = _landmark_distance(thumb_tip, indx_tip)
            ring_tip_pip_distance = _landmark_distance(ring_tip, ring_pip)
            thumb_y_relative = thumb_tip.y - thumb_mcp.y

            # Left button control
            is_bent = tip_pip_distance < click_threshold
            if is_bent and not was_bent:
                # Finger just bent, start timing
                bend_start_time = current_time
            elif is_bent and bend_start_time is not None:
                # Finger is still bent, check duration
                bend_duration = current_time - bend_start_time
                if bend_duration >= click_duration_threshold and not is_dragging:
                    # Bend exceeds threshold, start dragging
                    pyautogui.mouseDown(button='left')
                    print("Drag started")
                    is_dragging = True
                    click_buffer = []  # Clear buffer since it's a drag
            elif not is_bent and was_bent:
                # Finger just extended
                if is_dragging:
                    # End drag
                    pyautogui.mouseUp(button='left')
                    print("Drag ended")
                    is_dragging = False
                else:
                    # Short bend, potential click
                    if bend_start_time is not None:
                        bend_duration = current_time - bend_start_time
                        if bend_duration < click_duration_threshold:
                            click_buffer.append((bend_start_time, current_time))
                bend_start_time = None

            # Process click buffer after delay
            if click_buffer and current_time - click_buffer[-1][1] >= action_delay:
                # Enough time has passed to process clicks
                recent_clicks = [click for click in click_buffer if current_time - click[1] < double_click_time * 2]
                if recent_clicks and current_time - last_action_time > action_delay:
                    click_count = len(recent_clicks)
                    if click_count >= 3 and recent_clicks[-1][1] - recent_clicks[-3][1] < double_click_time * 2:
                        print("Triple left click")
                        pyautogui.click(clicks=3)
                        last_action_time = current_time
                    elif click_count >= 2 and recent_clicks[-1][1] - recent_clicks[-2][1] < double_click_time:
                        print("Double left click")
                        pyautogui.doubleClick()
                        last_action_time = current_time
                    else:
                        print("Single left click")
                        pyautogui.click()
                        last_action_time = current_time
                click_buffer = []  # Clear buffer after action

            was_bent = is_bent

            # Right button control (fires once when the middle finger bends)
            is_middle_bent = middle_tip_pip_distance < click_threshold
            if is_middle_bent and not was_middle_bent:
                pyautogui.click(button='right')
                print("Performed right click")
            was_middle_bent = is_middle_bent

            # Cursor movement (moves unless in scroll mode)
            if not scroll_active:
                x_normalized = (middle_mcp.x - hand_range_x_min) / (hand_range_x_max - hand_range_x_min)
                y_normalized = (middle_mcp.y - hand_range_y_min) / (hand_range_y_max - hand_range_y_min)
                x_normalized = max(0, min(1, x_normalized))
                y_normalized = max(0, min(1, y_normalized))
                # Last valid pixel index is size - 1
                target_x = x_normalized * (screen_width - 1)
                target_y = y_normalized * (screen_height - 1)
                # Keep the smoothed position as a float. Truncating it to int
                # every frame stalls the cursor short of the target and drifts
                # it toward the top-left; round only when moving the cursor.
                prev_x = smoothing_factor * target_x + (1 - smoothing_factor) * prev_x
                prev_y = smoothing_factor * target_y + (1 - smoothing_factor) * prev_y
                pyautogui.moveTo(int(round(prev_x)), int(round(prev_y)))

            # Zoom toggle: pinky tip below its PIP joint means the pinky is bent.
            # Toggle once per bend, and no more often than tap_debounce allows.
            is_pinky_bent = pinky_tip.y > pinky_pip.y
            if is_pinky_bent and not was_pinky_bent and current_time - last_tap_time > tap_debounce:
                zoom_active = not zoom_active
                last_tap_time = current_time
                print("Zoom Mode: ON" if zoom_active else "Zoom Mode: OFF")
            was_pinky_bent = is_pinky_bent

            # Scroll toggle (ring finger bent), likewise once per bend
            is_ring_bent = ring_tip_pip_distance < click_threshold
            if is_ring_bent and not was_ring_bent and current_time - last_scroll_tap_time > tap_debounce:
                scroll_active = not scroll_active
                last_scroll_tap_time = current_time
                print("Scroll mode:", "ON" if scroll_active else "OFF")
            was_ring_bent = is_ring_bent

            # Zoom functionality
            if zoom_active and prev_thumb_index_distance is not None and current_time - last_zoom_time > zoom_debounce:
                distance_change = thumb_index_distance - prev_thumb_index_distance
                if abs(distance_change) > zoom_threshold:
                    if distance_change < 0:
                        pyautogui.hotkey('ctrl', '+')
                        print("Zoom in")
                    else:
                        pyautogui.hotkey('ctrl', '-')
                        print("Zoom out")
                    last_zoom_time = current_time
            prev_thumb_index_distance = thumb_index_distance

            # Scroll functionality
            if scroll_active and current_time - last_scroll_time > scroll_delay:
                if thumb_y_relative < upper_threshold:
                    pyautogui.scroll(scroll_amount)
                    print("Scroll up")
                    last_scroll_time = current_time
                elif thumb_y_relative > lower_threshold:
                    pyautogui.scroll(-scroll_amount)
                    print("Scroll down")
                    last_scroll_time = current_time

            # Draw hand landmarks
            for detected_hand in result.multi_hand_landmarks:
                mp_Drawing.draw_landmarks(
                    frame,
                    detected_hand,
                    mp_Hands.HAND_CONNECTIONS,
                    mp_Drawing.DrawingSpec(color=(50, 50, 100), circle_radius=3, thickness=5)
                )

        # Display the frame and check for 'q' to exit
        cv2.imshow("Hand Tracking", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            print("Exiting program...")
            break
finally:
    # Cleanup: runs on normal exit, on 'q', and on interrupts / exceptions
    web_Cam.release()
    cv2.destroyAllWindows()
    if is_dragging:
        # Do not leave the left button held down after the program stops
        pyautogui.mouseUp(button='left')
        is_dragging = False

Hand detected, waiting 2s to start gestures...
Single left click
Single left click
Hand lost, resetting...
Hand detected, waiting 2s to start gestures...
Single left click
Single left click
Single left click
Single left click
Single left click
Single left click
Single left click
Single left click
Hand lost, resetting...
Exiting program...
