# In [12]:
import cv2
import mediapipe as mp
import time
from collections import deque
import numpy as np

# MediaPipe Hands tracker shared by the capture loop: at most one hand, and
# both the detection and tracking confidence must reach 0.8.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.8,
    min_tracking_confidence=0.8
)
mp_draw = mp.solutions.drawing_utils

# --- Zoom gesture -----------------------------------------------------------
# Distances below are thumb-tip to index-tip distances measured on landmark
# x/y values, i.e. as fractions of the frame size rather than pixels.

ZOOM_TRIGGER_THRESHOLD = 0.12 # thumb-index distance that must be exceeded to start the zoom gesture
ZOOM_HOLD_TIME = 1.0 # seconds the gesture must be held before zoom activates (avoids accidental triggers)
ZOOM_COOLDOWN_TIME = 2.0 # seconds; intended to prevent glitching - NOTE(review): not referenced in the visible code
MIN_ZOOM_DISTANCE = 0.05  # distance (and anything smaller) mapped to MIN_ZOOM_SCALE
MAX_ZOOM_DISTANCE = 0.3   # distance (and anything larger) mapped to MAX_ZOOM_SCALE
MIN_ZOOM_SCALE = 1.0
MAX_ZOOM_SCALE = 2.0


# Mutable zoom state updated by the capture loop.
zoom_active = False
zoom_start_time = 0  # time the zoom gesture was first seen; 0 means "not currently held"
last_zoom_time = 0  # time zoom last became active - NOTE(review): written but never read in the visible code

# NOTE(review): the two volume_* names are not referenced in the visible code.
volume_change_delay = 0.1
last_volume_change_time = 0

# --- Pinch detection (resets brightness and zoom) ---------------------------
PINCH_TOGGLE_THRESHOLD = 0.03  # thumb-index distance below which the fingers count as pinched
pinched = False  # whether the fingers were pinched on the previous processed frame
last_toggle_time = 0  # time of the last pinch-triggered reset

# --- Hand-lost detection ----------------------------------------------------
hand_detected = False
last_hand_time = 0  # time of the most recent frame containing a hand
HAND_TIMEOUT = 1.5  # seconds without a detection before "Hand Lost" is shown

# --- Brightness gesture (index finger drawing a circle) ---------------------

index_tip_his = deque(maxlen=20)  # recent index-fingertip positions in pixels, fed to detect_circle
brightness_lvl = 1.0  # multiplier applied to every displayed frame; the loop keeps it within 0.1-2.0
brightness_change_delay = 0.1  # minimum seconds between two brightness steps
last_brightness_change_time = 0

# --- Zoom animation ---------------------------------------------------------

smooth_zoom_scale = 1.0  # exponentially smoothed zoom scale, carried across frames

# Finger detection

def detection(hand_landmarks, hand_label):
    """Return the raised/folded state of the five fingers of one hand.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute is an indexable
            sequence of points exposing ``x`` and ``y``; indices 3, 4, 6, 8,
            10, 12, 14, 16, 18 and 20 are read.
        hand_label: Handedness label; ``"Right"`` selects the right-hand thumb
            rule, any other value selects the opposite rule.

    Returns:
        A list of five ints (1 = raised, 0 = folded) ordered thumb, index,
        middle, ring, pinky.
    """
    points = hand_landmarks.landmark

    # The thumb is judged horizontally: its tip (4) is compared with the joint
    # just below it (3), and the expected side depends on which hand it is.
    thumb_tip_x = points[4].x
    thumb_joint_x = points[3].x
    if hand_label == "Right":
        thumb_raised = thumb_tip_x < thumb_joint_x
    else:
        thumb_raised = thumb_tip_x > thumb_joint_x

    states = [int(thumb_raised)]

    # The other fingers are judged vertically: a finger counts as raised when
    # its tip has a smaller y than the joint two landmarks below it.
    states.extend(
        int(points[tip].y < points[tip - 2].y) for tip in (8, 12, 16, 20)
    )
    return states

# calculating distance

def distance_between_points(p1, p2):
    """Return the Euclidean distance between two points in the x/y plane.

    Both arguments only need ``x`` and ``y`` attributes; any other attribute
    is ignored.
    """
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    squared_length = delta_x ** 2 + delta_y ** 2
    return squared_length ** 0.5

def detect_circle(points, min_points=10, min_total_angle=4.0):
    """Classify a trail of 2-D points as a clockwise or counterclockwise sweep.

    The points are centred on their mean, the polar angle of every point
    around that centre is computed, and the frame-to-frame angle changes are
    summed after wrapping each change into [-pi, pi).

    Args:
        points: Sequence (list, deque, ...) of ``(x, y)`` pairs in the order
            they were recorded.
        min_points: Minimum number of points required before any decision is
            made. Defaults to 10.
        min_total_angle: Minimum absolute accumulated angle, in radians, that
            counts as a circular motion. Defaults to 4.0.

    Returns:
        ``"clockwise"`` when the accumulated angle is positive,
        ``"counterclockwise"`` when it is negative, and ``None`` when there
        are too few points or the accumulated angle is below the threshold.
        The direction names assume image coordinates with y growing downward.
    """
    # At least two points are needed for a single angle change, whatever the
    # caller passes as min_points.
    if len(points) < max(min_points, 2):
        return None

    pts = np.array(points, dtype=float)
    centred = pts - pts.mean(axis=0)  # coordinates relative to the centre of motion

    angles = np.arctan2(centred[:, 1], centred[:, 0])
    # Wrap every step into [-pi, pi) so crossing the +/-pi seam does not show
    # up as a jump of almost a full turn.
    angle_steps = np.mod(np.diff(angles) + np.pi, 2 * np.pi) - np.pi

    total_angle = np.sum(angle_steps)

    if abs(total_angle) < min_total_angle:
        return None
    if total_angle > 0:
        return "clockwise"
    return "counterclockwise"

# Webcam capture and gesture loop.
#
# Each frame is mirrored, brightness-adjusted for display and searched for a
# hand. The detected hand drives three gestures:
#   * pinch (thumb and index touching)   -> reset brightness and zoom
#   * index finger only, drawing circles -> raise / lower brightness
#   * thumb + index held apart           -> zoom the hand region
# Press "q" in the window to quit.

capture = cv2.VideoCapture(0)
capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1152)   # Width
capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 648)   # Height

cv2.namedWindow("Hand Tracking", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Hand Tracking", 1280, 720)

# Weight of the newest sample in the exponential smoothing of the zoom scale.
SMOOTHING_FACTOR = 0.15
# Pixels kept around the hand's bounding box when cropping the zoom region.
ZOOM_ROI_PAD = 40

try:
    while True:
        ret, frame = capture.read()
        if not ret:
            break

        # Hand tracking runs on the mirrored but otherwise untouched image;
        # only the displayed frame gets the brightness change.
        mirrored_frame = cv2.flip(frame, 1)
        frame = cv2.convertScaleAbs(mirrored_frame, alpha=brightness_lvl, beta=0)

        rgb_frame = cv2.cvtColor(mirrored_frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        current_time = time.time()

        if results.multi_hand_landmarks:
            hand_detected = True
            last_hand_time = current_time

            for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                hand_label = results.multi_handedness[idx].classification[0].label
                landmarks = hand_landmarks.landmark

                # Bounding box of the hand in pixels
                h, w, _ = frame.shape
                x_list = [int(lm.x * w) for lm in landmarks]
                y_list = [int(lm.y * h) for lm in landmarks]
                x_min, x_max = min(x_list), max(x_list)
                y_min, y_max = min(y_list), max(y_list)

                cv2.rectangle(frame, (x_min - 20, y_min - 20), (x_max + 20, y_max + 20), (255, 0, 0), 2)

                # Finger detection
                finger_states = detection(hand_landmarks, hand_label)
                total_fingers = sum(finger_states)

                thumb_tip = landmarks[4]
                index_tip = landmarks[8]
                pinch_distance = distance_between_points(thumb_tip, index_tip)

                is_pinched = pinch_distance < PINCH_TOGGLE_THRESHOLD
                is_brightness_gesture = finger_states == [0, 1, 0, 0, 0]
                is_fist = total_fingers == 0

                # A new pinch resets brightness and zoom, at most once per
                # second; `pinched` latches so a held pinch fires only once.
                if is_pinched and not pinched and current_time - last_toggle_time > 1.0:
                    brightness_lvl = 1.0
                    zoom_active = False
                    zoom_start_time = 0
                    last_toggle_time = current_time
                pinched = is_pinched

                # Brightness control: record the fingertip exactly once per
                # frame so the history really spans index_tip_his.maxlen frames.
                if is_brightness_gesture:
                    index_tip_his.append((int(index_tip.x * w), int(index_tip.y * h)))

                    direction = detect_circle(index_tip_his)
                    if direction and current_time - last_brightness_change_time > brightness_change_delay:
                        if direction == "clockwise":
                            brightness_lvl = min(2.0, brightness_lvl + 0.1)
                        elif direction == "counterclockwise":
                            brightness_lvl = max(0.1, brightness_lvl - 0.1)
                        last_brightness_change_time = current_time
                else:
                    index_tip_his.clear()

                # Draw the recorded fingertip trail
                for pt in index_tip_his:
                    cv2.circle(frame, pt, 3, (0, 255, 255), -1)

                # Zoom detection
                if not is_pinched and not is_brightness_gesture and not is_fist:
                    if finger_states == [1, 1, 0, 0, 0] and pinch_distance > ZOOM_TRIGGER_THRESHOLD:
                        if zoom_start_time == 0:
                            zoom_start_time = current_time
                        # The gesture must be held for ZOOM_HOLD_TIME before zoom starts
                        if not zoom_active and current_time - zoom_start_time >= ZOOM_HOLD_TIME:
                            zoom_active = True
                            last_zoom_time = current_time
                    else:
                        zoom_start_time = 0
                        zoom_active = False

                    if zoom_active:
                        # Map the thumb-index distance linearly onto the zoom range
                        clamped_dis = max(MIN_ZOOM_DISTANCE, min(pinch_distance, MAX_ZOOM_DISTANCE))
                        zoom_scale = MIN_ZOOM_SCALE + (clamped_dis - MIN_ZOOM_DISTANCE) * (
                            MAX_ZOOM_SCALE - MIN_ZOOM_SCALE
                        ) / (MAX_ZOOM_DISTANCE - MIN_ZOOM_DISTANCE)

                        # Exponential smoothing keeps the zoom from jittering
                        smooth_zoom_scale = (
                            (1 - SMOOTHING_FACTOR) * smooth_zoom_scale + SMOOTHING_FACTOR * zoom_scale
                        )

                        # Region of interest: padded hand box, clipped to the frame
                        x1 = max(x_min - ZOOM_ROI_PAD, 0)
                        y1 = max(y_min - ZOOM_ROI_PAD, 0)
                        x2 = min(x_max + ZOOM_ROI_PAD, w)
                        y2 = min(y_max + ZOOM_ROI_PAD, h)
                        roi = frame[y1:y2, x1:x2]

                        if roi.size != 0:
                            roi_h, roi_w = roi.shape[:2]
                            # Magnify the crop by the zoom scale (1.0 keeps its
                            # original size, larger values enlarge it), keeping
                            # the aspect ratio and never exceeding the frame.
                            fit_scale = min(smooth_zoom_scale, w / roi_w, h / roi_h)
                            new_w = min(w, max(1, int(roi_w * fit_scale)))
                            new_h = min(h, max(1, int(roi_h * fit_scale)))

                            # cv2.resize takes the target size as (width, height)
                            zoomed_roi = cv2.resize(roi, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

                            # Blur the whole frame and centre the zoomed crop on it
                            frame = cv2.GaussianBlur(frame, (51, 51), 0)

                            x_offset = (w - new_w) // 2
                            y_offset = (h - new_h) // 2
                            frame[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = zoomed_roi

                            cv2.putText(frame, f"Zoom: {smooth_zoom_scale:.2f}", (50, 50),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 3)
                    else:
                        cv2.putText(frame, "No Zoom", (50, 50),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)

                # Gesture name (for display)
                if total_fingers == 0:
                    gesture = "Fist"
                elif total_fingers == 5:
                    gesture = "Open Palm"
                elif finger_states == [0, 1, 1, 0, 0]:
                    gesture = "Peace Sign"
                elif finger_states == [1, 0, 0, 0, 0]:
                    gesture = "Thumbs Up"
                elif finger_states == [1, 1, 0, 0, 1]:
                    gesture = "Watch dogs : You're being watched "
                else:
                    gesture = f"{total_fingers} fingers up"

                display_text = f"Fingers: {total_fingers} | Gesture: {gesture}"

                # Display
                cv2.putText(frame, f"{hand_label} Hand", (x_min - 20, y_min - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)

                cv2.putText(frame, display_text, (x_min - 20, y_max + 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        elif current_time - last_hand_time > HAND_TIMEOUT:
            # The hand has been missing for longer than the timeout.
            hand_detected = False
            # Drop stale gesture state so an old fingertip trail, pinch latch or
            # zoom hold timer cannot trigger anything when a hand reappears.
            index_tip_his.clear()
            pinched = False
            zoom_active = False
            zoom_start_time = 0

        if hand_detected:
            # Brightness bar; the fill is proportional to brightness_lvl / 2.0
            bar_x, bar_y = 10, 100
            bar_width = 200
            bar_height = 20
            filled_width = int(bar_width * (brightness_lvl / 2.0))

            cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_width, bar_y + bar_height), (200, 200, 200), 2)
            cv2.rectangle(frame, (bar_x, bar_y), (bar_x + filled_width, bar_y + bar_height), (0, 255, 0), -1)
            cv2.putText(frame, f"Brightness: {brightness_lvl:.2f}", (bar_x, bar_y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        else:
            cv2.putText(frame, "Hand Lost", (10, 130),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)

        cv2.imshow("Hand Tracking", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera, the MediaPipe graph and the window even when the
    # loop exits through an exception.
    capture.release()
    hands.close()
    cv2.destroyAllWindows()
