In [1]:
import cv2
import mediapipe as mp
import math
import pyautogui
import subprocess

# Initialize MediaPipe hands detector and drawing utilities with GPU acceleration
my_hands = mp.solutions.hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5,
    model_complexity=1  # Use GPU-optimized model
)
drawing_utils = mp.solutions.drawing_utils

# Initialize webcam capture
webcam = cv2.VideoCapture(0)

# Check and enable CUDA backend for OpenCV if available
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    # Set backend to CUDA
    cv2.setUseOptimized(True)
    cv2.cuda.setDevice(0)

# Initialize finger landmark coordinates  
x1 = y1 = x2 = y2 = 0

while True:
    # Read frame from webcam
    success, frame = webcam.read()
    if not success:
        continue
        
    # Get frame dimensions and convert to RGB
    frame_height, frame_width, _ = frame.shape
    
    # Use GPU acceleration for color conversion if CUDA is available
    if cv2.cuda.getCudaEnabledDeviceCount() > 0:
        gpu_frame = cv2.cuda_GpuMat()
        gpu_frame.upload(frame)
        gpu_rgb = cv2.cuda.cvtColor(gpu_frame, cv2.COLOR_BGR2RGB)
        rgb_image = gpu_rgb.download()
    else:
        rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Process frame with MediaPipe
    output = my_hands.process(rgb_image)
    hands = output.multi_hand_landmarks
    
    if hands:
        for hand in hands:
            # Draw hand landmarks
            drawing_utils.draw_landmarks(frame, hand, mp.solutions.hands.HAND_CONNECTIONS)
            landmarks = hand.landmark
            
            # Process each landmark
            for id, landmark in enumerate(landmarks):
                # Convert normalized coordinates to pixel coordinates
                x = int(landmark.x * frame_width)
                y = int(landmark.y * frame_height)
                
                # Track index finger tip (landmark 8)
                if id == 8:
                    cv2.circle(img=frame, center=(x, y), radius=10, 
                             color=(0, 255, 255), thickness=2)
                    x1, y1 = x, y
                
                # Track thumb tip (landmark 4)
                if id == 4:
                    cv2.circle(img=frame, center=(x, y), radius=10,
                             color=(0, 0, 255), thickness=2)
                    x2, y2 = x, y
            
            # Calculate distance between fingers and adjust volume
            dist = int(math.sqrt((x2 - x1)**2 + (y2 - y1)**2))
            cv2.line(frame, (x1, y1), (x2, y2), (255, 0, 0), 5)
            
            #For Windows
            # Adjust volume based on distance
            # if dist > 50:
            #     pyautogui.press('volumeup')
            # elif dist < 50:
            #     pyautogui.press('volumedown')

            #For Mac
            # Adjust volume based on distance
            if dist > 50:
                subprocess.call(["osascript", "-e", "set volume output volume (output volume of (get volume settings) + 5)"])
            elif dist < 50:
                subprocess.call(["osascript", "-e", "set volume output volume (output volume of (get volume settings) - 5)"])

    
    # Display output frame
    cv2.imshow("Hand Volume Control", frame)
    
    # Check for escape key press
    if cv2.waitKey(10) == 27:
        break

# Clean up
webcam.release()
cv2.destroyAllWindows()


I0000 00:00:1730996374.885429 4519612 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1730996374.893343 4520483 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730996374.897980 4520483 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730996376.557298 4520480 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
2024-11-07 21:49:36.647 Python[18138:4519612] +[IMKClient subclass]: chose IMKClient_Modern
2024-11-07 21:49:36.647 Python[18138:4519612] +[IMKInputSession subclass]: chose IMKInputSession_Modern


KeyboardInterrupt: 

: 