In [10]:
pip install --upgrade mediapipe


Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-win_amd64.whl (50.9 MB)
     ---------------------------------------- 50.9/50.9 MB 3.6 MB/s eta 0:00:00
Collecting protobuf<5,>=4.25.3
  Using cached protobuf-4.25.5-cp310-abi3-win_amd64.whl (413 kB)
Collecting sentencepiece
  Downloading sentencepiece-0.2.0-cp310-cp310-win_amd64.whl (991 kB)
     -------------------------------------- 991.5/991.5 KB 4.8 MB/s eta 0:00:00
Installing collected packages: sentencepiece, protobuf, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.28.2
    Uninstalling protobuf-5.28.2:
      Successfully uninstalled protobuf-5.28.2
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\Goutham\\AppData\\Local\\Programs\\Python\\Python310\\Lib\\site-packages\\google\\~-pb\\_message.pyd'
Consider using the `--user` option or check the permissions.



In [None]:
import cv2
import numpy as np
import mediapipe as mp
import screen_brightness_control as sbc
from math import hypot
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL

def main():
    devices = AudioUtilities.GetSpeakers()
    interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume = cast(interface, POINTER(IAudioEndpointVolume))
    volRange = volume.GetVolumeRange()
    minVol, maxVol, _ = volRange

    mpHands = mp.solutions.hands
    mpDrawing = mp.solutions.drawing_utils
    landmark_style = mpDrawing.DrawingSpec(color=(0, 0, 255), thickness=5, circle_radius=3)  # Green landmarks
    connection_style = mpDrawing.DrawingSpec(color=(255, 0, 0), thickness=2) 
    
    hands = mpHands.Hands(
        static_image_mode=False,
        model_complexity=1,
        min_detection_confidence=0.75,
        min_tracking_confidence=0.75,
        max_num_hands=2)

    mpFaceMesh = mp.solutions.face_mesh
    face_mesh = mpFaceMesh.FaceMesh(min_detection_confidence=0.75, min_tracking_confidence=0.75)

    draw = mp.solutions.drawing_utils
    landmarkStyle = draw.DrawingSpec(color=(0, 0, 255), thickness=5, circle_radius=3)  # Green landmarks
    connectionStyle = draw.DrawingSpec(color=(255, 0, 0), thickness=2) 
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            
            processed_hands = hands.process(frameRGB)
            left_landmark_list, right_landmark_list = get_left_right_landmarks(frame, processed_hands, draw, mpHands)

            
            processed_face = face_mesh.process(frameRGB)

            if left_landmark_list:
                left_distance = get_distance(frame, left_landmark_list)
                b_level = np.interp(left_distance, [50, 220], [0, 100])
                sbc.set_brightness(int(b_level))

            if right_landmark_list:
                right_distance = get_distance(frame, right_landmark_list)
                vol = np.interp(right_distance, [50, 220], [minVol, maxVol])
                volume.SetMasterVolumeLevel(vol, None)

            if processed_face.multi_face_landmarks:
                for face_landmarks in processed_face.multi_face_landmarks:
                    draw.draw_landmarks(frame, face_landmarks, mpFaceMesh.FACEMESH_CONTOURS)
                    eye_gesture = detect_eye_gesture(face_landmarks)
                    if eye_gesture == "BLINK_LEFT":
                        print("Previous Track")
                    elif eye_gesture == "BLINK_RIGHT":
                        print("Next Track")

            if processed_hands.multi_hand_landmarks:
                for hand_landmarks in processed_hands.multi_hand_landmarks:
                    palm_gesture = detect_palm_gesture(hand_landmarks)
                    if palm_gesture == "FIVE":
                        print("Play/Pause Media")
                    elif palm_gesture == "FIST":
                        print("Mute Media")

            cv2.imshow('Image', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

def get_left_right_landmarks(frame, processed, draw, mpHands):
    left_landmark_list = []
    right_landmark_list = []

    if processed.multi_hand_landmarks:
        for handlm in processed.multi_hand_landmarks:
            for idx, found_landmark in enumerate(handlm.landmark):
                height, width, _ = frame.shape
                x, y = int(found_landmark.x * width), int(found_landmark.y * height)
                if idx == 4 or idx == 8:
                    landmark = [idx, x, y]
                    if handlm == processed.multi_hand_landmarks[0]:
                        left_landmark_list.append(landmark)
                    elif handlm == processed.multi_hand_landmarks[1]:
                        right_landmark_list.append(landmark)

            draw.draw_landmarks(frame, handlm, mpHands.HAND_CONNECTIONS)

    return left_landmark_list, right_landmark_list

def get_distance(frame, landmark_list):
    if len(landmark_list) < 2:
        return
    (x1, y1), (x2, y2) = (landmark_list[0][1], landmark_list[0][2]), \
        (landmark_list[1][1], landmark_list[1][2])
    cv2.circle(frame, (x1, y1), 7, (0, 255, 0), cv2.FILLED)
    cv2.circle(frame, (x2, y2), 7, (0, 255, 0), cv2.FILLED)
    cv2.line(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
    L = hypot(x2 - x1, y2 - y1)

    return L

def detect_eye_gesture(face_landmarks):
    left_eye = [face_landmarks.landmark[i] for i in range(33, 42)]
    right_eye = [face_landmarks.landmark[i] for i in range(42, 51)]

    left_eye_height = abs(left_eye[1].y - left_eye[5].y)
    right_eye_height = abs(right_eye[1].y - right_eye[5].y)

    if left_eye_height < 0.02 and right_eye_height > 0.03:
        return "BLINK_LEFT"
    elif right_eye_height < 0.02 and left_eye_height > 0.03:
        return "BLINK_RIGHT"
    return None

def detect_palm_gesture(hand_landmarks):
    thumb_tip = hand_landmarks.landmark[4]
    index_tip = hand_landmarks.landmark[8]
    middle_tip = hand_landmarks.landmark[12]

    if all(landmark.y < hand_landmarks.landmark[0].y for landmark in [thumb_tip, index_tip, middle_tip]):
        return "FIVE"
    elif all(landmark.y > hand_landmarks.landmark[0].y for landmark in [thumb_tip, index_tip, middle_tip]):
        return "FIST"
    return None

if __name__ == '__main__':
    main()




Play/Pause Media
Play/Pause Media
Play/Pause Media
Previous Track
Previous Track
Previous Track
Previous Track
Previous Track
Previous Track
Previous Track
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Mute Media
Mute Media
Mute Media
Mute Media
Mute Media
Mute Media
Mute Media
Mute Media
Mute Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Media
Play/Pause Med

In [6]:

pip install pygetwindow pymsgbox pyscreeze PyTweening


Collecting pygetwindowNote: you may need to restart the kernel to use updated packages.

  Downloading PyGetWindow-0.0.9.tar.gz (9.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pymsgbox
  Downloading PyMsgBox-1.0.9.tar.gz (18 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'error'


  error: subprocess-exited-with-error
  
  pip subprocess to install build dependencies did not run successfully.
  exit code: 2
  
  [42 lines of output]
  Collecting setuptools>=40.8.0
    Using cached setuptools-75.6.0-py3-none-any.whl (1.2 MB)
  Collecting wheel
    Using cached wheel-0.45.1-py3-none-any.whl (72 kB)
  Installing collected packages: wheel, setuptools
  ERROR: Exception:
  Traceback (most recent call last):
    File "C:\Users\Goutham\AppData\Local\Temp\pip-standalone-pip-6822nv4b\__env_pip__.zip\pip\_internal\cli\base_command.py", line 167, in exc_logging_wrapper
      status = run_func(*args)
    File "C:\Users\Goutham\AppData\Local\Temp\pip-standalone-pip-6822nv4b\__env_pip__.zip\pip\_internal\cli\req_command.py", line 205, in wrapper
      return func(self, options, args)
    File "C:\Users\Goutham\AppData\Local\Temp\pip-standalone-pip-6822nv4b\__env_pip__.zip\pip\_internal\commands\install.py", line 405, in run
      installed = install_given_reqs(
    File "C:\U

In [7]:
pip install pyautogui  

Collecting pyautoguiNote: you may need to restart the kernel to use updated packages.

  Using cached PyAutoGUI-0.9.54.tar.gz (61 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'error'


  error: subprocess-exited-with-error
  
  pip subprocess to install build dependencies did not run successfully.
  exit code: 2
  
  [42 lines of output]
  Collecting setuptools>=40.8.0
    Using cached setuptools-75.6.0-py3-none-any.whl (1.2 MB)
  Collecting wheel
    Using cached wheel-0.45.1-py3-none-any.whl (72 kB)
  Installing collected packages: wheel, setuptools
  ERROR: Exception:
  Traceback (most recent call last):
    File "C:\Users\Goutham\AppData\Local\Temp\pip-standalone-pip-4lu52td7\__env_pip__.zip\pip\_internal\cli\base_command.py", line 167, in exc_logging_wrapper
      status = run_func(*args)
    File "C:\Users\Goutham\AppData\Local\Temp\pip-standalone-pip-4lu52td7\__env_pip__.zip\pip\_internal\cli\req_command.py", line 205, in wrapper
      return func(self, options, args)
    File "C:\Users\Goutham\AppData\Local\Temp\pip-standalone-pip-4lu52td7\__env_pip__.zip\pip\_internal\commands\install.py", line 405, in run
      installed = install_given_reqs(
    File "C:\U

In [None]:
import cv2
import numpy as np
import mediapipe as mp
import screen_brightness_control as sbc
import pyautogui
from math import hypot
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL

def main():
    
    devices = AudioUtilities.GetSpeakers()
    interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume = cast(interface, POINTER(IAudioEndpointVolume))
    volRange = volume.GetVolumeRange() 
    minVol, maxVol, _ = volRange

    
    mpHands = mp.solutions.hands
    hands = mpHands.Hands(
        static_image_mode=False,
        model_complexity=1,
        min_detection_confidence=0.75,
        min_tracking_confidence=0.75,
        max_num_hands=2)

    
    mpFaceMesh = mp.solutions.face_mesh
    face_mesh = mpFaceMesh.FaceMesh(
        min_detection_confidence=0.75, 
        min_tracking_confidence=0.75,
        refine_landmarks=True
    )

    
    mpDrawing = mp.solutions.drawing_utils
    hand_landmark_style = mpDrawing.DrawingSpec(
        color=(0, 255, 0),  
        thickness=2, 
        circle_radius=3
    )
    hand_connection_style = mpDrawing.DrawingSpec(
        color=(255, 0, 0),  
        thickness=2
    )
    
    face_landmark_style = mpDrawing.DrawingSpec(
        color=(0, 255, 0),  
        thickness=1, 
        circle_radius=1
    )
    face_connection_style = mpDrawing.DrawingSpec(
        color=(255, 0, 0),     
        thickness=1
    )

    
    cap = cv2.VideoCapture(0)
    
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            
            
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            
            processed_hands = hands.process(frameRGB)
            left_landmark_list, right_landmark_list = get_left_right_landmarks(
                frame, processed_hands, mpDrawing, mpHands, 
                hand_landmark_style, hand_connection_style
            )

            
            processed_face = face_mesh.process(frameRGB)

            
            if left_landmark_list:
                left_distance = get_distance(frame, left_landmark_list)
                if left_distance:
                    b_level = np.interp(left_distance, [50, 100], [0, 100])
                    sbc.set_brightness(int(b_level))

            
            if right_landmark_list:
                right_distance = get_distance(frame, right_landmark_list)
                if right_distance:
                    vol = np.interp(right_distance, [50, 100], [minVol, maxVol])
                    volume.SetMasterVolumeLevel(vol, None)

            
            if processed_face.multi_face_landmarks:
                for face_landmarks in processed_face.multi_face_landmarks:
                    mpDrawing.draw_landmarks(
                        frame, 
                        face_landmarks, 
                        mpFaceMesh.FACEMESH_CONTOURS,
                        landmark_drawing_spec=face_landmark_style,
                        connection_drawing_spec=face_connection_style
                    )
                    
                    
                    eye_gesture = detect_eye_gesture(face_landmarks)
                    if eye_gesture == "BLINK_LEFT":
                        pyautogui.click(button='left')
                    elif eye_gesture == "BLINK_RIGHT":
                        pyautogui.click(button='right')

            
            if processed_hands.multi_hand_landmarks:
                for hand_landmarks in processed_hands.multi_hand_landmarks:
                    palm_gesture = detect_palm_gesture(hand_landmarks)
                    if palm_gesture == "FIVE":
                        pyautogui.press('playpause')
                    elif palm_gesture == "FIST":
                        pyautogui.press('space')  

            
            cv2.imshow('Gesture Control', frame)
            
            
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    
    finally:
        cap.release()
        cv2.destroyAllWindows()

def get_left_right_landmarks(frame, processed, draw, mpHands, landmark_style, connection_style):
    left_landmark_list = []
    right_landmark_list = []

    if processed.multi_hand_landmarks:
        for hand_idx, handlm in enumerate(processed.multi_hand_landmarks):
            for idx, found_landmark in enumerate(handlm.landmark):
                height, width, _ = frame.shape
                x, y = int(found_landmark.x * width), int(found_landmark.y * height)
                if idx == 4 or idx == 8:
                    landmark = [idx, x, y]
                    if hand_idx == 0:
                        left_landmark_list.append(landmark)
                    elif hand_idx == 1:
                        right_landmark_list.append(landmark)

            
            draw.draw_landmarks(
                frame, 
                handlm, 
                mpHands.HAND_CONNECTIONS, 
                landmark_drawing_spec=landmark_style,
                connection_drawing_spec=connection_style
            )

    return left_landmark_list, right_landmark_list

def get_distance(frame, landmark_list):  
    if len(landmark_list) < 2:
        return None
    (x1, y1), (x2, y2) = (landmark_list[0][1], landmark_list[0][2]), \
        (landmark_list[1][1], landmark_list[1][2])
    cv2.circle(frame, (x1, y1), 7, (0, 255, 0), cv2.FILLED)
    cv2.circle(frame, (x2, y2), 7, (0, 255, 0), cv2.FILLED)
    cv2.line(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
    L = hypot(x2 - x1, y2 - y1)

    return L

def detect_eye_gesture(face_landmarks):
    left_eye = [face_landmarks.landmark[i] for i in range(33, 42)]
    right_eye = [face_landmarks.landmark[i] for i in range(42, 51)]

    left_eye_height = abs(left_eye[1].y - left_eye[5].y)
    right_eye_height = abs(right_eye[1].y - right_eye[5].y)

    
    print(f"Left eye height: {left_eye_height}, Right eye height: {right_eye_height}")

    
    if left_eye_height < 0.13 and right_eye_height > 0.18:
        print("LEFT BLINK DETECTED!")
        return "BLINK_LEFT"
    elif right_eye_height < 0.13 and left_eye_height > 0.18:
        print("RIGHT BLINK DETECTED!")
        return "BLINK_RIGHT"
    return None

def detect_palm_gesture(hand_landmarks):
    thumb_tip = hand_landmarks.landmark[4]
    index_tip = hand_landmarks.landmark[8]
    middle_tip = hand_landmarks.landmark[12]
    ringfing_tip = hand_landmarks.landmark[16]
    lil_tip = hand_landmarks.landmark[20]

    if all(landmark.y < hand_landmarks.landmark[0].y for landmark in [thumb_tip, index_tip, middle_tip, ringfing_tip, lil_tip]):
        return "FIVE"
    elif all(landmark.y > hand_landmarks.landmark[0].y for landmark in [thumb_tip, index_tip, middle_tip, ringfing_tip, lil_tip]):
        return "FIST"
    return None

if __name__ == '__main__':
    main()



Left eye height: 0.13338756561279297, Right eye height: 0.09196197986602783
Left eye height: 0.14094775915145874, Right eye height: 0.09849655628204346
Left eye height: 0.1438840627670288, Right eye height: 0.09541493654251099
Left eye height: 0.14491426944732666, Right eye height: 0.09906011819839478
Left eye height: 0.14511758089065552, Right eye height: 0.09514981508255005
Left eye height: 0.14538830518722534, Right eye height: 0.09575152397155762
Left eye height: 0.14772170782089233, Right eye height: 0.09716278314590454
Left eye height: 0.1443226933479309, Right eye height: 0.0978732705116272
Left eye height: 0.14057588577270508, Right eye height: 0.10268861055374146
Left eye height: 0.1397433876991272, Right eye height: 0.1028563380241394
Left eye height: 0.14433658123016357, Right eye height: 0.09671813249588013
Left eye height: 0.14345526695251465, Right eye height: 0.09912639856338501
Left eye height: 0.14311158657073975, Right eye height: 0.09460115432739258
Left eye height: 