# 1. Import and Install Dependencies

- Import _library_ yang diperlukan

In [2]:
import cv2
import time
import pygame
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mediapipe as mp
import joblib
from sklearn.preprocessing import StandardScaler
from pygame import mixer

# Short aliases for the MediaPipe solution modules used throughout the notebook
mp_facemesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
# Private helper of drawing_utils (note the leading underscore). It is called in
# get_ear with (x, y, frame_width, frame_height); presumably it maps normalized
# landmark coordinates to pixel coordinates — TODO confirm it is still available
# in the installed MediaPipe version.
denormalize_coordinates = mp_drawing._normalized_to_pixel_coordinates

%matplotlib inline

pygame 2.0.1 (SDL 2.0.14, Python 3.9.0)
Hello from the pygame community. https://www.pygame.org/contribute.html


# 2. Define the Function

- Mendefinisikan titik _landmark_ beserta indeksnya

In [4]:
# All landmark indices belonging to each eye: the MediaPipe connection
# sets are flattened and de-duplicated through a set.
all_left_eye_idxs = set(np.ravel(list(mp_facemesh.FACEMESH_LEFT_EYE)))
all_right_eye_idxs = set(np.ravel(list(mp_facemesh.FACEMESH_RIGHT_EYE)))

# Both eyes combined (useful for plotting every eye landmark at once)
all_idxs = all_left_eye_idxs | all_right_eye_idxs

# The six landmarks per eye used for the EAR computation,
# listed in the order P1, P2, P3, P4, P5, P6
chosen_left_eye_idxs = [362, 385, 387, 263, 373, 380]
chosen_right_eye_idxs = [33, 160, 158, 133, 153, 144]
all_chosen_idxs = [*chosen_left_eye_idxs, *chosen_right_eye_idxs]

- Mendefinisikan fungsi **distance** untuk menghitung jarak antara dua titik

In [5]:
def distance(point_1, point_2):
    """Return the Euclidean (L2) distance between two points.

    Coordinates are paired positionally with zip(), so any extra
    coordinates of the longer point are ignored.
    """
    squared_total = 0
    for a, b in zip(point_1, point_2):
        squared_total += (a - b) ** 2
    return squared_total ** 0.5

- Mendefinisikan fungsi **get_ear** untuk mengkalkulasi nilai *eye aspect ratio* untuk satu mata

In [6]:
def get_ear(landmarks, refer_idxs, frame_width, frame_height):
    """
    Calculate Eye Aspect Ratio for one eye.

    EAR = (|P2 - P6| + |P3 - P5|) / (2 * |P1 - P4|)

    Args:
        landmarks: (list) Detected landmarks list
        refer_idxs: (list) Index positions of the chosen landmarks
                            in order P1, P2, P3, P4, P5, P6
        frame_width: (int) Width of captured frame
        frame_height: (int) Height of captured frame

    Returns:
        ear: (float) Eye aspect ratio, or 0.0 when it cannot be computed
        coords_points: (list) Pixel coordinates of P1..P6, or None when
                       the ratio cannot be computed
    """
    # Value handed back whenever the ratio cannot be computed
    fallback = (0.0, None)

    try:
        # Convert the six normalized landmarks to pixel coordinates
        coords_points = []
        for i in refer_idxs:
            lm = landmarks[i]
            coord = denormalize_coordinates(lm.x, lm.y,
                                            frame_width, frame_height)
            # A missing pixel coordinate cannot be measured; previously this
            # only surfaced as a TypeError swallowed by a bare except
            if coord is None:
                return fallback
            coords_points.append(coord)

        # Two vertical eyelid distances and the horizontal eye width
        P2_P6 = distance(coords_points[1], coords_points[5])
        P3_P5 = distance(coords_points[2], coords_points[4])
        P1_P4 = distance(coords_points[0], coords_points[3])

        # Degenerate eye (both corners on the same pixel): avoid dividing by zero
        if P1_P4 == 0:
            return fallback

        # Compute the eye aspect ratio
        ear = (P2_P6 + P3_P5) / (2.0 * P1_P4)

    except Exception:
        # Best effort: bad indices or malformed landmarks yield the fallback.
        # Unlike a bare "except:", KeyboardInterrupt and SystemExit still
        # propagate, so the kernel can be interrupted.
        return fallback

    return ear, coords_points

- Mendefinisikan fungsi **calculate_avg_ear** untuk mengkalkulasi nilai rata-rata *eye aspect ratio* untuk kedua mata

In [7]:
def calculate_avg_ear(landmarks, left_eye_idxs, right_eye_idxs, image_w, image_h):
    """Average the Eye Aspect Ratio of the left and right eye.

    Returns:
        Avg_EAR: mean of the two per-eye EAR values
        (left, right): the landmark coordinates returned by get_ear
                       for each eye
    """
    # Evaluate the left eye first, then the right eye
    per_eye = [
        get_ear(landmarks, eye_idxs, image_w, image_h)
        for eye_idxs in (left_eye_idxs, right_eye_idxs)
    ]
    (left_ear, left_lm_coordinates), (right_ear, right_lm_coordinates) = per_eye

    Avg_EAR = (left_ear + right_ear) / 2.0
    return Avg_EAR, (left_lm_coordinates, right_lm_coordinates)

- Mendefinisikan variabel untuk _library_ mixer

In [33]:
# Start the pygame audio mixer before any Sound object is created
mixer.init()

# Load the three cue sounds
step_sound = mixer.Sound('../Sounds/step.wav')
alarm_sound = mixer.Sound('../Sounds/ping.mp3')
start_sound = mixer.Sound('../Sounds/mh_notification.mp3')

# Every cue is played at the same low volume
for cue in (step_sound, alarm_sound, start_sound):
    cue.set_volume(0.1)

# 3. Load Model

- Memuat model SVM dan _feature scaling_ dengan akurasi terbaik yang diperoleh pada tahap pelatihan dan pengujian

In [9]:
# Folder containing the trained SVC model and its fitted feature scaler
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere
model_path = r"D:\UGM\Teknik Fisika 18\[Someone]\Coding\Kode Final (Fix)\Models\Pelatihan_2\variation_14"

svc_model_file = os.path.join(model_path, 'svc_model_14.pkl')
scaler_file = os.path.join(model_path, 'standar_scaler_14.pkl')

# joblib.load unpickles the files, so only load artefacts from a trusted source
loaded_svc_model = joblib.load(svc_model_file)
loaded_scaler = joblib.load(scaler_file)

# 4. Make Detection

## 4.1. Make Detection to Video File

- Mendefinisikan variabel yang digunakan dalam proses deteksi

In [10]:
# Per-frame logs appended to by the detection loop
data_EAR, data_status, data_duration = [], [], []

# EAR values split by the predicted driver status
data_EAR_awake, data_EAR_drowsy = [], []

# Ceiling applied to the drowsy-frame counter
max_drowsy_frame = 45

- Menjalankan proses deteksi pada _input_ berupa data video

In [34]:
drowsy_frame = 0
duration = 0

# Counter thresholds that switch the alarm on and off
alarm_on_frame = 30
alarm_off_frame = 15

# change with your video path
video_path = r"D:\UGM\Teknik Fisika 18\[Someone]\Dataset\Data Primer\video\drowsy\glasses\2_female_glasses_drowsy_1.avi"

cap = cv2.VideoCapture(video_path)

# Get the FPS reported by the video source
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Print the FPS
print("Camera FPS:", fps)

try:
    # Initiate face model
    with mp_facemesh.FaceMesh(
        static_image_mode=False,        # Default=False
        max_num_faces=1,                # Default=1
        refine_landmarks=False,         # Default=False
        min_detection_confidence=0.5,   # Default=0.5
        min_tracking_confidence=0.5,    # Default=0.5
    ) as face_mesh:

        while True:
            ret, frame = cap.read()

            # No frame: end of the video, or the source could not be opened
            if not ret:
                break

            # Size of the frame actually delivered (rows, cols)
            imgH, imgW = frame.shape[:2]

            # Recolor feed; the read-only flag is set while MediaPipe processes it
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            results = face_mesh.process(image).multi_face_landmarks

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if results:
                # max_num_faces=1, so there is at most one detected face
                landmarks = results[0].landmark
                EAR, _ = calculate_avg_ear(
                    landmarks,
                    chosen_left_eye_idxs,
                    chosen_right_eye_idxs,
                    imgW,
                    imgH
                )

                # Single-sample feature matrix of shape (1, 1)
                EAR_reshape = np.array([[EAR]], dtype=float)

                # Feature scaling
                EAR_scaled = loaded_scaler.transform(EAR_reshape)

                # Predict using the loaded SVC model
                driver_status = loaded_svc_model.predict(EAR_scaled)[0]

                duration += 1

                if driver_status == 'drowsy':
                    drowsy_frame += 1
                    data_EAR_drowsy.append(EAR)

                elif driver_status == 'awake':
                    data_EAR_awake.append(EAR)
                    # Clamp an overshooting counter, otherwise count down to 0.
                    # (The original used "&", whose precedence dropped the
                    # upper-bound check from the condition.)
                    if drowsy_frame > max_drowsy_frame:
                        drowsy_frame = max_drowsy_frame
                    elif drowsy_frame > 0:
                        drowsy_frame -= 1
                    if drowsy_frame <= alarm_off_frame:
                        alarm_sound.stop()

                if drowsy_frame >= alarm_on_frame:
                    cv2.putText(image,
                                'ALERT, ALERT, ALERT',
                                (150, 150),
                                cv2.FONT_HERSHEY_COMPLEX,
                                0.9, (255, 255, 255), 2)

                    # Start the alarm only when it is not already playing;
                    # calling play() on every frame stacks overlapping copies
                    if alarm_sound.get_num_channels() == 0:
                        alarm_sound.play()

                # Append EAR Value and Driver Status
                data_EAR.append(EAR)
                data_status.append(driver_status)
                data_duration.append(duration)

                # Print the EAR Value and Driver Status
                print(f"EAR: {round(EAR, 2)}, Driver Status: {driver_status}, Drowsy Frame: {drowsy_frame}")

                # Put the EAR value in Video Frame
                cv2.putText(image,
                            f"EAR: {round(EAR, 2)}", (1, 24),
                            cv2.FONT_HERSHEY_COMPLEX,
                            0.9, (255, 255, 255), 2)

                # Put the Awake / Drowsy Status in Video Frame
                cv2.putText(image,
                            f"Driver status: {driver_status}",
                            (1, 48),
                            cv2.FONT_HERSHEY_COMPLEX,
                            0.9, (255, 255, 255), 2)

                # Put the Number of Drowsy Frame in Video Frame
                cv2.putText(image,
                            f"Drowsy Frame: {drowsy_frame}",
                            (1, 72),
                            cv2.FONT_HERSHEY_COMPLEX,
                            0.9, (255, 255, 255), 2)

            cv2.imshow("Webcam Feed", image)

            # Press 'q' to stop early
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always silence the alarm and free the capture and windows, even if the
    # loop is interrupted or raises
    alarm_sound.stop()
    cap.release()
    cv2.destroyAllWindows()

Camera FPS: 30
EAR: 0.18, Driver Status: drowsy, Drowsy Frame: 1
EAR: 0.18, Driver Status: drowsy, Drowsy Frame: 2
EAR: 0.16, Driver Status: drowsy, Drowsy Frame: 3
EAR: 0.17, Driver Status: drowsy, Drowsy Frame: 4
EAR: 0.18, Driver Status: drowsy, Drowsy Frame: 5
EAR: 0.2, Driver Status: drowsy, Drowsy Frame: 6
EAR: 0.2, Driver Status: drowsy, Drowsy Frame: 7
EAR: 0.18, Driver Status: drowsy, Drowsy Frame: 8
EAR: 0.19, Driver Status: drowsy, Drowsy Frame: 9
EAR: 0.18, Driver Status: drowsy, Drowsy Frame: 10
EAR: 0.19, Driver Status: drowsy, Drowsy Frame: 11
EAR: 0.16, Driver Status: drowsy, Drowsy Frame: 12
EAR: 0.17, Driver Status: drowsy, Drowsy Frame: 13
EAR: 0.17, Driver Status: drowsy, Drowsy Frame: 14
EAR: 0.18, Driver Status: drowsy, Drowsy Frame: 15
EAR: 0.19, Driver Status: drowsy, Drowsy Frame: 16
EAR: 0.18, Driver Status: drowsy, Drowsy Frame: 17
EAR: 0.18, Driver Status: drowsy, Drowsy Frame: 18
EAR: 0.17, Driver Status: drowsy, Drowsy Frame: 19
EAR: 0.17, Driver Status: d

## 4.2. Make Detection in Real Time

- Mendefinisikan fungsi **set_resolution_with_aspect_ratio** untuk mengubah ukuran dan rasio dari frame kamera

In [20]:
def set_resolution_with_aspect_ratio(cap, target_height, aspect_ratio):
    """Request a capture resolution with the given height and aspect ratio.

    The width is derived as int(target_height * aspect_ratio); width and
    height are then passed to cap.set(), width first.
    """
    requested = (
        (cv2.CAP_PROP_FRAME_WIDTH, int(target_height * aspect_ratio)),
        (cv2.CAP_PROP_FRAME_HEIGHT, target_height),
    )
    for prop_id, value in requested:
        cap.set(prop_id, value)

- Mengecek kondisi dan posisi tampilan kamera

In [21]:
cap = cv2.VideoCapture(0)

# Set the resolution 
# set_resolution_with_aspect_ratio(cap, 720, 4/3)

# Add a delay to allow the camera to initialize (optional).
# time.sleep is used because cv2.waitKey is only documented to work
# while a HighGUI window exists, and none has been created yet.
time.sleep(1)  # 1-second delay

# Get the resolution and fps to confirm the change
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
print(f"Camera resolution: {width} x {height}")
print(f"Frames per second: {fps}")

try:
    while True:
        ret, frame = cap.read()

        # Without this check a failed read passes frame=None to imshow,
        # which raises and leaves the camera locked
        if not ret:
            print("Failed to read a frame from the camera")
            break

        cv2.imshow('frame', frame)

        # Press 'q' to close the preview
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raises
    cap.release()
    cv2.destroyAllWindows()

Camera resolution: 640 x 480
Frames per second: 30.0


- Mendefinisikan variabel yang digunakan dalam proses deteksi

In [22]:
# Per-frame logs appended to by the detection loop
data_EAR, data_status, data_duration = [], [], []

# EAR values split by the predicted driver status
data_EAR_awake, data_EAR_drowsy = [], []

# Ceiling applied to the drowsy-frame counter
max_drowsy_frame = 45

- Menjalankan proses deteksi secara _real time_ melalui kamera

In [26]:
drowsy_frame = 0
duration = 0

# Counter thresholds that switch the alarm on and off
alarm_on_frame = 30
alarm_off_frame = 15

cap = cv2.VideoCapture(0)

# Set the resolution 
#set_resolution_with_aspect_ratio(cap, 720, 4/3)

# Get the FPS reported by the camera
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Print the FPS
print("Camera FPS:", fps)

try:
    # Initiate face model
    with mp_facemesh.FaceMesh(
        static_image_mode=False,        # Default=False
        max_num_faces=1,                # Default=1
        refine_landmarks=False,         # Default=False
        min_detection_confidence=0.5,   # Default=0.5
        min_tracking_confidence=0.5,    # Default=0.5
    ) as face_mesh:

        while True:
            ret, frame = cap.read()

            # No frame: the camera is unavailable or stopped delivering
            if not ret:
                break

            # Size of the frame actually delivered (rows, cols)
            imgH, imgW = frame.shape[:2]

            # Recolor feed; the read-only flag is set while MediaPipe processes it
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            results = face_mesh.process(image).multi_face_landmarks

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if results:
                # max_num_faces=1, so there is at most one detected face
                landmarks = results[0].landmark
                EAR, _ = calculate_avg_ear(
                    landmarks,
                    chosen_left_eye_idxs,
                    chosen_right_eye_idxs,
                    imgW,
                    imgH
                )

                # Single-sample feature matrix of shape (1, 1)
                EAR_reshape = np.array([[EAR]], dtype=float)

                # Feature scaling
                EAR_scaled = loaded_scaler.transform(EAR_reshape)

                # Predict using the loaded SVC model
                driver_status = loaded_svc_model.predict(EAR_scaled)[0]

                duration += 1

                if driver_status == 'drowsy':
                    drowsy_frame += 1
                    data_EAR_drowsy.append(EAR)

                elif driver_status == 'awake':
                    data_EAR_awake.append(EAR)
                    # Clamp an overshooting counter, otherwise count down to 0.
                    # (The original used "&", whose precedence dropped the
                    # upper-bound check from the condition.)
                    if drowsy_frame > max_drowsy_frame:
                        drowsy_frame = max_drowsy_frame
                    elif drowsy_frame > 0:
                        drowsy_frame -= 1
                    if drowsy_frame <= alarm_off_frame:
                        alarm_sound.stop()

                if drowsy_frame >= alarm_on_frame:
                    cv2.putText(image,
                                'ALERT, ALERT, ALERT',
                                (150, 150),
                                cv2.FONT_HERSHEY_COMPLEX,
                                0.9, (255, 255, 255), 2)

                    # Start the alarm only when it is not already playing;
                    # calling play() on every frame stacks overlapping copies
                    if alarm_sound.get_num_channels() == 0:
                        alarm_sound.play()

                # Append EAR Value and Driver Status
                data_EAR.append(EAR)
                data_status.append(driver_status)
                data_duration.append(duration)

                # Print the EAR Value and Driver Status
                print(f"EAR: {round(EAR, 2)}, Driver Status: {driver_status}, Drowsy Frame: {drowsy_frame}")

                # Put the EAR value in Video Frame
                cv2.putText(image,
                            f"EAR: {round(EAR, 2)}", (1, 24),
                            cv2.FONT_HERSHEY_COMPLEX,
                            0.9, (255, 255, 255), 2)

                # Put the Awake / Drowsy Status in Video Frame
                cv2.putText(image,
                            f"Driver status: {driver_status}",
                            (1, 48),
                            cv2.FONT_HERSHEY_COMPLEX,
                            0.9, (255, 255, 255), 2)

                # Put the Number of Drowsy Frame in Video Frame
                cv2.putText(image,
                            f"Drowsy Frame: {drowsy_frame}",
                            (1, 72),
                            cv2.FONT_HERSHEY_COMPLEX,
                            0.9, (255, 255, 255), 2)

            cv2.imshow("Webcam Feed", image)

            # Press 'q' to stop the detection
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always silence the alarm and free the camera and windows, even if the
    # loop is interrupted or raises
    alarm_sound.stop()
    cap.release()
    cv2.destroyAllWindows()

Camera FPS: 30
EAR: 0.28, Driver Status: awake, Drowsy Frame: 0
EAR: 0.27, Driver Status: awake, Drowsy Frame: 0
EAR: 0.44, Driver Status: awake, Drowsy Frame: 0
EAR: 0.43, Driver Status: awake, Drowsy Frame: 0
EAR: 0.33, Driver Status: awake, Drowsy Frame: 0
EAR: 0.31, Driver Status: awake, Drowsy Frame: 0
EAR: 0.29, Driver Status: awake, Drowsy Frame: 0
EAR: 0.35, Driver Status: awake, Drowsy Frame: 0
EAR: 0.28, Driver Status: awake, Drowsy Frame: 0
EAR: 0.31, Driver Status: awake, Drowsy Frame: 0
EAR: 0.29, Driver Status: awake, Drowsy Frame: 0
EAR: 0.3, Driver Status: awake, Drowsy Frame: 0
EAR: 0.3, Driver Status: awake, Drowsy Frame: 0
EAR: 0.31, Driver Status: awake, Drowsy Frame: 0
EAR: 0.24, Driver Status: awake, Drowsy Frame: 0
EAR: 0.3, Driver Status: awake, Drowsy Frame: 0
EAR: 0.26, Driver Status: awake, Drowsy Frame: 0
EAR: 0.38, Driver Status: awake, Drowsy Frame: 0
EAR: 0.34, Driver Status: awake, Drowsy Frame: 0
EAR: 0.33, Driver Status: awake, Drowsy Frame: 0
EAR: 0.2