## ドライバーの眠気検知

### 前処理

In [13]:
import cv2
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


## データセットの確認

In [2]:
import random
# スクリプトがあるディレクトリのパスを取得
base_dir = os.getcwd()

# 相対パスを設定
drowsy_path = os.path.join(base_dir, 'Driver Drowsiness Dataset (DDD)', 'Drowsy')
non_path = os.path.join(base_dir, 'Driver Drowsiness Dataset (DDD)', 'Non Drowsy')
#drowsy_path = os.path.join(base_dir, 'dataset', 'Drowsy')
#non_path = os.path.join(base_dir, 'dataset', 'Non Drowsy')

image_extension = '.png'

total_drowsy = len([f for f in os.listdir(drowsy_path) if f.lower().endswith(image_extension)])
total_non = len([f for f in os.listdir(non_path) if f.lower().endswith(image_extension)])

print(f"Number of drowsy images in the folder: {total_drowsy}")
print(f"Number of non drowsy images in the folder: {total_non}")

Number of drowsy images in the folder: 22348
Number of non drowsy images in the folder: 19445


## 前処理

In [3]:
import cv2
import dlib
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
face_detector = dlib.get_frontal_face_detector()
eye_detector = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

In [4]:
# EAR計算関数
from scipy.spatial import distance
def get_eye_aspect_ratio(eye):
    A = distance.euclidean(eye[1], eye[5])
    B = distance.euclidean(eye[2], eye[4])
    C = distance.euclidean(eye[0], eye[3])
    return (A + B) / (2.0 * C)

In [5]:
def get_landmarks(shape):
    return [(shape.part(i).x, shape.part(i).y) for i in range(shape.num_parts)]
eye_closed_time = None  # 目を閉じた時刻を追跡

In [6]:
import cv2
import dlib
import numpy as np
import pandas as pd
import os
import random

# dlibの顔検出器とランドマーク予測器
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

scaler_left = StandardScaler()
scaler_right = StandardScaler()

# 68ランドマークにおける目のインデックス
LEFT_EYE_POINTS = list(range(36, 42))
RIGHT_EYE_POINTS = list(range(42, 48))


def extract_ear_from_image(img_path, show=False):
    img = cv2.imread(img_path)
    if img is None:
        return None, None,None
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = detector(gray)
    if len(faces) == 0:
        return None, None,None

    for face in faces:
        landmarks = predictor(gray, face)

        left_eye_coords = np.array([(landmarks.part(n).x, landmarks.part(n).y) for n in LEFT_EYE_POINTS])
        right_eye_coords = np.array([(landmarks.part(n).x, landmarks.part(n).y) for n in RIGHT_EYE_POINTS])

        left_ear = get_eye_aspect_ratio(left_eye_coords)
        right_ear = get_eye_aspect_ratio(right_eye_coords)
        #total_ear = (left_ear + right_ear) / 2


        return img, left_ear, right_ear

    return None, None

# データ格納用リスト
data = []
random.seed(21)
for folder, label in [(drowsy_path, "Drowsy"), (non_path, "Non Drowsy")]:
    files = [f for f in os.listdir(folder) if f.lower().endswith('.png')]
    random.shuffle(files)
    for file in files[:2000]:  # テスト用
        img_path = os.path.join(folder, file)
        img, left_ear,right_ear = extract_ear_from_image(img_path, show=True)
        if img is not None and left_ear is not None and right_ear is not None:
            data.append({"image": img, "Left EAR": left_ear,"Right EAR":right_ear, "label": label})

# DataFrame化
df = pd.DataFrame(data)


In [7]:
# ===== データの準備 =====
# df["image"] はOpenCVのBGR画像なので、RGBに変換しサイズ統一
IMG_SIZE = (64, 64)

images = []
letf_ears = []
right_ears = []
labels = []

for idx, row in df.iterrows():
    img = cv2.cvtColor(row["image"], cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, IMG_SIZE) / 255.0  # 正規化
    images.append(img)
    letf_ears.append(row["Left EAR"])
    right_ears.append(row["Right EAR"])
    labels.append(1 if row["label"] == "Drowsy" else 0)

images = np.array(images, dtype=np.float32)
lears = np.array(letf_ears, dtype=np.float32).reshape(-1, 1)
rears = np.array(right_ears, dtype=np.float32).reshape(-1, 1)
labels = np.array(labels, dtype=np.int32)

In [8]:
# 訓練・テスト分割
X_img_train, X_img_test, X_lear_train, X_lear_test,X_rear_train, X_rear_test, y_train, y_test = train_test_split(
    images, lears, rears,labels, test_size=0.2, random_state=42
)
scaler_left = StandardScaler()
scaler_right = StandardScaler()
train_ear_left_scaled = scaler_left.fit_transform(X_lear_train.reshape(-1, 1))
train_ear_right_scaled = scaler_right.fit_transform(X_rear_train.reshape(-1, 1))

val_ear_left_scaled = scaler_left.transform(X_lear_test.reshape(-1, 1))
val_ear_right_scaled = scaler_right.transform(X_lear_test.reshape(-1, 1))

# --- EAR差強調 ---
alpha = 3.0  # 差を強調する係数（調整可能）
train_ear_left_scaled *= alpha
train_ear_right_scaled *= alpha
val_ear_left_scaled *= alpha
val_ear_right_scaled *= alpha



In [9]:
import tensorflow as tf

class LossGapEarlyStopping(tf.keras.callbacks.Callback):
    def __init__(self, threshold=0.0001):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        train_loss = logs.get('loss')
        val_loss = logs.get('val_loss')
        if train_loss is None or val_loss is None:
            return
        
        gap = abs(val_loss - train_loss)
        print(f"Epoch {epoch+1}: loss gap = {gap:.6f}")
        
        if gap < self.threshold:
            print(f"Stopping early: loss gap {gap:.6f} < threshold {self.threshold}")
            self.model.stop_training = True

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, Input
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from tensorflow.keras.layers import Lambda,Concatenate
# ===== CNNモデルの構築 =====
# 画像入力
img_input = Input(shape=(64, 64, 3))
x = Conv2D(32, (3,3), activation='relu')(img_input)
x = MaxPooling2D()(x)
x = Conv2D(64, (3,3), activation='relu')(x)
x = MaxPooling2D()(x)
x = Flatten()(x)
x = Lambda(lambda z: z * 0.3)(x)  # CNN出力を0.5倍

# 左EAR部分
ear_left_input = Input(shape=(1,), name="ear_left_input")
e_left = Dense(16, activation='relu')(ear_left_input)
e_left = Dense(32, activation='relu')(e_left)
e_left = Lambda(lambda z: z * 7.0)(e_left)  # 左EARを2倍

# 右EAR部分
ear_right_input = Input(shape=(1,), name="ear_right_input")
e_right = Dense(16, activation='relu')(ear_right_input)
e_right = Dense(32, activation='relu')(e_right)
e_right = Lambda(lambda z: z * 7.0)(e_right)  # 右EARを2倍

merged = Concatenate()([x, e_left, e_right])
merged = Dense(128, activation='relu')(merged)
output = Dense(1, activation='sigmoid')(merged)


model = models.Model(inputs=[img_input, ear_left_input, ear_right_input], outputs=output)
loss_gap_stop = LossGapEarlyStopping(threshold=0.001)

checkpoint = ModelCheckpoint(
    'best_model_human.keras',    # 保存ファイル名
    monitor='val_loss',    # val_loss を監視
    save_best_only=True,   # 最小値の時だけ保存
    mode='min',            # val_loss は小さいほど良い
    verbose=1
)

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [11]:
history = model.fit(
    [X_img_train, train_ear_left_scaled, train_ear_right_scaled], y_train,
    validation_data=([X_img_test, val_ear_left_scaled, val_ear_right_scaled], y_test),
    epochs=6,
    batch_size=32,
    callbacks=[loss_gap_stop,checkpoint]
)

# ===== 評価 =====
loss, acc = model.evaluate([X_img_test, val_ear_left_scaled, val_ear_right_scaled], y_test)
print(f"Validation Accuracy: {acc:.4f}")

Epoch 1/6




[1m80/81[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 61ms/step - accuracy: 0.6785 - loss: 0.6104Epoch 1: loss gap = 0.201415

Epoch 1: val_loss improved from inf to 0.29925, saving model to best_model.keras
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 67ms/step - accuracy: 0.6805 - loss: 0.6078 - val_accuracy: 0.8663 - val_loss: 0.2992
Epoch 2/6
[1m80/81[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 64ms/step - accuracy: 0.9309 - loss: 0.2047Epoch 2: loss gap = 0.079665

Epoch 2: val_loss improved from 0.29925 to 0.07127, saving model to best_model.keras
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - accuracy: 0.9314 - loss: 0.2034 - val_accuracy: 0.9860 - val_loss: 0.0713
Epoch 3/6
[1m80/81[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 65ms/step - accuracy: 0.9869 - loss: 0.0525Epoch 3: loss gap = 0.002788

Epoch 3: val_loss improved from 0.07127 to 0.03935, saving model to best_model.keras
[1m81/81[0m [32

In [None]:
model = tf.keras.models.load_model('best_model_human.keras',safe_mode=False)
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)

    for face in faces:
        landmarks = predictor(gray, face)
        
        left_eye_coords = np.array([(landmarks.part(n).x, landmarks.part(n).y) for n in LEFT_EYE_POINTS])
        right_eye_coords = np.array([(landmarks.part(n).x, landmarks.part(n).y) for n in RIGHT_EYE_POINTS])
        
        left_ear = get_eye_aspect_ratio(left_eye_coords)
        right_ear = get_eye_aspect_ratio(right_eye_coords)

        
        left_ear = scaler_left.transform([[left_ear]])[0][0] * alpha
        right_ear = scaler_right.transform([[right_ear]])[0][0] * alpha



        left_ear *= alpha
        right_ear*= alpha

        # 顔矩形描画
        x, y, w, h = (face.left(), face.top(), face.width(), face.height())
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # モデル入力用の画像前処理
        resized_img = cv2.resize(frame, IMG_SIZE)
        img_array = resized_img.astype("float32") / 255.0
        img_array = np.expand_dims(img_array, axis=0)  # shape (1, 32, 32, 3)

        lear_array = np.array([[left_ear]], dtype="float32") 
        rear_array = np.array([[right_ear]], dtype="float32")  # shape (1, 1)

        # 推論
        pred = model.predict([img_array, lear_array,rear_array])
        print(pred[0])
        label = "Drowsy" if pred[0][0] > 0.5 else "Non Drowsy"

        # 結果表示
        cv2.putText(frame, f"{label} (Left EAR: {left_ear:.2f},Right EAR: {right_ear:.2f})", 
                    (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 
                    0.8, (0, 0, 255) if label=="Drowsy" else (0, 255, 0), 2)
    
    cv2.imshow("Drowsiness Detection", frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step




[0.00604801]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[0.14118889]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[0.22967827]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[0.14490095]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[0.9567756]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[0.96562845]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[0.3819242]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[0.76501167]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[0.21568704]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[0.6542259]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[0.9954682]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[0.9849085]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

: 



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 411ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

In [None]:
import os,sys
import cv2
import dlib
from imutils import face_utils
from scipy.spatial import distance

cap = cv2.VideoCapture(0)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_alt2.xml')
face_parts_detector = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

def calc_ear(eye):
    A = distance.euclidean(eye[1], eye[5])
    B = distance.euclidean(eye[2], eye[4])
    C = distance.euclidean(eye[0], eye[3])
    eye_ear = (A + B) / (2.0 * C)
    return round(eye_ear, 3)

def eye_marker(face_mat, position):
    for i, ((x, y)) in enumerate(position):
        cv2.circle(face_mat, (x, y), 1, (255, 255, 255), -1)
        cv2.putText(face_mat, str(i), (x + 2, y - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1)

while True:
    tick = cv2.getTickCount()

    ret, rgb = cap.read()
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    faces = face_cascade.detectMultiScale(
        gray, scaleFactor=1.11, minNeighbors=3, minSize=(100, 100))

    if len(faces) == 1:
        x, y, w, h = faces[0, :]
        cv2.rectangle(rgb, (x, y), (x + w, y + h), (255, 0, 0), 2)
        
        face_gray = gray[y :(y + h), x :(x + w)]
        scale = 480 / h
        face_gray_resized = cv2.resize(face_gray, dsize=None, fx=scale, fy=scale)

        face = dlib.rectangle(0, 0, face_gray_resized.shape[1], face_gray_resized.shape[0])
        face_parts = face_parts_detector(face_gray_resized, face)
        face_parts = face_utils.shape_to_np(face_parts)

        left_eye = face_parts[42:48]
        eye_marker(face_gray_resized, left_eye)

        left_eye_ear = calc_ear(left_eye)
        cv2.putText(rgb, "LEFT eye EAR:{} ".format(left_eye_ear), 
            (10, 100), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1, cv2.LINE_AA)

        right_eye = face_parts[36:42]
        eye_marker(face_gray_resized, right_eye)

        right_eye_ear = calc_ear(right_eye)
        cv2.putText(rgb, "RIGHT eye EAR:{} ".format(round(right_eye_ear, 3)), 
            (10, 120), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1, cv2.LINE_AA)

        if (left_eye_ear + right_eye_ear) < 0.55:
            cv2.putText(rgb,"Sleepy eyes. Wake up!",
                (10,180), cv2.FONT_HERSHEY_PLAIN, 3, (0,0,255), 3, 1)

        cv2.imshow('frame_resize', face_gray_resized)

    fps = cv2.getTickFrequency() / (cv2.getTickCount() - tick)
    cv2.putText(rgb, "FPS:{} ".format(int(fps)), 
        (10, 50), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), 2, cv2.LINE_AA)

    cv2.imshow('frame', rgb)
    if cv2.waitKey(1) == 27:
        break  # esc to quit

cap.release()
cv2.destroyAllWindows()


: 