## ドライバーの眠気検知

### ライブラリのインポート

In [8]:
import cv2
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


## データセットの確認

In [9]:
import random
# Resolve the dataset location relative to the current working directory.
base_dir = os.getcwd()
dataset_root = os.path.join(base_dir, 'Driver Drowsiness Dataset (DDD)')

# One sub-folder per class.
# (An alternative layout used previously was <base_dir>/dataset/Drowsy and
# <base_dir>/dataset/Non Drowsy.)
drowsy_path = os.path.join(dataset_root, 'Drowsy')
non_path = os.path.join(dataset_root, 'Non Drowsy')

image_extension = '.png'


def _count_images(folder):
    """Return how many entries of `folder` end with `image_extension` (case-insensitive)."""
    return sum(1 for name in os.listdir(folder) if name.lower().endswith(image_extension))


total_drowsy = _count_images(drowsy_path)
total_non = _count_images(non_path)

print(f"Number of drowsy images in the folder: {total_drowsy}")
print(f"Number of non drowsy images in the folder: {total_non}")

Number of drowsy images in the folder: 22348
Number of non drowsy images in the folder: 19445


## 前処理

In [10]:
import cv2
import dlib
# OpenCV Haar cascades for faces and eyes, loaded from the directory cv2 exposes
# as cv2.data.haarcascades.
_haar_dir = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier(_haar_dir + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(_haar_dir + 'haarcascade_eye.xml')

# dlib frontal face detector and the 68-point landmark predictor (model file is
# referenced by a relative path). Despite its name, `eye_detector` is the full
# landmark predictor, not an eye-only detector.
face_detector = dlib.get_frontal_face_detector()
eye_detector = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

In [11]:
# EAR計算関数
from scipy.spatial import distance
def get_eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) from six eye landmarks.

    Args:
        eye: Indexable of at least six points; indices 0 and 3 are used for the
            horizontal distance, pairs (1, 5) and (2, 4) for the vertical ones.

    Returns:
        (d(eye[1], eye[5]) + d(eye[2], eye[4])) / (2 * d(eye[0], eye[3])),
        where d is the Euclidean distance.
    """
    vertical_sum = distance.euclidean(eye[1], eye[5]) + distance.euclidean(eye[2], eye[4])
    horizontal = distance.euclidean(eye[0], eye[3])
    return vertical_sum / (2.0 * horizontal)

In [12]:
def get_landmarks(shape):
    """Return every landmark of `shape` as a list of (x, y) tuples, in index order.

    `shape` must expose `num_parts` and `part(i)` returning an object with
    `x` and `y` attributes.
    """
    points = []
    for index in range(shape.num_parts):
        part = shape.part(index)
        points.append((part.x, part.y))
    return points


# Tracks the time at which the eyes were closed; None until a value is assigned.
eye_closed_time = None

In [13]:
import cv2
import dlib
import numpy as np
import pandas as pd
import os
import random
from sklearn.preprocessing import StandardScaler

# dlib face detector and 68-point landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# One EAR scaler per eye
scaler_left, scaler_right = StandardScaler(), StandardScaler()

# Eye landmark indices within the 68-point layout
LEFT_EYE_POINTS = [36, 37, 38, 39, 40, 41]
RIGHT_EYE_POINTS = [42, 43, 44, 45, 46, 47]

def get_eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) from six eye landmarks.

    Args:
        eye: Indexable whose first six items support subtraction and
            `np.linalg.norm` (e.g. a (6, 2) NumPy array).

    Returns:
        (|p2 - p6| + |p3 - p5|) / (2 * |p1 - p4|), with p1..p6 = eye[0..5].
        No guard is applied for a zero horizontal distance.
    """
    p1, p2, p3, p4, p5, p6 = (eye[i] for i in range(6))
    vertical = np.linalg.norm(p2 - p6) + np.linalg.norm(p3 - p5)
    horizontal = np.linalg.norm(p1 - p4)
    return vertical / (2.0 * horizontal)

def crop_eye_region(image, eye_points, padding=5):
    """Cut the padded bounding box of `eye_points` out of `image` and resize it to 64x64.

    Args:
        image: Array indexed as image[y, x]; shape[0] is the height and
            shape[1] the width.
        eye_points: 2-D array whose column 0 holds x and column 1 holds y.
        padding: Margin added on every side of the bounding box.

    Returns:
        The result of cv2.resize on the cropped region with target size (64, 64).

    Note:
        The lower bounds are clamped to 0 and the upper bounds to the image
        size; there is no check that the clamped box is non-empty.
    """
    xs, ys = eye_points[:, 0], eye_points[:, 1]
    img_h, img_w = image.shape[0], image.shape[1]

    # Padded box, kept inside the image
    left = max(np.min(xs) - padding, 0)
    top = max(np.min(ys) - padding, 0)
    right = min(np.max(xs) + padding, img_w)
    bottom = min(np.max(ys) + padding, img_h)

    return cv2.resize(image[top:bottom, left:right], (64, 64))  # resized for training

def extract_eye_data_from_image(img_path):
    """Load an image and extract both eye crops and EAR values for the first detected face.

    Args:
        img_path: Path of the image file, passed to cv2.imread.

    Returns:
        A 5-tuple (left_eye_img, right_eye_img, left_ear, right_ear, img).
        All five entries are None when the image cannot be read, when no face
        is detected, or when an eye crop cannot be produced.
    """
    no_result = (None, None, None, None, None)

    img = cv2.imread(img_path)
    if img is None:
        return no_result
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = detector(gray)
    if len(faces) == 0:
        return no_result

    # Only the first detection is used (the previous loop returned on its
    # first iteration, so later faces were never processed).
    landmarks = predictor(gray, faces[0])

    left_eye_coords = np.array([(landmarks.part(n).x, landmarks.part(n).y) for n in LEFT_EYE_POINTS])
    right_eye_coords = np.array([(landmarks.part(n).x, landmarks.part(n).y) for n in RIGHT_EYE_POINTS])

    left_ear = get_eye_aspect_ratio(left_eye_coords)
    right_ear = get_eye_aspect_ratio(right_eye_coords)

    try:
        left_eye_img = crop_eye_region(img, left_eye_coords)
        right_eye_img = crop_eye_region(img, right_eye_coords)
    except cv2.error:
        # An empty crop (e.g. landmarks lying outside the image) makes
        # cv2.resize fail; report "no usable eyes" so the caller skips this
        # image instead of aborting the whole dataset build.
        return no_result

    return left_eye_img, right_eye_img, left_ear, right_ear, img

# ===== Build the dataset =====
# Number of images sampled from each class folder.
# NOTE(review): the training log recorded further down shows 605 steps per
# epoch, which does not match 15 images per class — confirm the intended value.
SAMPLES_PER_CLASS = 15

data = []
random.seed(42)

for folder, label in [(drowsy_path, "Drowsy"), (non_path, "Non Drowsy")]:
    # os.listdir returns entries in arbitrary order, so sort before the seeded
    # shuffle; otherwise the "reproducible" sample differs between machines.
    files = sorted(f for f in os.listdir(folder) if f.lower().endswith('.png'))
    random.shuffle(files)
    for file in files[:SAMPLES_PER_CLASS]:
        img_path = os.path.join(folder, file)
        left_eye_img, right_eye_img, left_ear, right_ear, _ = extract_eye_data_from_image(img_path)
        # Images without a usable face come back as None and are skipped.
        if left_eye_img is not None:
            data.append({
                "left_eye_img": left_eye_img,
                "right_eye_img": right_eye_img,
                "Left EAR": left_ear,
                "Right EAR": right_ear,
                "label": label
            })

# Convert the collected records to a DataFrame in one step
df = pd.DataFrame(data)
print(f"データ数: {len(df)}")

データ数: 27


In [14]:
# ===== Data preparation (separate left / right eye images) =====
IMG_SIZE = (64, 64)


def _to_model_image(bgr_img):
    """Convert a BGR crop to RGB, resize it to IMG_SIZE and divide pixel values by 255."""
    rgb = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, IMG_SIZE) / 255.0


left_images, right_images = [], []
left_ears, right_ears = [], []
labels = []

for idx, row in df.iterrows():
    left_images.append(_to_model_image(row["left_eye_img"]))
    right_images.append(_to_model_image(row["right_eye_img"]))
    left_ears.append(row["Left EAR"])
    right_ears.append(row["Right EAR"])
    # Binary target: 1 for "Drowsy", 0 for everything else
    labels.append(int(row["label"] == "Drowsy"))

# Convert to NumPy arrays; EAR values become column vectors of shape (n, 1)
left_images = np.array(left_images, dtype=np.float32)
right_images = np.array(right_images, dtype=np.float32)
lears = np.array(left_ears, dtype=np.float32).reshape(-1, 1)
rears = np.array(right_ears, dtype=np.float32).reshape(-1, 1)
labels = np.array(labels, dtype=np.int32)



In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Train / test split; all five arrays are passed to one train_test_split call
(
    X_left_train, X_left_test,
    X_right_train, X_right_test,
    X_lear_train, X_lear_test,
    X_rear_train, X_rear_test,
    y_train, y_test,
) = train_test_split(
    left_images, right_images, lears, rears, labels,
    test_size=0.2,
    random_state=42,
)

# Factor applied to the standardised EAR values to emphasise differences (tunable)
alpha = 3.0

# EAR scaling: each scaler is fitted on the training part only and then
# re-used for the held-out part.
scaler_left = StandardScaler()
scaler_right = StandardScaler()

train_ear_left_scaled = alpha * scaler_left.fit_transform(X_lear_train.reshape(-1, 1))
train_ear_right_scaled = alpha * scaler_right.fit_transform(X_rear_train.reshape(-1, 1))

val_ear_left_scaled = alpha * scaler_left.transform(X_lear_test.reshape(-1, 1))
val_ear_right_scaled = alpha * scaler_right.transform(X_rear_test.reshape(-1, 1))



In [48]:
import tensorflow as tf

class LossGapEarlyStopping(tf.keras.callbacks.Callback):
    """Keras callback that stops training once |val_loss - loss| falls below a threshold.

    Args:
        threshold: Training stops at the end of the first epoch whose absolute
            gap between 'val_loss' and 'loss' is strictly smaller than this.
    """

    def __init__(self, threshold=0.0001):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Print the loss gap for this epoch and request a stop when it is below the threshold.

        Does nothing when `logs` is missing or lacks 'loss' / 'val_loss'.
        """
        # `logs` defaults to None in the signature; normalise it so the
        # lookups below cannot raise AttributeError.
        logs = logs or {}
        train_loss = logs.get('loss')
        val_loss = logs.get('val_loss')
        if train_loss is None or val_loss is None:
            return

        gap = abs(val_loss - train_loss)
        print(f"Epoch {epoch+1}: loss gap = {gap:.6f}")

        if gap < self.threshold:
            print(f"Stopping early: loss gap {gap:.6f} < threshold {self.threshold}")
            self.model.stop_training = True

In [49]:
import tensorflow as tf
from tensorflow.keras import layers, models, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Lambda, Concatenate
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# ===== CNN branch builder (called once for each eye) =====
def build_cnn_branch():
    """Create one image branch: Input(64, 64, 3) -> two Conv2D/MaxPooling2D stages -> Flatten -> x0.3.

    Each call instantiates new layers, so calling it for the left and the
    right eye yields two branches with independent weights (the code is
    shared, the weights are not).

    Returns:
        (img_input, features): the branch's input tensor and its flattened,
        scaled output tensor.
    """
    img_input = Input(shape=(64, 64, 3))
    features = img_input
    for n_filters in (32, 64):
        features = Conv2D(n_filters, (3, 3), activation='relu')(features)
        features = MaxPooling2D()(features)
    features = Flatten()(features)
    features = Lambda(lambda z: z * 0.3)(features)  # scale down the CNN output
    return img_input, features

# ===== Image inputs, one CNN branch per eye =====
left_img_input, left_features = build_cnn_branch()
right_img_input, right_features = build_cnn_branch()


def _build_ear_branch(input_name):
    """Create one scalar-EAR branch: Input(1,) -> Dense(16) -> Dense(32) -> x7.0.

    Returns the input tensor and the scaled output tensor.
    """
    ear_input = Input(shape=(1,), name=input_name)
    hidden = Dense(16, activation='relu')(ear_input)
    hidden = Dense(32, activation='relu')(hidden)
    hidden = Lambda(lambda z: z * 7.0)(hidden)  # scale up the EAR features
    return ear_input, hidden


# ===== EAR inputs (left, then right) =====
ear_left_input, e_left = _build_ear_branch("ear_left_input")
ear_right_input, e_right = _build_ear_branch("ear_right_input")

# ===== Feature fusion =====
merged = Concatenate()([left_features, right_features, e_left, e_right])
merged = Dense(128, activation='relu')(merged)
output = Dense(1, activation='sigmoid')(merged)

# ===== Model definition =====
# Input order here fixes the order of the arrays passed to fit / evaluate / predict.
model = models.Model(
    inputs=[left_img_input, right_img_input, ear_left_input, ear_right_input],
    outputs=output,
)

# ===== Callbacks =====
# Keep only the weights of the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint(
    'best_model_human_15.keras',
    monitor='val_loss',
    save_best_only=True,
    mode='min',
    verbose=1,
)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [50]:
# ===== Training =====
# Array order matches the model's input order:
# left image, right image, left EAR, right EAR.
train_inputs = [X_left_train, X_right_train, train_ear_left_scaled, train_ear_right_scaled]
val_inputs = [X_left_test, X_right_test, val_ear_left_scaled, val_ear_right_scaled]

history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_test),
    epochs=8,
    batch_size=32,
    callbacks=[checkpoint],
    class_weight={0: 1, 1: 5},  # class 1 (Drowsy) is weighted 5x in the loss
)

# ===== Evaluation =====
# Evaluates the in-memory model as left by the last epoch (not the file saved
# by the checkpoint) on the same held-out split that served as validation data.
loss, acc = model.evaluate(val_inputs, y_test)
print(f"Validation Accuracy: {acc:.4f}")


Epoch 1/8




[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.7547 - loss: 0.7120
Epoch 1: val_loss improved from inf to 0.06442, saving model to best_model_human_15.keras
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 125ms/step - accuracy: 0.7549 - loss: 0.7115 - val_accuracy: 0.9820 - val_loss: 0.0644
Epoch 2/8
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step - accuracy: 0.9718 - loss: 0.1149
Epoch 2: val_loss improved from 0.06442 to 0.03817, saving model to best_model_human_15.keras
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 128ms/step - accuracy: 0.9718 - loss: 0.1149 - val_accuracy: 0.9872 - val_loss: 0.0382
Epoch 3/8
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.9858 - loss: 0.0588
Epoch 3: val_loss did not improve from 0.03817
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 124ms/step - accuracy: 0.9858 - loss: 0.0

In [18]:
import cv2
import dlib
import numpy as np
from scipy.spatial import distance as dist
from collections import deque
from tensorflow.keras.models import load_model
import pygame
from playsound3 import playsound

# ===== EAR computation =====
def eye_aspect_ratio(eye):
    """Return the eye aspect ratio for six eye landmarks.

    Computed as (d(eye[1], eye[5]) + d(eye[2], eye[4])) / (2 * d(eye[0], eye[3]))
    with d the Euclidean distance.
    """
    vertical_sum = dist.euclidean(eye[1], eye[5]) + dist.euclidean(eye[2], eye[4])
    horizontal = dist.euclidean(eye[0], eye[3])
    return vertical_sum / (2.0 * horizontal)

# ===== Moving-average helper =====
class MovingAverage:
    """Running mean over the most recent `size` values."""

    def __init__(self, size=5):
        # Bounded deque: the oldest value is dropped once `size` is exceeded.
        self.values = deque(maxlen=size)

    def update(self, val):
        """Append `val` and return the mean of the retained window.

        Returns `val` itself when the window is still empty after the append.
        """
        self.values.append(val)
        if not self.values:
            return val
        return np.mean(self.values)

# ===== Load the model =====
# NOTE(review): the training cell in this notebook checkpoints to
# 'best_model_human_15.keras'; this loads a different file name — confirm it is
# the intended model.
# safe_mode=False is passed explicitly; the model built in this notebook
# contains Lambda layers, which is presumably why. Only load model files from a
# trusted source with this setting.
model = load_model("best_model_human.keras",safe_mode=False)

# ===== dlib setup =====
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# Eye landmark indices
LEFT_EYE_IDX = list(range(36, 42))
RIGHT_EYE_IDX = list(range(42, 48))

# ===== Moving-average instances =====
ma_left = MovingAverage(size=5)
ma_right = MovingAverage(size=5)
ma_pred = MovingAverage(size=3)   # model prediction

# Audio: initialise the mixer and load the alarm track (relative path).
pygame.mixer.init()
pygame.mixer.music.load("kisho.mp3")

# Alarm state: consecutive "Drowsy" results, the count that triggers the
# alarm, and whether the alarm has already been started.
drowsy_counter = 0
drowsy_threshold = 10
alarm_played = False
# Open capture device 0.
cap = cv2.VideoCapture(0)


# Preprocessing constants; they mirror the training pipeline of this notebook
# (crop_eye_region uses padding=5 and a 64x64 target, followed by BGR -> RGB).
EYE_CROP_PADDING = 5
EYE_INPUT_SIZE = (64, 64)
DROWSY_PROB_THRESHOLD = 0.2  # smoothed prediction above this is labelled "Drowsy"


def _prepare_eye_input(frame_bgr, eye_points, padding=EYE_CROP_PADDING):
    """Crop one eye from a BGR frame and return it as a (1, 64, 64, 3) float32 batch.

    The crop is the padded bounding box of `eye_points`, clamped to the frame,
    resized to EYE_INPUT_SIZE, converted BGR -> RGB and scaled by 1/255.

    Returns None when the clamped box is empty, so the caller can skip the
    face instead of letting cv2.resize fail.
    """
    frame_h, frame_w = frame_bgr.shape[:2]
    x_min = max(int(eye_points[:, 0].min()) - padding, 0)
    y_min = max(int(eye_points[:, 1].min()) - padding, 0)
    x_max = min(int(eye_points[:, 0].max()) + padding, frame_w)
    y_max = min(int(eye_points[:, 1].max()) + padding, frame_h)
    if x_max <= x_min or y_max <= y_min:
        return None

    eye_bgr = cv2.resize(frame_bgr[y_min:y_max, x_min:x_max], EYE_INPUT_SIZE)
    eye_rgb = cv2.cvtColor(eye_bgr, cv2.COLOR_BGR2RGB)
    return np.expand_dims(eye_rgb.astype("float32") / 255.0, axis=0)


# NOTE: this loop relies on `scaler_left`, `scaler_right` and `alpha` from the
# train/test-split cell; that cell must have been run in the same kernel.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray, 0)

        for face in faces:
            shape = predictor(gray, face)
            shape_np = np.zeros((68, 2), dtype="int")
            for i in range(68):
                shape_np[i] = (shape.part(i).x, shape.part(i).y)

            left_eye_points = shape_np[LEFT_EYE_IDX]
            right_eye_points = shape_np[RIGHT_EYE_IDX]

            # Eye crops, preprocessed like the training images. Skip the face
            # when an eye lies outside the frame.
            left_eye_img = _prepare_eye_input(frame, left_eye_points)
            right_eye_img = _prepare_eye_input(frame, right_eye_points)
            if left_eye_img is None or right_eye_img is None:
                continue

            # Smoothed EAR, then the same standardisation and alpha factor
            # that were applied to the training EAR values.
            ear_left = ma_left.update(eye_aspect_ratio(left_eye_points))
            ear_right = ma_right.update(eye_aspect_ratio(right_eye_points))

            ear_left = scaler_left.transform([[ear_left]])[0][0] * alpha
            ear_right = scaler_right.transform([[ear_right]])[0][0] * alpha

            # Names deliberately differ from `ear_left_input` / `ear_right_input`
            # so the Keras Input tensors of the model-building cell are not
            # overwritten in the notebook namespace.
            ear_left_batch = np.array([[ear_left]], dtype=np.float32)
            ear_right_batch = np.array([[ear_right]], dtype=np.float32)

            # Prediction, smoothed over the last few frames
            pred = model.predict(
                [left_eye_img, right_eye_img, ear_left_batch, ear_right_batch],
                verbose=0,
            )[0][0]
            pred = ma_pred.update(pred)

            # Decision
            label = "Drowsy" if pred > DROWSY_PROB_THRESHOLD else "Non Drowsy"
            color = (0, 0, 255) if label == "Drowsy" else (0, 255, 0)

            # On-screen status. The number shown is the scaled left EAR
            # (standardised and multiplied by alpha), not the raw ratio.
            cv2.putText(frame, f"{label} ({ear_left:.2f})", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
            if label == "Drowsy":
                cv2.putText(frame, "!!! WARNING !!!", (100, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 3)
                drowsy_counter += 1
            else:
                drowsy_counter = 0
                # Stop the alarm as soon as the state returns to Non Drowsy
                if alarm_played:
                    pygame.mixer.music.stop()
                    alarm_played = False

            # Start the alarm once Drowsy has persisted long enough.
            # NOTE(review): the argument 1 is pygame's `loops` value, i.e. the
            # track is repeated once after the first playthrough — confirm
            # whether continuous playback (-1) was intended.
            if drowsy_counter >= drowsy_threshold and not alarm_played:
                pygame.mixer.music.play(1)
                alarm_played = True

        cv2.imshow("Drowsiness Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera, close the window and silence the alarm, even
    # when the loop exits through an exception or a kernel interrupt.
    pygame.mixer.music.stop()
    cap.release()
    cv2.destroyAllWindows()



In [52]:
#model.save("my_best_model.keras")

Width: 1280.0, Height: 720.0, FPS: 30.0
