## ドライバーの眠気検知

### ライブラリのインポート

In [2]:
import cv2
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

## データセットの確認

In [3]:
import random
# Resolve the dataset folders relative to the current working directory
# (os.getcwd()), not relative to the notebook file itself.
base_dir = os.getcwd()
dataset_root = os.path.join(base_dir, 'Driver Drowsiness Dataset (DDD)')
drowsy_path = os.path.join(dataset_root, 'Drowsy')
non_path = os.path.join(dataset_root, 'Non Drowsy')

image_extension = '.png'

# Count the files in each class folder whose (lower-cased) name ends with
# the image extension.
total_drowsy = sum(
    1 for name in os.listdir(drowsy_path) if name.lower().endswith(image_extension)
)
total_non = sum(
    1 for name in os.listdir(non_path) if name.lower().endswith(image_extension)
)

print(f"Number of drowsy images in the folder: {total_drowsy}")
print(f"Number of non drowsy images in the folder: {total_non}")

Number of drowsy images in the folder: 22348
Number of non drowsy images in the folder: 19445


In [4]:
import cv2
import dlib
# Haar cascade classifiers loaded from OpenCV's bundled cascade directory.
_cascade_dir = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier(_cascade_dir + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(_cascade_dir + 'haarcascade_eye.xml')

# dlib frontal-face detector and 68-point landmark predictor; the predictor
# model file is expected in the current working directory.
face_detector = dlib.get_frontal_face_detector()
eye_detector = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

## 前処理

In [5]:
# EAR計算関数
from scipy.spatial import distance
def get_eye_aspect_ratio(eye):
    """Return the eye aspect ratio (EAR) for one eye.

    Args:
        eye: Indexable collection of six (x, y) points. Points 0 and 3 form
            the horizontal pair; (1, 5) and (2, 4) form the vertical pairs.

    Returns:
        Sum of the two vertical Euclidean distances divided by twice the
        horizontal Euclidean distance.
    """
    vertical_pairs = ((1, 5), (2, 4))
    vertical_sum = sum(distance.euclidean(eye[i], eye[j]) for i, j in vertical_pairs)
    horizontal = distance.euclidean(eye[0], eye[3])
    return vertical_sum / (2.0 * horizontal)

def get_landmarks(shape):
    """Collect all landmark points of a shape as a list of (x, y) tuples.

    Args:
        shape: Object exposing ``num_parts`` and ``part(i)``, where each part
            has ``x`` and ``y`` attributes.

    Returns:
        List of ``(x, y)`` tuples, one per part, in index order.
    """
    points = []
    for index in range(shape.num_parts):
        points.append((shape.part(index).x, shape.part(index).y))
    return points


eye_closed_time = None  # tracks the time at which the eyes were closed

In [6]:
import cv2
import dlib
import numpy as np
import pandas as pd
import os
import random
from sklearn.preprocessing import StandardScaler

# Indices of the eye landmarks in the 68-point layout (36-41 and 42-47).
LEFT_EYE_POINTS = [*range(36, 42)]
RIGHT_EYE_POINTS = [*range(42, 48)]

# One StandardScaler per eye (left / right EAR).
scaler_left, scaler_right = StandardScaler(), StandardScaler()

# dlib face detector and landmark predictor.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

def get_eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) from six eye landmarks.

    Args:
        eye: Array-like of six (x, y) points (NumPy array, or a list/tuple of
            pairs). Points 0 and 3 form the horizontal pair; (1, 5) and (2, 4)
            form the vertical pairs.

    Returns:
        ``(A + B) / (2 * C)`` where A and B are the vertical distances and C
        is the horizontal distance. Returns ``0.0`` when C is zero (degenerate
        landmarks), so that no inf/nan value is produced.
    """
    # Convert up front so plain lists/tuples of points work too; subtracting
    # two tuples would raise TypeError.
    eye = np.asarray(eye, dtype=np.float64)

    A = np.linalg.norm(eye[1] - eye[5])
    B = np.linalg.norm(eye[2] - eye[4])
    C = np.linalg.norm(eye[0] - eye[3])

    if C == 0:
        # Zero eye width: the ratio is undefined, report a fully closed eye.
        return 0.0
    return (A + B) / (2.0 * C)

def crop_eye_region(image, eye_points, padding=5, size=(64, 64)):
    """Crop the padded bounding box of an eye and resize it.

    Args:
        image: Image array indexed as ``image[y, x]``; ``image.shape[0]`` is
            the height and ``image.shape[1]`` the width.
        eye_points: Array-like of (x, y) landmark coordinates of one eye.
        padding: Pixels added on every side of the landmark bounding box.
        size: ``(width, height)`` passed to ``cv2.resize``; defaults to the
            64x64 size used for training.

    Returns:
        The cropped eye region resized to ``size``.

    Raises:
        ValueError: If the padded box does not overlap the image, i.e. the
            crop would be empty.
    """
    points = np.asarray(eye_points)
    height, width = image.shape[:2]

    # Clip BOTH ends of each range into the image. Clipping only one side
    # lets a negative max slip through and act as a from-the-end slice index.
    x_min = int(np.clip(points[:, 0].min() - padding, 0, width))
    x_max = int(np.clip(points[:, 0].max() + padding, 0, width))
    y_min = int(np.clip(points[:, 1].min() - padding, 0, height))
    y_max = int(np.clip(points[:, 1].max() + padding, 0, height))

    eye_img = image[y_min:y_max, x_min:x_max]
    if eye_img.size == 0:
        raise ValueError(
            f"eye region is empty after clipping to the image "
            f"(x: {x_min}-{x_max}, y: {y_min}-{y_max})"
        )
    return cv2.resize(eye_img, size)  # resized for training

def extract_eye_data_from_image(img_path):
    """Extract the eye crops and eye aspect ratios from one image file.

    The image is read with OpenCV and faces are detected with the
    module-level ``detector``. The first face whose two eye regions can be
    cropped is used.

    Args:
        img_path: Path of the image file to read.

    Returns:
        ``(both_eyes_img, left_ear, right_ear, img)`` where ``both_eyes_img``
        is the left and right eye crops stacked horizontally and ``img`` is
        the loaded image. Returns ``(None, None, None, None)`` when the image
        cannot be read, no face is detected, or no face yields valid crops.
    """
    no_result = (None, None, None, None)

    img = cv2.imread(img_path)
    if img is None:
        return no_result
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    for face in detector(gray):
        landmarks = predictor(gray, face)

        left_eye_coords = np.array(
            [(landmarks.part(n).x, landmarks.part(n).y) for n in LEFT_EYE_POINTS]
        )
        right_eye_coords = np.array(
            [(landmarks.part(n).x, landmarks.part(n).y) for n in RIGHT_EYE_POINTS]
        )

        try:
            left_eye_img = crop_eye_region(img, left_eye_coords)
            right_eye_img = crop_eye_region(img, right_eye_coords)
        except (cv2.error, ValueError):
            # The crop can be empty (e.g. landmarks predicted outside the
            # image). Try the next face instead of aborting the caller.
            continue

        left_ear = get_eye_aspect_ratio(left_eye_coords)
        right_ear = get_eye_aspect_ratio(right_eye_coords)

        # Stack side by side: two 64x64 crops -> shape (64, 128, 3)
        both_eyes_img = np.hstack((left_eye_img, right_eye_img))
        return both_eyes_img, left_ear, right_ear, img

    return no_result

# ===== Build the dataset =====
MAX_IMAGES_PER_CLASS = 10000  # upper bound on files sampled from each class folder

data = []
random.seed(42)

for folder, label in [(drowsy_path, "Drowsy"), (non_path, "Non Drowsy")]:
    # os.listdir() returns names in arbitrary order. Sort before the seeded
    # shuffle so the same subset is selected on every run and machine.
    files = sorted(f for f in os.listdir(folder) if f.lower().endswith(image_extension))
    random.shuffle(files)
    for file in files[:MAX_IMAGES_PER_CLASS]:
        img_path = os.path.join(folder, file)
        both_img, left_ear, right_ear, _ = extract_eye_data_from_image(img_path)
        if both_img is None:
            # Unreadable image or no usable face: skip this file.
            continue
        data.append({
            "both_img": both_img,
            "total EAR": (left_ear + right_ear) / 2,  # mean of left and right EAR
            "label": label
        })

# Convert the collected records into a DataFrame
df = pd.DataFrame(data)
print(f"データ数: {len(df)}")

データ数: 16086


### データセットの準備

In [7]:
IMG_SIZE = (64, 128)

# Work on plain row records of the DataFrame.
records = df.to_dict("records")

# Images: BGR -> RGB, scaled to [0, 1], stored as float32.
both_img = np.array(
    [cv2.cvtColor(rec["both_img"], cv2.COLOR_BGR2RGB) / 255.0 for rec in records],
    dtype=np.float32,
)

# EAR feature as a float32 column vector of shape (n, 1).
total_ears = np.array(
    [rec["total EAR"] for rec in records], dtype=np.float32
).reshape(-1, 1)

# Binary target: 1 for "Drowsy", 0 for everything else.
labels = np.array(
    [1 if rec["label"] == "Drowsy" else 0 for rec in records], dtype=np.int32
)



In [8]:
# Hold out 20% of the samples; images, EAR values and labels are split together.
(X_img_train, X_img_test,
 X_ear_train, X_ear_test,
 y_train, y_test) = train_test_split(
    both_img, total_ears, labels, test_size=0.2, random_state=42)

# --- EAR emphasis ---
alpha = 3.0  # factor that amplifies EAR differences (tunable)

# Standardize EAR: fit on the training split only, then reuse the fitted
# scaler for the held-out split. Both results are multiplied by alpha.
scaler_ear = StandardScaler()
train_ear_scaled = alpha * scaler_ear.fit_transform(X_ear_train.reshape(-1, 1))
val_ear_scaled = alpha * scaler_ear.transform(X_ear_test.reshape(-1, 1))

In [9]:
import tensorflow as tf

class LossGapEarlyStopping(tf.keras.callbacks.Callback):
    """Stop training once |val_loss - loss| drops below a threshold.

    Args:
        threshold: Training stops at the end of the first epoch whose absolute
            gap between ``val_loss`` and ``loss`` is smaller than this value.
    """

    def __init__(self, threshold=0.0001):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Print the loss gap for this epoch and request a stop if it is small.

        Does nothing when either ``loss`` or ``val_loss`` is missing from
        ``logs`` (for example when no validation data is used).
        """
        # `logs` defaults to None; calling .get() on None would raise
        # AttributeError.
        logs = logs or {}
        train_loss = logs.get('loss')
        val_loss = logs.get('val_loss')
        if train_loss is None or val_loss is None:
            return

        gap = abs(val_loss - train_loss)
        print(f"Epoch {epoch+1}: loss gap = {gap:.6f}")

        if gap < self.threshold:
            print(f"Stopping early: loss gap {gap:.6f} < threshold {self.threshold}")
            self.model.stop_training = True

In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Lambda, Concatenate
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# ===== CNNブランチ（結合画像用） =====
def build_cnn_branch():
    """Build the convolutional branch for the side-by-side eye image.

    Returns:
        ``(img_input, features)``: the Keras ``Input`` of shape (64, 128, 3)
        and the flattened feature tensor multiplied by 0.3.
    """
    img_input = Input(shape=(64, 128, 3))  # left and right eye crops stacked horizontally
    x = Conv2D(32, (3,3), activation='relu')(img_input)
    x = MaxPooling2D()(x)
    x = Conv2D(64, (3,3), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = Flatten()(x)
    # Scale the CNN features by 0.3. Rescaling performs the same multiplication
    # as a Lambda layer, but it is a built-in serializable layer, so the saved
    # .keras model does not need safe_mode=False to load.
    x = layers.Rescaling(0.3)(x)
    return img_input, x

both_img_input, img_features = build_cnn_branch()

# ===== total EAR input =====
ear_input = Input(shape=(1,), name="total_ear_input")
ear_features = Dense(16, activation='relu')(ear_input)
ear_features = Dense(32, activation='relu')(ear_features)
# Scale the EAR features by 10. Rescaling is used instead of a Lambda layer
# so the checkpoint written below can be loaded without safe_mode=False.
ear_features = layers.Rescaling(10.0)(ear_features)

# ===== Merge the two feature branches =====
merged = Concatenate()([img_features, ear_features])
merged = Dense(128, activation='relu')(merged)
output = Dense(1, activation='sigmoid')(merged)

# ===== Model definition =====
model = models.Model(
    inputs=[both_img_input, ear_input],
    outputs=output
)

# ===== Callbacks =====
# Keep only the weights with the lowest validation loss seen so far.
checkpoint = ModelCheckpoint(
    'best_model_combined_totalEAR.keras',
    monitor='val_loss',
    save_best_only=True,
    mode='min',
    verbose=1
)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)


### モデルの学習と評価

In [11]:
# ===== Training =====
# The model takes two inputs, in this order: [images, EAR].
train_inputs = [X_img_train, train_ear_scaled]
val_inputs = [X_img_test, val_ear_scaled]

history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_test),
    epochs=10,
    batch_size=32,
    callbacks=[checkpoint],
    class_weight={0: 1, 1: 5},  # give the Drowsy class (label 1) a higher weight
)

# ===== Evaluation =====
# Evaluated on the same held-out split that was used as validation data.
loss, acc = model.evaluate(val_inputs, y_test)
print(f"Validation Accuracy: {acc:.4f}")

Epoch 1/10




[1m402/403[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 146ms/step - accuracy: 0.6973 - loss: 0.8546
Epoch 1: val_loss improved from inf to 0.07304, saving model to best_model_combined_totalEAR.keras
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 157ms/step - accuracy: 0.6980 - loss: 0.8529 - val_accuracy: 0.9748 - val_loss: 0.0730
Epoch 2/10
[1m402/403[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 155ms/step - accuracy: 0.9675 - loss: 0.1452
Epoch 2: val_loss improved from 0.07304 to 0.02835, saving model to best_model_combined_totalEAR.keras
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 165ms/step - accuracy: 0.9675 - loss: 0.1450 - val_accuracy: 0.9913 - val_loss: 0.0284
Epoch 3/10
[1m402/403[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 149ms/step - accuracy: 0.9893 - loss: 0.0560
Epoch 3: val_loss did not improve from 0.02835
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 159ms/step - accuracy

In [None]:
import cv2
import dlib
import numpy as np
from scipy.spatial import distance as dist
from collections import deque
from tensorflow.keras.models import load_model

# ===== EAR計算関数 =====
def eye_aspect_ratio(eye):
    """Return the eye aspect ratio (EAR) of a six-point eye contour.

    Args:
        eye: Indexable collection of six (x, y) points; (1, 5) and (2, 4) are
            the vertical pairs, (0, 3) the horizontal pair.

    Returns:
        Sum of both vertical distances divided by twice the horizontal
        distance (Euclidean distances).
    """
    vertical_a = dist.euclidean(eye[1], eye[5])
    vertical_b = dist.euclidean(eye[2], eye[4])
    horizontal = dist.euclidean(eye[0], eye[3])
    return (vertical_a + vertical_b) / (2.0 * horizontal)

# ===== 移動平均クラス =====
class MovingAverage:
    """Running mean over the most recent ``size`` values."""

    def __init__(self, size=5):
        # A bounded deque drops the oldest value once `size` is exceeded.
        self.values = deque(maxlen=size)

    def update(self, val):
        """Add ``val`` to the window and return the mean of the window.

        If the window is still empty after appending (which happens with
        ``size=0``), ``val`` itself is returned.
        """
        window = self.values
        window.append(val)
        if not window:
            return val
        return np.mean(window)


# ===== dlib setup =====
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
detector = dlib.get_frontal_face_detector()

# Loading the saved checkpoint is currently disabled (line kept for reference).
#model = load_model("best_model_combined_totalEAR.keras",safe_mode=False)

# Landmark indices of each eye
LEFT_EYE_IDX = [*range(36, 42)]
RIGHT_EYE_IDX = [*range(42, 48)]

# ===== Moving-average instances (one per eye, window of 3) =====
ma_left, ma_right = MovingAverage(size=3), MovingAverage(size=3)

# ===== Start the camera =====
DROWSY_THRESHOLD = 0.2  # predictions above this value are reported as "Drowsy"

cap = cv2.VideoCapture(0)

# try/finally ensures the camera and the window are released even when a
# frame raises inside the loop.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray, 0)

        for face in faces:
            shape = predictor(gray, face)
            shape_np = np.array(
                [(shape.part(i).x, shape.part(i).y) for i in range(68)], dtype="int"
            )

            # Landmark coordinates of the left and right eye
            left_eye_points = shape_np[LEFT_EYE_IDX]
            right_eye_points = shape_np[RIGHT_EYE_IDX]

            # EAR per eye, smoothed with a moving average
            ear_left = ma_left.update(eye_aspect_ratio(left_eye_points))
            ear_right = ma_right.update(eye_aspect_ratio(right_eye_points))
            total_ear = (ear_left + ear_right) / 2

            # Scale total_ear with the scaler fitted on the training data,
            # then apply the same alpha factor as in training.
            total_ear_scaled = scaler_ear.transform([[total_ear]])[0][0] * alpha
            # Named ear_batch so the Keras Input tensor `ear_input` is not overwritten.
            ear_batch = np.array([[total_ear_scaled]], dtype=np.float32)

            # Crop both eyes with the same helper used to build the training
            # set (padded box, clipped to the frame, resized to 64x64).
            try:
                left_eye_img = crop_eye_region(frame, left_eye_points)
                right_eye_img = crop_eye_region(frame, right_eye_points)
            except (cv2.error, ValueError):
                # Empty crop (e.g. eye outside the frame): skip this face.
                continue

            # Stack side by side -> (64, 128, 3). The training images were
            # converted BGR -> RGB before normalization, so do the same here.
            both_eyes = np.hstack((left_eye_img, right_eye_img))
            both_eyes = cv2.cvtColor(both_eyes, cv2.COLOR_BGR2RGB)

            # Normalize to [0, 1] and add the batch dimension
            combined_images = np.expand_dims(both_eyes.astype("float32") / 255.0, axis=0)

            # Model prediction
            pred = model.predict([combined_images, ear_batch], verbose=0)[0][0]

            # Decision
            label = "Drowsy" if pred > DROWSY_THRESHOLD else "Non Drowsy"
            color = (0, 0, 255) if label == "Drowsy" else (0, 255, 0)

            # Overlay the result on the frame
            cv2.putText(frame, f"{label} ({pred:.2f})", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
            if label == "Drowsy":
                cv2.putText(frame, "!!! WARNING !!!", (100, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 3)

        cv2.imshow("Drowsiness Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()




: 