In [1]:
import kagglehub
import shutil
import os

# Fetch the NTHU-DDD dataset through kagglehub; the call returns the path of
# the downloaded copy inside kagglehub's cache.
downloaded_path = kagglehub.dataset_download("ikhlaselhamly/nthu-ddd")
print("Downloaded to:", downloaded_path)

# Working copy of the dataset, kept separate from the kagglehub cache.
LOCAL_DATASET_PATH = "/content/nthu_data"

# Copy only once: re-running the cell reuses an already existing local copy.
if not os.path.exists(LOCAL_DATASET_PATH):
    shutil.copytree(downloaded_path, LOCAL_DATASET_PATH)
    print("Data moved to fast local storage!")

# The class folders live inside the NTHU-DDD sub-directory of the download,
# so point DATASET_PATH there rather than at the root of the copy.
DATASET_PATH = os.path.join(LOCAL_DATASET_PATH, "NTHU-DDD")

Downloading from https://www.kaggle.com/api/v1/datasets/download/ikhlaselhamly/nthu-ddd?dataset_version_number=1...


100%|██████████| 777M/777M [00:19<00:00, 41.7MB/s]

Extracting files...





Downloaded to: /root/.cache/kagglehub/datasets/ikhlaselhamly/nthu-ddd/versions/1
Data moved to fast local storage!


In [3]:
# Folder that directly contains the class sub-directories. Derived from
# LOCAL_DATASET_PATH so the two locations cannot drift apart.
DATASET_PATH = os.path.join(LOCAL_DATASET_PATH, "NTHU-DDD")

In [4]:
#importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization,Input
from keras.optimizers import Adam

In [5]:
# Training split: 80% of the images under DATASET_PATH, labels inferred from
# the sub-directory names. The fixed seed must match the validation cell so
# the two subsets do not overlap.
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory=DATASET_PATH,
    image_size=(224, 224),  # source frames are 640*480, so the resize squishes them
    batch_size=32,
    shuffle=True,
    seed=123,
    validation_split=0.2,
    subset="training",
)

Found 18000 files belonging to 2 classes.
Using 14400 files for training.


In [6]:
# Class names as inferred from the directory names; their position in this
# list is the integer label assigned to each class.
class_names = train_ds.class_names
print("Classes found: {}".format(class_names))

Classes found: ['drowsy', 'notdrowsy']


In [8]:
# Validation split: the remaining 20% of the images. Same directory, split
# fraction, seed, image size and batch size as the training cell so the two
# subsets are complementary.
val_ds = tf.keras.utils.image_dataset_from_directory(
    directory=DATASET_PATH,
    image_size=(224, 224),
    batch_size=32,
    seed=123,
    validation_split=0.2,
    subset="validation",
)

Found 18000 files belonging to 2 classes.
Using 3600 files for validation.


In [11]:
from keras.applications import MobileNetV2

In [12]:
# Random augmentations applied to each 224x224 RGB batch before it reaches
# the backbone.
data_augmentation = Sequential([
    # Declare the input shape with an explicit Input layer; passing
    # `input_shape=` to the first layer is deprecated and emits a warning.
    layers.Input(shape=(224, 224, 3)),
    # Horizontal flips only: vertical flips were removed because human eyes
    # don't go upside down.
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
], name="data_augmentation_layer")

  super().__init__(**kwargs)


In [13]:
# 2. Pre-trained feature extractor: MobileNetV2 with ImageNet weights and
#    without its classification head.
base_model = MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)
# Freeze the backbone so its weights are not updated during training.
base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [14]:
# 3. Building the Model Pipeline
#    augmentation -> pixel scaling -> frozen MobileNetV2 -> pooled features
#    -> dropout -> single sigmoid unit for the two-class decision.
model = Sequential([
    data_augmentation,

    # Scale pixels from [0, 255] to [-1, 1], which is what MobileNetV2's
    # preprocess_input does. A Rescaling layer is used instead of a Lambda
    # wrapper because Lambda layers have (de)serialization limitations, and
    # this model is checkpointed to a .keras file.
    layers.Rescaling(scale=1.0 / 127.5, offset=-1.0),

    base_model,

    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.4),  # High dropout to combat the label noise
    layers.Dense(1, activation='sigmoid')
])

In [15]:
# Configure training: Adam optimizer with binary cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)

In [17]:
# Print Keras' summary of the assembled model.
model.summary()

In [18]:
# Training callbacks.
# NOTE(review): the checkpoint tracks val_accuracy while early stopping (and
# its restore_best_weights) tracks val_loss, so the saved file and the
# in-memory weights can come from different epochs. Confirm this is intended.
callbacks = [
    # Keep on disk only the epoch with the best validation accuracy so far.
    tf.keras.callbacks.ModelCheckpoint(
        filepath='best_mobilenet_drowsiness.keras',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1,
    ),
    # Stop once validation loss has not improved for 3 consecutive epochs and
    # roll the model back to the best weights seen.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True,
        verbose=1,
    ),
]

In [20]:
# Train for at most 30 epochs, validating after each one; the callbacks
# handle checkpointing and early stopping. `history` keeps the per-epoch
# metrics returned by fit().
history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    callbacks=callbacks,
    epochs=30,
)

Epoch 1/30
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.5761 - loss: 0.6964
Epoch 1: val_accuracy improved from -inf to 0.64778, saving model to best_mobilenet_drowsiness.keras
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 84ms/step - accuracy: 0.5762 - loss: 0.6963 - val_accuracy: 0.6478 - val_loss: 0.6071
Epoch 2/30
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.6934 - loss: 0.5760
Epoch 2: val_accuracy improved from 0.64778 to 0.70389, saving model to best_mobilenet_drowsiness.keras
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 80ms/step - accuracy: 0.6934 - loss: 0.5760 - val_accuracy: 0.7039 - val_loss: 0.5691
Epoch 3/30
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.7227 - loss: 0.5383
Epoch 3: val_accuracy did not improve from 0.70389
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 86ms/ste