In [1]:
# Import Libraries
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from pathlib import Path
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
import kagglehub
# confusion matrix
from sklearn.metrics import confusion_matrix
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG19
from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
# Download latest version
path = kagglehub.dataset_download("msambare/fer2013")

print("Path to dataset files:", path)

Path to dataset files: /Users/macos/.cache/kagglehub/datasets/msambare/fer2013/versions/1


In [4]:
# Set Parameters
IMG_SIZE = 224  # Width and height
BATCH_SIZE = 64
SEED = 42
DATA_DIR = path

# Emotion categories will be inferred automatically from folder names

In [16]:
color_mode = 'rgb' # Use 'grayscale' for grayscale images, 'rgb' for color images

# Load training dataset with validation split (before normalization)
raw_train_ds = tf.keras.utils.image_dataset_from_directory(
    directory=os.path.join(DATA_DIR, "train"),
    labels='inferred',
    label_mode='int',  # integer labels
    color_mode=color_mode,
    batch_size=BATCH_SIZE,
    image_size=(IMG_SIZE, IMG_SIZE),
    shuffle=True,
    seed=SEED,
    validation_split=0.2,
    subset='training'
)

raw_val_ds = tf.keras.utils.image_dataset_from_directory(
    directory=os.path.join(DATA_DIR, "train"),
    labels='inferred',
    label_mode='int',
    color_mode=color_mode,
    batch_size=BATCH_SIZE,
    image_size=(IMG_SIZE, IMG_SIZE),
    shuffle=True,
    seed=SEED,
    validation_split=0.2,
    subset='validation'
)

# 4️⃣ Load test dataset (no validation split)
raw_test_ds = tf.keras.utils.image_dataset_from_directory(
    directory=os.path.join(DATA_DIR, "test"),
    labels='inferred',
    label_mode='int',
    color_mode=color_mode,
    batch_size=BATCH_SIZE,
    image_size=(IMG_SIZE, IMG_SIZE),
    shuffle=False
)

Found 28709 files belonging to 7 classes.
Using 22968 files for training.
Found 28709 files belonging to 7 classes.
Using 5741 files for validation.
Found 7178 files belonging to 7 classes.


In [6]:
# Get class names before mapping (IMPORTANT)
class_names = raw_train_ds.class_names
class_indices = dict(zip(class_names, range(len(class_names))))
print("Emotion Label Mapping:", class_indices)

Emotion Label Mapping: {'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'neutral': 4, 'sad': 5, 'surprise': 6}


In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Use this only during model training
cnn_augmentation = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

In [9]:
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers


In [20]:
# Load the base MobileNetV3Large model
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers

base_model = MobileNetV3Large(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)

num_classes = len(class_names)

# ✅ Make ALL layers trainable
for layer in base_model.layers:
    layer.trainable = True

# Add custom layers
x = GlobalAveragePooling2D()(base_model.output)
# Optional: Add dropout for regularization
x = Dropout(0.2)(x)
output = Dense(num_classes, activation='softmax', name='out_layer')(x)

model = Model(inputs=base_model.input, outputs=output)

# Compile the model with a slightly higher learning rate for MobileNet
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),  # Increased from 1e-4
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Show summary
model.summary()

print(f"Total parameters: {model.count_params():,}")
print(f"Trainable parameters: {sum([tf.keras.backend.count_params(w) for w in model.trainable_weights]):,}")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_large_224_1.0_float_no_top_v2.h5
[1m12683000/12683000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


Total parameters: 3,003,079
Trainable parameters: 2,978,679


In [21]:
# Normalize datasets using Rescaling
normalization_layer = tf.keras.layers.Rescaling(1./255)

train_ds = raw_train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = raw_val_ds.map(lambda x, y: (normalization_layer(x), y))
test_ds = raw_test_ds.map(lambda x, y: (normalization_layer(x), y))

# Convert to NumPy arrays (for use with traditional ML models)
def convert_to_numpy(dataset):
    images = []
    labels = []
    for batch_images, batch_labels in dataset:
        images.append(batch_images.numpy())
        labels.append(batch_labels.numpy())
    return np.concatenate(images), np.concatenate(labels)

X_train_np, y_train_np = convert_to_numpy(train_ds)
X_val_np, y_val_np = convert_to_numpy(val_ds)
X_test_np, y_test_np = convert_to_numpy(test_ds)

print("Train shape:", X_train_np.shape)
print("Validation shape:", X_val_np.shape)
print("Test shape:", X_test_np.shape)

# One-hot encode labels for deep learning models
y_train_oh = to_categorical(y_train_np, num_classes=7)
y_val_oh = to_categorical(y_val_np, num_classes=7)
y_test_oh = to_categorical(y_test_np, num_classes=7)

Train shape: (22968, 224, 224, 3)
Validation shape: (5741, 224, 224, 3)
Test shape: (7178, 224, 224, 3)


In [22]:
# Plot Training and Validation Curves
def plot_history(history, title="Model"):
    plt.figure(figsize=(12, 5))

    # Accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train')
    plt.plot(history.history['val_accuracy'], label='Validation')
    plt.title(f'{title} Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train')
    plt.plot(history.history['val_loss'], label='Validation')
    plt.title(f'{title} Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()


In [23]:
# Use in model training

checkpoint = ModelCheckpoint(
    'best_emotion_model03_01_checkpoint.h5',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

history_cnn01 = model.fit(
    cnn_augmentation.flow(X_train_np, y_train_oh, batch_size=32),
    steps_per_epoch=len(X_train_np) // 32,
    epochs=40,
    validation_data=(X_val_np, y_val_oh),
    callbacks=[checkpoint]
)

model.save("fer2013_cnn_model3_01.h5")

plot_history(history_cnn01, title="CNN 03 with Augmentation")

y_pred_probs = model.predict(X_test_np)
y_pred_cnn = y_pred_probs.argmax(axis=1)

# Accuracy
print("Test Accuracy:", accuracy_score(y_test_np, y_pred_cnn))

  self._warn_if_super_not_called()


Epoch 1/40
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 682ms/step - accuracy: 0.4867 - loss: 1.3565
Epoch 1: val_accuracy improved from -inf to 0.11862, saving model to best_emotion_model03_01_checkpoint.h5




[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m527s[0m 720ms/step - accuracy: 0.4867 - loss: 1.3563 - val_accuracy: 0.1186 - val_loss: 12.0841
Epoch 2/40
[1m  1/717[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11:04[0m 928ms/step - accuracy: 0.6562 - loss: 1.0276




Epoch 2: val_accuracy did not improve from 0.11862
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 36ms/step - accuracy: 0.6562 - loss: 1.0276 - val_accuracy: 0.1186 - val_loss: 9.9281
Epoch 3/40
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 687ms/step - accuracy: 0.6128 - loss: 1.0284
Epoch 3: val_accuracy improved from 0.11862 to 0.17349, saving model to best_emotion_model03_01_checkpoint.h5




[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m518s[0m 723ms/step - accuracy: 0.6128 - loss: 1.0284 - val_accuracy: 0.1735 - val_loss: 2.1352
Epoch 4/40
[1m  1/717[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7:45[0m 650ms/step - accuracy: 0.6250 - loss: 0.8808
Epoch 4: val_accuracy did not improve from 0.17349
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 35ms/step - accuracy: 0.6250 - loss: 0.8808 - val_accuracy: 0.1735 - val_loss: 2.1425
Epoch 5/40
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 665ms/step - accuracy: 0.6536 - loss: 0.9395
Epoch 5: val_accuracy did not improve from 0.17349
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 700ms/step - accuracy: 0.6536 - loss: 0.9396 - val_accuracy: 0.1700 - val_loss: 399.2848
Epoch 6/40
[1m  1/717[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:24[0m 705ms/step - accuracy: 0.5000 - loss: 1.0718
Epoch 6: val_accuracy improved from 0.17349 to 0.18307, saving model to best_emot



[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 36ms/step - accuracy: 0.5000 - loss: 1.0718 - val_accuracy: 0.1831 - val_loss: 439.4085
Epoch 7/40
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 669ms/step - accuracy: 0.6677 - loss: 0.8869
Epoch 7: val_accuracy did not improve from 0.18307
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m507s[0m 708ms/step - accuracy: 0.6677 - loss: 0.8869 - val_accuracy: 0.1784 - val_loss: 300974.9688
Epoch 8/40
[1m  1/717[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:41[0m 729ms/step - accuracy: 0.6250 - loss: 1.0177
Epoch 8: val_accuracy did not improve from 0.18307
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 37ms/step - accuracy: 0.6250 - loss: 1.0177 - val_accuracy: 0.1737 - val_loss: 308762.0625
Epoch 9/40
[1m717/717[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 675ms/step - accuracy: 0.6960 - loss: 0.8264
Epoch 9: val_accuracy did not improve from 0.18307
[1m717/717[0m

KeyboardInterrupt: 

In [None]:
last_model = model
best_model_path = 'best_emotion_model03_01_checkpoint.h5'
if os.path.exists(best_model_path):
    model.load_weights(best_model_path)
    print("Loaded best model weights from:", best_model_path)

y_pred_probs = model.predict(X_test_np)
y_pred_cnn = y_pred_probs.argmax(axis=1)

# Accuracy
print("Test Accuracy:", accuracy_score(y_test_np, y_pred_cnn))