## Models in this notebook: a baseline CNN (2 conv layers), a Mini-ResNet, and a tuned Mini-ResNet

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import tensorflow as tf
import os
from PIL import Image
import kaggle
from sklearn.model_selection import train_test_split
import pickle # to save training history

In [2]:
# Root folder of the image dataset; passed to image_dataset_from_directory below.
data_dir = './human-face-emotions/data'

In [3]:
# Load every image under data_dir as a 48x48 single-channel tensor paired with
# an integer class label. `tf.keras.utils.image_dataset_from_directory` is the
# documented entry point; the `tf.keras.preprocessing` path is only an alias.
full_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    label_mode="int",
    image_size=(48, 48),
    color_mode="grayscale",
    batch_size=None,      # yield one (img, label) pair at a time
    shuffle=True,
    seed=42
)

# Class names as reported by the dataset; their count sizes the output layers.
class_names = full_ds.class_names
num_classes = len(class_names)

print("Classes:", class_names)

Found 57756 files belonging to 5 classes.
Classes: ['Angry', 'Fear', 'Happy', 'Sad', 'Suprise']


In [4]:
# Pull the whole dataset into memory as a list of (image, label) numpy pairs,
# then separate the two components into parallel lists.
full_data = list(full_ds.as_numpy_iterator())

images = [image for image, _ in full_data]  # per-sample image arrays
labels = [label for _, label in full_data]  # per-sample integer labels

In [5]:
# 70 / 15 / 15 stratified split, performed in two stages with a fixed seed.

# Stage 1: hold out 30% of the samples (divided into val + test below).
x_train, x_temp, y_train, y_temp = train_test_split(
    images,
    labels,
    stratify=labels,      # preserve the class proportions in both parts
    test_size=0.30,
    random_state=42,
)

# Stage 2: cut the held-out 30% in half -> 15% validation, 15% test.
x_val, x_test, y_val, y_test = train_test_split(
    x_temp,
    y_temp,
    stratify=y_temp,
    test_size=0.50,
    random_state=42,
)

In [6]:
# Wrap each split as an unbatched tf.data pipeline of (image, label) pairs.
train_ds, val_ds, test_ds = (
    tf.data.Dataset.from_tensor_slices(split)
    for split in ((x_train, y_train), (x_val, y_val), (x_test, y_test))
)

In [7]:
batch_size = 128

# Build the batched pipelines straight from the split data instead of
# re-assigning train_ds/val_ds/test_ds from themselves. The cell is then
# idempotent: re-running it cannot stack a second batch() on top of an
# already-batched dataset.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)       # only the training split is shuffled
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
val_ds = (
    tf.data.Dataset.from_tensor_slices((x_val, y_val))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

In [8]:
# Early stopping: watch val_loss, give up after 5 epochs without improvement,
# and restore the best weights seen during the run.
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor="val_loss",
)

### Baseline CNN

In [9]:
# Baseline CNN: two conv + max-pool stages followed by a small dense classifier.
# The input shape is declared once, by the Input layer; also passing
# `input_shape` to the first Conv2D is redundant and makes Keras emit a
# UserWarning, so it is omitted here.
# NOTE(review): images are fed exactly as loaded (no rescaling step is visible
# in this notebook) - consider a Rescaling(1/255) layer after Input; confirm.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(48, 48, 1)),

    tf.keras.layers.Conv2D(32, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.Conv2D(64, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',  # labels are integer class ids, not one-hot
    metrics=['accuracy']
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Train the baseline for at most 50 epochs; the early-stopping callback may
# end the run sooner.
history = model.fit(
    x=train_ds,
    epochs=50,
    validation_data=val_ds,
    callbacks=[callback],
)

Epoch 1/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 40ms/step - accuracy: 0.3246 - loss: 8.8451 - val_accuracy: 0.4632 - val_loss: 1.3013
Epoch 2/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.4850 - loss: 1.2577 - val_accuracy: 0.5019 - val_loss: 1.2292
Epoch 3/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 37ms/step - accuracy: 0.5537 - loss: 1.1096 - val_accuracy: 0.5571 - val_loss: 1.1403
Epoch 4/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.6073 - loss: 0.9903 - val_accuracy: 0.5879 - val_loss: 1.0827
Epoch 5/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 38ms/step - accuracy: 0.6737 - loss: 0.8480 - val_accuracy: 0.6193 - val_loss: 1.0440
Epoch 6/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 38ms/step - accuracy: 0.7147 - loss: 0.7413 - val_accuracy: 0.6405 - val_loss: 1.0440
Epoch 7/50
[1m3

In [None]:
# Score the baseline on the held-out test split; evaluate() yields the loss
# followed by the compiled accuracy metric.
test_loss, test_acc = model.evaluate(x=test_ds)
print("Final Test Accuracy:", test_acc)

[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.6868 - loss: 0.9656
Final Test Accuracy: 0.6805170774459839


In [29]:
# Save the trained baseline model to disk as a .keras file.
model.save('cnn_baseline.keras')

In [30]:
# Store the baseline's training history dict (history.history) on disk.
with open("history_cnn_baseline.pkl", mode="wb") as history_file:
    pickle.dump(history.history, history_file)

### Residual CNN: Mini-ResNet

In [17]:
def residual_block(x, filters):
    """Apply a two-convolution residual block to the tensor ``x``.

    Main path: Conv2D(3x3) -> BatchNorm -> ReLU -> Conv2D(3x3) -> BatchNorm.
    The block input is then added onto the main path and a final ReLU is
    applied. When the input's last dimension differs from ``filters``, the
    skip path is first projected with a 1x1 convolution followed by batch
    normalisation so that both branches can be summed.

    Args:
        x: Input Keras tensor; its last axis is compared against ``filters``.
        filters: Number of filters used by every convolution in the block.

    Returns:
        The output tensor of the block.
    """
    layers = tf.keras.layers
    skip = x

    # Main branch: conv-bn-relu followed by conv-bn.
    out = layers.Conv2D(filters, 3, padding="same")(x)
    out = layers.BatchNormalization()(out)
    out = layers.ReLU()(out)

    out = layers.Conv2D(filters, 3, padding="same")(out)
    out = layers.BatchNormalization()(out)

    # Project the skip connection only if the filter counts don't match.
    if skip.shape[-1] != filters:
        skip = layers.Conv2D(filters, 1, padding="same")(skip)
        skip = layers.BatchNormalization()(skip)

    merged = layers.Add()([skip, out])
    return layers.ReLU()(merged)

In [18]:
def build_mini_resnet(num_classes=5):
    """Build the Mini-ResNet classifier for 48x48 single-channel images.

    Architecture: a conv-bn-relu stem with 32 filters, three residual stages
    (32, 64 and 128 filters) each followed by 2x2 max-pooling, then
    Flatten -> Dense(128, relu) -> Dropout(0.3) -> softmax output.

    Args:
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    layers = tf.keras.layers
    inputs = tf.keras.Input(shape=(48, 48, 1))

    # Stem
    features = layers.Conv2D(32, 3, padding="same")(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    # Residual stages; each pooling halves the spatial size: 48 -> 24 -> 12 -> 6.
    for stage_filters in (32, 64, 128):
        features = residual_block(features, stage_filters)
        features = layers.MaxPooling2D()(features)

    # Classification head
    features = layers.Flatten()(features)
    features = layers.Dense(128, activation="relu")(features)
    features = layers.Dropout(0.3)(features)
    outputs = layers.Dense(num_classes, activation="softmax")(features)

    return tf.keras.Model(inputs, outputs)



In [19]:
# Size the output layer from the dataset's class list instead of repeating the
# literal 5, so the model stays in sync with `num_classes` like the baseline.
model_resnet = build_mini_resnet(num_classes=num_classes)
model_resnet.summary()

In [None]:
# Default Adam optimizer, sparse categorical cross-entropy (the labels are
# integers, not one-hot encoded) and accuracy as the reported metric.
model_resnet.compile(
    metrics=["accuracy"],
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
)


In [21]:
# Train the Mini-ResNet for at most 30 epochs with the 5-epoch-patience
# early-stopping callback.
history_resnet = model_resnet.fit(
    x=train_ds,
    epochs=30,
    validation_data=val_ds,
    callbacks=[callback],
)

Epoch 1/30
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 394ms/step - accuracy: 0.2841 - loss: 2.4692 - val_accuracy: 0.4331 - val_loss: 1.3889
Epoch 2/30
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 413ms/step - accuracy: 0.3876 - loss: 1.3821 - val_accuracy: 0.5091 - val_loss: 1.2342
Epoch 3/30
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 449ms/step - accuracy: 0.4672 - loss: 1.2578 - val_accuracy: 0.5354 - val_loss: 1.1492
Epoch 4/30
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 401ms/step - accuracy: 0.4968 - loss: 1.1959 - val_accuracy: 0.5624 - val_loss: 1.0738
Epoch 5/30
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 393ms/step - accuracy: 0.5113 - loss: 1.1439 - val_accuracy: 0.5824 - val_loss: 1.0166
Epoch 6/30
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 397ms/step - accuracy: 0.5322 - loss: 1.0956 - val_accuracy: 0.5679 - val_loss: 1.0269
Epoc

In [22]:
# Score the Mini-ResNet on the held-out test split; note this overwrites the
# test_loss / test_acc values of any earlier evaluation.
test_loss, test_acc = model_resnet.evaluate(x=test_ds)
print("Final Test Accuracy:", test_acc)

[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 79ms/step - accuracy: 0.6478 - loss: 0.8285
Final Test Accuracy: 0.6442751884460449


In [23]:
# Save the trained Mini-ResNet model to disk as a .keras file.
model_resnet.save('cnn_resnet.keras')

In [24]:
# Store the Mini-ResNet's training history dict (history_resnet.history) on disk.
with open("history_cnn_resnet.pkl", mode="wb") as history_file:
    pickle.dump(history_resnet.history, history_file)

### Tuned Mini-ResNet

In [None]:
def build_mini_resnet_2(num_classes=5):
    """Build the tuned Mini-ResNet for 48x48 single-channel images.

    Same stem and residual stages as ``build_mini_resnet`` (32, 64 and 128
    filters, each stage followed by 2x2 max-pooling), but with a different
    head: GlobalAveragePooling2D instead of Flatten, then
    Dense(256, relu) -> BatchNorm -> Dropout(0.3) -> softmax output.

    Args:
        num_classes: Number of units in the softmax output layer. Defaults to
            5, the value that was previously hard-coded, so existing
            zero-argument calls build the same model.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    inputs = tf.keras.Input(shape=(48, 48, 1))
    # Stem
    x = tf.keras.layers.Conv2D(32, 3, padding="same")(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)

    # Residual blocks
    x = residual_block(x, 32)
    x = tf.keras.layers.MaxPooling2D()(x)   # -> 24×24

    x = residual_block(x, 64)
    x = tf.keras.layers.MaxPooling2D()(x)   # -> 12×12

    x = residual_block(x, 128)
    x = tf.keras.layers.MaxPooling2D()(x)   # -> 6×6

    # Improved head
    x = tf.keras.layers.GlobalAveragePooling2D()(x)     # replaces Flatten()

    x = tf.keras.layers.Dense(256, activation="relu")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.3)(x)

    # Output width follows the parameter rather than a literal 5.
    outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(x)

    return tf.keras.Model(inputs, outputs)

In [26]:
# Instantiate the tuned Mini-ResNet and print its layer/parameter summary.
model_resnet_tuned = build_mini_resnet_2()
model_resnet_tuned.summary()

In [27]:
# Default Adam optimizer, sparse categorical cross-entropy (the labels are
# integers, not one-hot encoded) and accuracy as the reported metric.
model_resnet_tuned.compile(
    metrics=["accuracy"],
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
)

In [28]:
# Stricter early stopping for the tuned model: watch val_loss, give up after
# 3 epochs without improvement, and restore the best weights seen.
callback_2 = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor="val_loss",
)

In [None]:
# Train the tuned Mini-ResNet for at most 30 epochs with the 3-epoch-patience
# early-stopping callback.
history_resnet_tuned = model_resnet_tuned.fit(
    x=train_ds,
    epochs=30,
    validation_data=val_ds,
    callbacks=[callback_2],
)