* separate train | val | test sets
* I deleted images with original size > 48x48 to get rid of full body & multiple people images. That cost 1343 removed images (57756 kept)

In [5]:
import tensorflow as tf
import os
from PIL import Image
import kaggle
from sklearn.model_selection import train_test_split


In [None]:
# DOWNLOAD THE DATASET
# Computer must have a unique kaggle.json file downloaded and have this path: "C:\Users\youruser\.kaggle\kaggle.json"

dataset_name = 'samithsachidanandan/human-face-emotions'
target_folder = 'human-face-emotions'

kaggle.api.dataset_download_files(dataset_name, path=target_folder, unzip=True)
print("Download complete!")

In [2]:
data_dir = './human-face-emotions/data'

In [None]:
base_folder = data_dir

summary = {}   # to store removed / kept per class
total_removed = 0
total_kept = 0

for label in os.listdir(base_folder):
    class_path = os.path.join(base_folder, label)
    removed = 0
    kept = 0

    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)

        try:
            img = Image.open(img_path)
            w, h = img.size
            img.close()  # ← IMPORTANT: close file so Windows unlocks it

            if (w, h) != (48, 48):
                os.remove(img_path)
                removed += 1
            else:
                kept += 1

        except:
            # corrupted or unreadable → try closing + remove
            try:
                img.close()
            except:
                pass
            os.remove(img_path)
            removed += 1
      

    summary[label] = {"removed": removed, "kept": kept}
    total_removed += removed
    total_kept += kept

# Print class-by-class report
print("=== Removal Report by Class ===")
for label, stats in summary.items():
    print(f"{label}: removed {stats['removed']}, kept {stats['kept']}")

print("\n=== TOTAL ===")
print("Total removed:", total_removed)
print("Total kept:", total_kept)

In [3]:
full_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    label_mode="int",
    image_size=(48, 48),
    color_mode="grayscale",
    batch_size=None,      # return one (img, label) at a time
    shuffle=True,
    seed=42
)

class_names = full_ds.class_names
num_classes = len(class_names)

print("Classes:", class_names)

Found 57756 files belonging to 5 classes.
Classes: ['Angry', 'Fear', 'Happy', 'Sad', 'Suprise']


In [4]:
full_data = list(full_ds.as_numpy_iterator())

images = [x[0] for x in full_data]  # list of arrays
labels = [x[1] for x in full_data]  # list of ints

In [6]:
# First split: train vs temp (val+test)
x_train, x_temp, y_train, y_temp = train_test_split(
    images, labels,
    test_size=0.30,         # 30% → val+test
    random_state=42,
    stratify=labels         # keeps class proportions
)

# Second split: val vs test (each = 15%)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp,
    test_size=0.50,         # half of 30% = 15%
    random_state=42,
    stratify=y_temp
)

In [7]:
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
val_ds   = tf.data.Dataset.from_tensor_slices((x_val, y_val))
test_ds  = tf.data.Dataset.from_tensor_slices((x_test, y_test))

In [8]:
batch_size = 128

train_ds = train_ds.shuffle(10000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
val_ds   = val_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_ds  = test_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [9]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(48, 48, 1)),

    tf.keras.layers.Conv2D(32, (3,3), activation='relu', padding='same', input_shape=(48, 48, 1)),
    tf.keras.layers.MaxPooling2D(),
    # 32: number of filters -> 32 different pattern detectors
    # (3×3) = kernel (=filter) size in pixels - like a (3×3) window
    # padding='same' : 48×48 input → 48×48 output
    # MaxPooling : reduces image size by taking the maximum value in each 3x3 window
    # e.g. pooling keeps the strongest signals
    

    tf.keras.layers.Conv2D(64, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.Conv2D(128, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.Flatten(),

    # --- Dense layers ---
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15
)

Epoch 1/15
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 60ms/step - accuracy: 0.2979 - loss: 3.9497 - val_accuracy: 0.4368 - val_loss: 1.3614
Epoch 2/15
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 58ms/step - accuracy: 0.4315 - loss: 1.3652 - val_accuracy: 0.5181 - val_loss: 1.1748
Epoch 3/15
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 55ms/step - accuracy: 0.5080 - loss: 1.2127 - val_accuracy: 0.5530 - val_loss: 1.1071
Epoch 4/15
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 68ms/step - accuracy: 0.5470 - loss: 1.1118 - val_accuracy: 0.5940 - val_loss: 1.0143
Epoch 5/15
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 66ms/step - accuracy: 0.5923 - loss: 1.0200 - val_accuracy: 0.6076 - val_loss: 0.9914
Epoch 6/15
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 63ms/step - accuracy: 0.6222 - loss: 0.9459 - val_accuracy: 0.6446 - val_loss: 0.9237
Epoch 7/15
[1m3

In [11]:
test_loss, test_acc = model.evaluate(test_ds)
print("Final Test Accuracy:", test_acc)

[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.7852 - loss: 0.6892
Final Test Accuracy: 0.7834718227386475
