# 02_Train_Baseline.ipynb
### Train a simple CNN baseline on 48×48 grayscale images (FER2013)
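This notebook trains a small baseline CNN (quick to run) on the images under `data/cropped_faces/train` and `data/cropped_faces/val` (one sub-folder per class) and saves the model to `models/baseline_cnn.h5`.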

In [None]:
# Install required packages (run once)
!pip install tensorflow mtcnn opencv-python matplotlib numpy pandas tqdm scikit-learn


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import tensorflow as tf

# --- Configuration: everything a reader might want to tune ---

# Dataset root. Expected structure:
#   data/cropped_faces/train/<class>/...
#   data/cropped_faces/val/<class>/...
DATA_DIR = 'data/cropped_faces'
IMG_SIZE = (48, 48)    # used as `target_size` for the data generators
BATCH_SIZE = 64
EPOCHS = 25            # maximum number of epochs passed to model.fit
OUT_MODEL = 'models/baseline_cnn.h5'
SEED = 42

# Seed NumPy and TensorFlow so that repeated runs start from the same random
# state. This is best-effort: some GPU ops may still be non-deterministic.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Create the directory OUT_MODEL lives in. It is derived from the path so that
# changing OUT_MODEL does not leave the target directory missing.
os.makedirs(os.path.dirname(OUT_MODEL) or '.', exist_ok=True)


In [None]:
# Define baseline CNN
def build_baseline_cnn(input_shape=(48,48,1), num_classes=7):
    """Build the (uncompiled) baseline CNN classifier.

    Architecture: three 3x3 'same'-padded ReLU convolutions with 32, 64 and
    128 filters, each followed by batch normalization. The first two are
    followed by 2x2 max pooling; the third feeds global average pooling,
    dropout (rate 0.5) and a softmax output layer.

    Args:
        input_shape: Shape of a single input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras `Sequential` model. The caller is responsible for compiling it.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer rather than passing
        # `input_shape=` to the first Conv2D; the latter is deprecated for
        # Sequential models in recent Keras releases.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3,3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(64, (3,3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(128, (3,3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        # Collapse the spatial axes to one value per channel before the classifier head.
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model


In [None]:
# Prepare data generators
train_dir = os.path.join(DATA_DIR, 'train')
val_dir = os.path.join(DATA_DIR, 'val')

# Both splits scale pixel values by 1/255; only the training split is
# augmented (random horizontal flips).
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
val_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both splits, kept in one place so they cannot drift apart.
flow_kwargs = dict(
    target_size=IMG_SIZE,
    color_mode='grayscale',
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)
train_gen = train_datagen.flow_from_directory(train_dir, **flow_kwargs)
val_gen = val_datagen.flow_from_directory(val_dir, **flow_kwargs)

# Fail early with a clear message instead of an obscure error inside fit().
for _split_name, _split_gen in (('train', train_gen), ('val', val_gen)):
    if _split_gen.samples == 0:
        raise ValueError(
            f"No images found for the {_split_name} split under {DATA_DIR!r}; "
            "expected one sub-directory per class."
        )

# Class indices are assigned per directory. If the two splits disagree, the
# validation labels would silently refer to different classes than the
# training labels.
if train_gen.class_indices != val_gen.class_indices:
    raise ValueError(
        "Train and validation class mappings differ: "
        f"{train_gen.class_indices} vs {val_gen.class_indices}"
    )

num_classes = train_gen.num_classes
print('Num classes:', num_classes)


In [None]:
# Build, compile and summarize model
# The input shape is derived from IMG_SIZE (plus one channel, because the
# generators load images with color_mode='grayscale') so that the model always
# matches the images it is fed, even if IMG_SIZE is changed.
model = build_baseline_cnn(input_shape=(*IMG_SIZE, 1), num_classes=num_classes)
# categorical_crossentropy pairs with the one-hot labels produced by
# class_mode='categorical'.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:
# Callbacks -- all three monitor the validation loss.

# Write the model to OUT_MODEL, keeping only the best one seen so far.
checkpoint_cb = ModelCheckpoint(OUT_MODEL, monitor='val_loss', save_best_only=True)

# Stop training after 5 epochs without improvement.
early_stop_cb = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Halve the learning rate after 2 epochs without improvement.
reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]


In [None]:
# Train
# Fit on the training generator for at most EPOCHS epochs, validating on
# val_gen and applying the callbacks defined above. The returned History
# object is kept for plotting.
fit_options = dict(
    epochs=EPOCHS,
    validation_data=val_gen,
    callbacks=callbacks,
)
history = model.fit(train_gen, **fit_options)


In [None]:
# Finalize the saved model.
# ModelCheckpoint(save_best_only=True) has already written the best model to
# OUT_MODEL. Saving the in-memory model to the same path would overwrite that
# checkpoint, and the in-memory weights are not guaranteed to be the best ones
# (depending on the Keras version, EarlyStopping may only restore them when it
# actually stops training early). So load the best weights into the in-memory
# model instead, and only save when no checkpoint was written.
# NOTE(review): a checkpoint file left over from an earlier run would also be
# picked up here -- confirm that is acceptable.
if os.path.exists(OUT_MODEL):
    model.load_weights(OUT_MODEL)
else:
    model.save(OUT_MODEL)
print('Saved model to', OUT_MODEL)


In [None]:
# Plot training curves: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# 1-based epoch numbers for the x axis.
epochs_run = range(1, len(history.history['loss']) + 1)

loss_ax.plot(epochs_run, history.history['loss'], label='train_loss')
loss_ax.plot(epochs_run, history.history['val_loss'], label='val_loss')
loss_ax.set(title='Loss', xlabel='Epoch', ylabel='Loss')
loss_ax.legend()

acc_ax.plot(epochs_run, history.history['accuracy'], label='train_acc')
acc_ax.plot(epochs_run, history.history['val_accuracy'], label='val_acc')
acc_ax.set(title='Accuracy', xlabel='Epoch', ylabel='Accuracy')
acc_ax.legend()

fig.tight_layout()
plt.show()


In [None]:
# Quick evaluation on validation set
# Ceiling division so the final, partially filled batch is included; floor
# division would skip up to batch_size - 1 validation samples.
val_steps = (val_gen.samples + val_gen.batch_size - 1) // val_gen.batch_size
results = model.evaluate(val_gen, steps=max(1, val_steps))
print('Validation results (loss, acc):', results)