# 03_Train_Transfer.ipynb
### Transfer Learning (MobileNetV2 / ResNet50) on FER2013
This notebook trains a transfer-learning model on preprocessed images (224×224 RGB) and saves the best checkpoint.
Expected data layout: `data/cropped_faces/train/<class>/...` and `data/cropped_faces/val/<class>/...`

In [None]:
# Install dependencies (run once)
!pip install tensorflow opencv-python matplotlib numpy pandas tqdm seaborn


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2, ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


In [None]:
# Parameters (edit as needed)
DATA_DIR = 'data/cropped_faces'
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
VAL_DIR = os.path.join(DATA_DIR, 'val')
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 30
BACKBONE = 'mobilenet'  # options: 'mobilenet' or 'resnet50'
# Name the checkpoint after the selected backbone so a ResNet50 run is not
# written to (and does not overwrite) a file labelled "mobilenet".
OUT_MODEL = f'models/{BACKBONE}_emotion.h5'
UNFREEZE_LAYERS = 30  # number of layers from base to keep trainable when fine-tuning
# Create whatever directory OUT_MODEL points at (skipped when it has no directory part).
_out_dir = os.path.dirname(OUT_MODEL)
if _out_dir:
    os.makedirs(_out_dir, exist_ok=True)


In [None]:
# Data generators
# Pre-trained backbones must be fed inputs scaled the same way as during their
# ImageNet training. A plain 1/255 rescale matches neither backbone: per the Keras
# docs, MobileNetV2's preprocess_input maps pixels to [-1, 1] and ResNet50's
# converts RGB->BGR and subtracts the ImageNet channel means. Use the
# backbone-specific function instead.
# NOTE: any inference code that loads the saved model must apply the same function.
if BACKBONE == 'mobilenet':
    preprocess_fn = tf.keras.applications.mobilenet_v2.preprocess_input
elif BACKBONE == 'resnet50':
    preprocess_fn = tf.keras.applications.resnet50.preprocess_input
else:
    raise ValueError('Unsupported backbone')

# Augmentation is applied to training images only; validation images are just preprocessed.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_fn,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_fn)

train_gen = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)
# shuffle=False keeps validation batches in directory order.
val_gen = val_datagen.flow_from_directory(
    VAL_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)
NUM_CLASSES = train_gen.num_classes
CLASS_INDICES = train_gen.class_indices
# If the two directory trees do not contain the same class folders, label indices
# would silently mean different things in training and validation.
if val_gen.class_indices != CLASS_INDICES:
    raise ValueError(
        'Train and validation class folders differ: '
        f'{CLASS_INDICES} vs {val_gen.class_indices}'
    )
print('Classes:', CLASS_INDICES)


In [None]:
# Build model function
def build_model(backbone='mobilenet', input_shape=(224,224,3), num_classes=7, dropout=0.5):
    """Assemble a frozen ImageNet backbone topped with a pooled softmax classifier.

    Args:
        backbone: 'mobilenet' (MobileNetV2) or 'resnet50' (ResNet50).
        input_shape: Shape passed both to the backbone and to the model's Input layer.
        num_classes: Number of units in the final softmax Dense layer.
        dropout: Dropout rate applied between global average pooling and the Dense layer.

    Returns:
        A ``(model, base)`` tuple: the full (uncompiled) Keras model and the backbone
        it wraps, so the caller can later change the backbone's trainable state.

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
    """
    if backbone == 'mobilenet':
        backbone_cls = MobileNetV2
    elif backbone == 'resnet50':
        backbone_cls = ResNet50
    else:
        raise ValueError('Unsupported backbone')

    # Load ImageNet weights without the original classification head and freeze them.
    base = backbone_cls(weights='imagenet', include_top=False, input_shape=input_shape)
    base.trainable = False

    inputs = Input(shape=input_shape)
    # The backbone is always called with training=False, independent of its trainable flag.
    features = base(inputs, training=False)
    pooled = GlobalAveragePooling2D()(features)
    regularized = Dropout(dropout)(pooled)
    outputs = Dense(num_classes, activation='softmax')(regularized)

    return Model(inputs, outputs), base


In [None]:
# Instantiate the network for the configured backbone and compile it for the
# first (frozen-backbone) training phase.
img_height, img_width = IMG_SIZE[0], IMG_SIZE[1]
model, base = build_model(
    backbone=BACKBONE,
    input_shape=(img_height, img_width, 3),
    num_classes=NUM_CLASSES,
)
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Callbacks: checkpointing, early stopping and LR reduction all track validation loss.
monitored = 'val_loss'
checkpoint_cb = ModelCheckpoint(OUT_MODEL, monitor=monitored, save_best_only=True)
early_stop_cb = EarlyStopping(monitor=monitored, patience=7, restore_best_weights=True)
reduce_lr_cb = ReduceLROnPlateau(monitor=monitored, factor=0.5, patience=3)
callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]


In [None]:
# Phase 1 (feature extraction): the backbone was frozen in build_model, so only
# the layers added on top of it are updated here.
history = model.fit(
    x=train_gen,
    epochs=EPOCHS,
    validation_data=val_gen,
    callbacks=callbacks,
)


In [None]:
# Fine-tune: make only the last UNFREEZE_LAYERS layers of the backbone trainable.
# The count is clamped to [0, number of backbone layers]; 0 (or a negative value)
# keeps the whole backbone frozen, matching "number of layers ... to keep trainable".
n_trainable = max(0, min(UNFREEZE_LAYERS, len(base.layers)))
base.trainable = n_trainable > 0
for layer in base.layers[:len(base.layers) - n_trainable]:
    layer.trainable = False

# Re-compile so the changed trainable flags take effect; use a 10x smaller learning
# rate than the first phase.
model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
ft_history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    callbacks=callbacks
)

# ModelCheckpoint(save_best_only=True) in `callbacks` has already written the
# lowest-val_loss model to OUT_MODEL. Calling model.save(OUT_MODEL) here would
# replace that file with whatever weights happen to be in memory, which are not
# guaranteed to be the best ones. Reload the checkpoint instead so the model
# evaluated below is exactly the one on disk.
model = tf.keras.models.load_model(OUT_MODEL)
print('Saved model to', OUT_MODEL)


In [None]:
# Plot training curves (combined)
def plot_hist(hist, label_prefix=''):
    """Pull the four standard training curves out of a Keras-style History object.

    Despite its name this helper draws nothing; it only extracts data.

    Args:
        hist: Object exposing a ``history`` dict of metric name -> per-epoch values.
        label_prefix: Accepted for compatibility; not used.

    Returns:
        Tuple ``(loss, val_loss, accuracy, val_accuracy)``; a metric missing from
        ``hist.history`` is returned as an empty list.
    """
    record = hist.history
    metric_names = ('loss', 'val_loss', 'accuracy', 'val_accuracy')
    return tuple(record.get(name, []) for name in metric_names)

loss1, vloss1, acc1, vacc1 = plot_hist(history)
loss2, vloss2, acc2, vacc2 = plot_hist(ft_history)

# Concatenate both phases so each curve spans feature extraction + fine-tuning.
loss = loss1 + loss2
vloss = vloss1 + vloss2
acc = acc1 + acc2
vacc = vacc1 + vacc2

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
panels = (
    (ax_loss, loss, vloss, 'train_loss', 'val_loss', 'Loss'),
    (ax_acc, acc, vacc, 'train_acc', 'val_acc', 'Accuracy'),
)
for ax, train_curve, val_curve, train_label, val_label, title in panels:
    ax.plot(train_curve, label=train_label)
    ax.plot(val_curve, label=val_label)
    # Mark the boundary between the two phases (only when both phases logged epochs).
    if loss1 and loss2:
        ax.axvline(len(loss1) - 0.5, color='gray', linestyle='--', label='fine-tune start')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(title)
    ax.legend()
    ax.set_title(title)
fig.tight_layout()
plt.show()


In [None]:
# Quick evaluation and confusion matrix
from sklearn.metrics import classification_report, confusion_matrix

# val_gen was created with shuffle=False, so prediction order lines up with val_gen.classes.
preds = model.predict(val_gen, verbose=1)
y_pred = np.argmax(preds, axis=1)
y_true = val_gen.classes

# Class names ordered by their integer index, plus the full list of indices.
# Passing `labels=` explicitly keeps the report and matrix at a fixed size even if
# some class never appears in y_true or y_pred; otherwise target_names and the
# heatmap tick labels would no longer line up with the computed rows/columns.
labels = sorted(val_gen.class_indices, key=val_gen.class_indices.get)
label_ids = list(range(len(labels)))

print('\nClassification Report:')
print(classification_report(y_true, y_pred, labels=label_ids, target_names=labels))

cm = confusion_matrix(y_true, y_pred, labels=label_ids)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels, cmap='Blues', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()


## Notes
- If you have limited GPU, reduce `BATCH_SIZE` and `EPOCHS`.
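- Use class weights for imbalanced classes: compute them from the per-sample labels in `train_gen.classes` (`train_gen.class_indices` is only the name→index mapping) and pass them to `model.fit(..., class_weight=...)`.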
- Consider using `tf.data` pipelines for faster loading on large datasets.