In [2]:
import tensorflow as tf
print("Tensorflow version: ", tf.__version__)
print("CUDA Built: ", tf.test.is_built_with_cuda())
print("GPU: ", tf.config.list_physical_devices("GPU"))

Tensorflow version:  2.10.1
CUDA Built:  True
GPU:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# Setting memory growth
# By Default, Tensorflow may allocate all GPU memory at once, which can cause issue if 
# you're running multiple GPU applications
# set memory growth tells Tensorflow to only allocate memory as needed, dynamically growing the memory footprint as needed
# This helps avoid out-of-memory errors and allows multiple programs to share GPU efficiently/safely

physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)
    print("Memory Growth Set")

Memory Growth Set


In [4]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Activation, GlobalAveragePooling2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications import EfficientNetB0

from tensorflow.keras.applications.mobilenet import preprocess_input as preprocess_mobile
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_efficient


from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2 as cv
import time


In [5]:
import dagshub
import mlflow

mlflow.set_tracking_uri('https://dagshub.com/varun966/EmotionRecognition.mlflow')
dagshub.init(repo_owner='varun966', repo_name='EmotionRecognition', mlflow=True)

mlflow.set_experiment("Mobile Net Experiment")

<Experiment: artifact_location='mlflow-artifacts:/5463f7ad9066475d87089177ad7424de', creation_time=1752684143899, experiment_id='0', last_update_time=1752684143899, lifecycle_stage='active', name='Mobile Net Experiment', tags={}>

In [6]:
import logging
import time

In [10]:
#CONST
img_shape = (128,128,3)
drop_layers = -5
trainable_layers = 50
Epochs = 30
Verbose = 1
batch_size = 16
train_path = r'D:/AIML/fer2013/train'
test_path = r'D:/AIML/fer2013/test'

In [8]:
#Before model training, clear Keras session to free old graphs and memory.
from tensorflow.keras import backend as K
K.clear_session()


In [17]:
def preprocess_mobile(x):
    from keras.applications.mobilenet import preprocess_input
    return preprocess_input(x)

with mlflow.start_run():
    start_time = time.time()

    try:
        logging.info("Creating the model")

        # MobileNet Model
        mobile = MobileNet(weights='imagenet', input_shape=img_shape)
        mobile_model = Sequential()

        mlflow.log_param("input_shape", img_shape)
        mlflow.log_param("pre_loaded_weights", "imagenet")
        mlflow.log_param("drop_layers", drop_layers)

        for layer in mobile.layers[:drop_layers]:
            mobile_model.add(layer)

        if trainable_layers == 0:
            mobile_model.trainable = False
        elif trainable_layers == 1:
            mobile_model.trainable = True
        elif trainable_layers < 0:
            for layer in mobile_model.layers[:trainable_layers]:
                layer.trainable = False
            for layer in mobile_model.layers[trainable_layers:]:
                layer.trainable = True

        mlflow.log_param("trainable_layers", trainable_layers)

        trainable_params = np.sum([np.prod(v.get_shape()) for v in mobile_model.trainable_weights])
        non_trainable_params = np.sum([np.prod(v.get_shape()) for v in mobile_model.non_trainable_weights])
        total_params = trainable_params + non_trainable_params

        mlflow.log_param("trainable_params", int(trainable_params))
        mlflow.log_param("non_trainable_params", int(non_trainable_params))
        mlflow.log_param("total_params", int(total_params))

        # Add custom layers
        mobile_model.add(GlobalAveragePooling2D())
        mobile_model.add(Dropout(0.5, name='dropout_x'))
        mobile_model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001), name='dense_1'))
        mobile_model.add(Dropout(0.3, name='dropout_2'))
        mobile_model.add(Dense(7, activation='softmax', name='output', dtype='float32'))

        mobile_model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['accuracy'])

        # Data generators
        train_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_mobile,
            rotation_range=10,
            zoom_range=0.1,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.1,
            horizontal_flip=True,
            fill_mode='nearest',
            validation_split=0.2
        )

        val_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_mobile,
            validation_split=0.2
        )

        train_generator = train_datagen.flow_from_directory(
            directory=train_path,
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='categorical',
            subset='training',
            shuffle=True,
            seed=42
        )

        val_generator = val_datagen.flow_from_directory(
            directory=train_path,
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='categorical',
            subset='validation',
            shuffle=False,
            seed=42
        )

        class_weights = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(train_generator.classes),
            y=train_generator.classes)

        class_weights = dict(enumerate(class_weights))

        # Callbacks
        lr_schedule = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)
        early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

        # Train model
        history = mobile_model.fit(
            x=train_generator,
            validation_data=val_generator,
            epochs=Epochs,
            verbose=Verbose,
            class_weight=class_weights,
            callbacks=[lr_schedule, early_stop]
        )

        # Log training metrics
        mlflow.log_metric("train_accuracy", history.history['accuracy'][-1])
        mlflow.log_metric("val_accuracy", history.history['val_accuracy'][-1])
        mlflow.log_metric("train_loss", history.history['loss'][-1])
        mlflow.log_metric("val_loss", history.history['val_loss'][-1])

        # Test metrics
        test_batches = ImageDataGenerator(
            preprocessing_function=preprocess_mobile).flow_from_directory(
            directory=test_path,
            target_size=(224, 224),
            batch_size=10,
            shuffle=False,
        )

        test_labels = test_batches.classes

        predictions = mobile_model.predict(x=test_batches, verbose=0)
        predicted_labels = np.argmax(predictions, axis=1)

        test_accuracy = accuracy_score(test_labels, predicted_labels)
        test_f1_weighted = f1_score(test_labels, predicted_labels, average='weighted')
        test_f1_macro = f1_score(test_labels, predicted_labels, average='macro')

        mlflow.log_metric("test_accuracy", test_accuracy)
        mlflow.log_metric("test_f1_weighted", test_f1_weighted)
        mlflow.log_metric("test_f1_macro", test_f1_macro)

    except Exception as e:
        mlflow.log_param("error", str(e))
        logging.error("Training failed: %s", str(e))


ERROR:root:Training failed: When setting `include_top=True` and loading `imagenet` weights, `input_shape` should be (224, 224, 3).  Received: input_shape=(96, 96, 3)


🏃 View run bright-croc-158 at: https://dagshub.com/varun966/EmotionRecognition.mlflow/#/experiments/0/runs/78938aa9cb764e4e8d25bf1bf40f53fb
🧪 View experiment at: https://dagshub.com/varun966/EmotionRecognition.mlflow/#/experiments/0


In [None]:

# -------------------- Start MLflow Run --------------------
with mlflow.start_run():
    start_time = time.time()

    try:
        logging.info("Creating the model")

        # Load base model
        base_model = MobileNet(weights='imagenet', input_shape=img_shape)
        model = Sequential()

        mlflow.log_param("input_shape", img_shape)
        mlflow.log_param("pre_loaded_weights", "imagenet")
        mlflow.log_param("drop_layers", drop_layers)

        for layer in base_model.layers[:drop_layers]:
            model.add(layer)

        if trainable_layers == 0:
            model.trainable = False
        elif trainable_layers == 1:
            model.trainable = True
        elif trainable_layers < 0:
            for layer in model.layers[:trainable_layers]:
                layer.trainable = False
            for layer in model.layers[trainable_layers:]:
                layer.trainable = True

        mlflow.log_param("trainable_layers", trainable_layers)

        trainable_params = np.sum([np.prod(v.get_shape()) for v in model.trainable_weights])
        non_trainable_params = np.sum([np.prod(v.get_shape()) for v in model.non_trainable_weights])
        total_params = trainable_params + non_trainable_params

        mlflow.log_param("trainable_params", int(trainable_params))
        mlflow.log_param("non_trainable_params", int(non_trainable_params))
        mlflow.log_param("total_params", int(total_params))

        # Custom Layers (logged individually)
        model.add(GlobalAveragePooling2D(name='global_avg_pool'))
        model.add(Dropout(0.5, name='dropout_x'))
        model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001), name='dense_1'))
        model.add(Dropout(0.3, name='dropout_2'))
        model.add(Dense(7, activation='softmax', name='output', dtype='float32'))

        mlflow.log_param("custom_layers", [
            "GlobalAveragePooling2D",
            "Dropout(0.5)",
            "Dense(128, relu, L2=0.001)",
            "Dropout(0.3)",
            "Dense(7, softmax)"
        ])

        model.compile(
            loss='categorical_crossentropy',
            optimizer=Adam(learning_rate=1e-4),
            metrics=['accuracy']
        )

        # -------------------- Data Augmentation --------------------
        augmentation_params = {
            "rotation_range": 10,
            "zoom_range": 0.1,
            "width_shift_range": 0.1,
            "height_shift_range": 0.1,
            "shear_range": 0.1,
            "horizontal_flip": True,
            "fill_mode": 'nearest'
        }
        mlflow.log_params(augmentation_params)

        train_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_mobile,
            validation_split=0.2,
            **augmentation_params
        )

        val_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_mobile,
            validation_split=0.2
        )

        train_generator = train_datagen.flow_from_directory(
            directory=train_path,
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='categorical',
            subset='training',
            shuffle=True,
            seed=42
        )

        val_generator = val_datagen.flow_from_directory(
            directory=train_path,
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='categorical',
            subset='validation',
            shuffle=False,
            seed=42
        )

        class_weights = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(train_generator.classes),
            y=train_generator.classes)
        class_weights = dict(enumerate(class_weights))

        # -------------------- Callbacks --------------------
        lr_schedule = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)
        early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

        # -------------------- Training --------------------
        history = model.fit(
            x=train_generator,
            validation_data=val_generator,
            epochs=Epochs,
            verbose=Verbose,
            class_weight=class_weights,
            callbacks=[lr_schedule, early_stop]
        )

        mlflow.log_metrics({
            "train_accuracy": history.history['accuracy'][-1],
            "val_accuracy": history.history['val_accuracy'][-1],
            "train_loss": history.history['loss'][-1],
            "val_loss": history.history['val_loss'][-1]
        })

        # -------------------- Save Model Summary --------------------
        with open("model_summary.txt", "w") as f:
            model.summary(print_fn=lambda x: f.write(x + '\n'))
        mlflow.log_artifact("model_summary.txt")

        # -------------------- Testing --------------------
        test_batches = ImageDataGenerator(preprocessing_function=preprocess_mobile).flow_from_directory(
            directory=test_path,
            target_size=(224, 224),
            batch_size=10,
            shuffle=False
        )

        test_labels = test_batches.classes
        predictions = model.predict(x=test_batches, verbose=0)
        predicted_labels = np.argmax(predictions, axis=1)

        test_accuracy = accuracy_score(test_labels, predicted_labels)
        test_f1_weighted = f1_score(test_labels, predicted_labels, average='weighted')
        test_f1_macro = f1_score(test_labels, predicted_labels, average='macro')

        mlflow.log_metrics({
            "test_accuracy": test_accuracy,
            "test_f1_weighted": test_f1_weighted,
            "test_f1_macro": test_f1_macro
        })

        # -------------------- Classification Report --------------------
        class_report = classification_report(test_labels, predicted_labels, output_dict=False)
        with open("classification_report.txt", "w") as f:
            f.write(class_report)
        mlflow.log_artifact("classification_report.txt")

        # -------------------- Confusion Matrix --------------------
        cm = confusion_matrix(test_labels, predicted_labels)
        np.savetxt("confusion_matrix.csv", cm, delimiter=",", fmt="%d")
        mlflow.log_artifact("confusion_matrix.csv")

    except Exception as e:
        mlflow.log_param("error", str(e))
        logging.error("Training failed: %s", str(e))


Found 22968 images belonging to 7 classes.
Found 5741 images belonging to 7 classes.
Epoch 1/30
  81/1436 [>.............................] - ETA: 2:02 - loss: 2.8849 - accuracy: 0.1759