In [1]:
import tensorflow as tf
print("Tensorflow version: ", tf.__version__)
print("CUDA Built: ", tf.test.is_built_with_cuda())
print("GPU: ", tf.config.list_physical_devices("GPU"))

Tensorflow version:  2.10.1
CUDA Built:  True
GPU:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# Setting memory growth
# By Default, Tensorflow may allocate all GPU memory at once, which can cause issue if 
# you're running multiple GPU applications
# set memory growth tells Tensorflow to only allocate memory as needed, dynamically growing the memory footprint as needed
# This helps avoid out-of-memory errors and allows multiple programs to share GPU efficiently/safely

physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)
    print("Memory Growth Set")

Memory Growth Set


In [3]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Activation, GlobalAveragePooling2D, Input
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications import EfficientNetB0

from tensorflow.keras.applications.mobilenet import preprocess_input as preprocess_mobile
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_efficient


from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2 as cv
import time


In [4]:
import dagshub
import mlflow

mlflow.set_tracking_uri('https://dagshub.com/varun966/EmotionRecognition.mlflow')
dagshub.init(repo_owner='varun966', repo_name='EmotionRecognition', mlflow=True)

mlflow.set_experiment("Efficient Net Experiment")

<Experiment: artifact_location='mlflow-artifacts:/c2681ea7d2494a459bebc5b988ddf649', creation_time=1753187636321, experiment_id='1', last_update_time=1753187636321, lifecycle_stage='active', name='Efficient Net Experiment', tags={}>

In [5]:
import logging
import time

In [6]:
#CONST
img_shape = (224,224,3)
drop_layers = -5
trainable_layers = -1
Epochs = 50
Verbose = 1
batch_size = 8
train_path = r'D:/AIML/fer2013/train'
test_path = r'D:/AIML/fer2013/test'

In [7]:
#Before model training, clear Keras session to free old graphs and memory.
from tensorflow.keras import backend as K
K.clear_session()


In [11]:
# Preprocess the images to equalize the Histogram

preprocess_train =  r'D:/AIML/fer2013/preprocess/train'
preprocess_test = r'D:/AIML/fer2013/preprocess/test'

os.makedirs(preprocess_test, exist_ok=True)
os.makedirs(preprocess_test, exist_ok=True)

for filename in os.listdir(train_path):
    os.makedirs(os.path.join(preprocess_train, filename), exist_ok=True)

    print(filename)
    for file in os.listdir(os.path.join(train_path, filename)):
        if not os.path.exists(os.path.join(preprocess_train, filename, file)):
            img = cv.imread(os.path.join(train_path, filename, file))
            gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)  # as OpenCv is reading by default in BGR

            eq_img = cv.equalizeHist(gray)


            cv.imwrite(os.path.join(preprocess_train, filename, file), eq_img)


for filename in os.listdir(test_path):
    os.makedirs(os.path.join(preprocess_test, filename), exist_ok=True)

    print(filename)
    for file in os.listdir(os.path.join(test_path, filename)):
        if not os.path.exists(os.path.join(preprocess_test, filename, file)):
            img = cv.imread(os.path.join(test_path, filename, file))
            gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)  # as OpenCv is reading by default in BGR
            eq_img = cv.equalizeHist(gray)



            cv.imwrite(os.path.join(preprocess_test, filename, file), eq_img)





angry
disgust
fear
happy
neutral
sad
surprise
angry
disgust
fear
happy
neutral
sad
surprise


In [9]:
preprocess_train =  r'D:/AIML/fer2013/preprocess/train'
preprocess_test = r'D:/AIML/fer2013/preprocess/test'


# -------------------- Start MLflow Run --------------------
with mlflow.start_run():
    start_time = time.time()

    # mlflow.log_param("Preprocessing", [
    #     "Grayscale",
    #     "Histogram Equalization",
    #     "Resize((224,224), interpolation=cv.INTER_CUBIC) ",
    #     #"Blurring, (cv.GaussianBlur(resized_img, (3, 3), 0))",
    #     "Blurring, (cv.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)))",
    #     "Shapening, (cv.addWeighted(blurred, 1.5, blurred, -0.5, 0))"

    # ])

    try:
        logging.info("Creating the model")

        # Load EfficientNetB0 as base model
        base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=img_shape)
        base_model.trainable = True
        mlflow.log_param("input_shape", img_shape)
        mlflow.log_param("pre_loaded_weights", "imagenet")
        mlflow.log_param("drop_layers", drop_layers)

        # Freeze all layers initially
        # for layer in base_model.layers:
        #     layer.trainable = True

        # # Unfreeze last `trainable_layers` if specified
        # if trainable_layers > 0:
        #     for layer in base_model.layers[-trainable_layers:]:
        #         layer.trainable = True
        # elif trainable_layers == -1:
        #     for layer in base_model.layers:
        #         layer.trainable = True

        mlflow.log_param("trainable_layers", trainable_layers)

        # Build full model with custom head
        x = base_model.output
        x = GlobalAveragePooling2D(name='global_avg_pool')(x)
        x = Dropout(0.4, name='dropout_x')(x)
        # x = Dense(256, activation='relu', kernel_regularizer=l2(0.001), name='dense_1')(x)
        # x = Dropout(0.3, name='dropout_2')(x)
        x = Dense(128, activation='relu', kernel_regularizer=l2(0.001), name='dense_2')(x)
        x = Dropout(0.3, name='dropout_3')(x)
        outputs = Dense(7, activation='softmax', name='output', dtype='float32')(x)

        model = Model(inputs=base_model.input, outputs=outputs)

        trainable_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
        non_trainable_params = np.sum([np.prod(v.shape) for v in model.non_trainable_weights])
        total_params = trainable_params + non_trainable_params

        mlflow.log_param("trainable_params", int(trainable_params))
        mlflow.log_param("non_trainable_params", int(non_trainable_params))
        mlflow.log_param("total_params", int(total_params))

        mlflow.log_param("custom_layers", [
            "GlobalAveragePooling2D",
            "Dropout(0.4)",
            # "Dense(256, relu, L2=0.001)",
            # "Dropout(0.3)",
            "Dense(128, relu, L2=0.001)",
            "Dropout(0.3)",
            "Dense(7, softmax)"
        ])

        model.compile(
            loss='categorical_crossentropy',
            optimizer=Adam(learning_rate=1e-4),
            metrics=['accuracy']
        )

        # -------------------- Data Augmentation --------------------
        # augmentation_params = {
        #     "rotation_range": 10,
        #     "zoom_range": 0.1,
        #     "width_shift_range": 0.1,
        #     "height_shift_range": 0.1,
        #     "shear_range": 0.1,
        #     "horizontal_flip": True,
        #     "fill_mode": 'nearest'
        # }

        augmentation_params = {
            "rotation_range": 10,
            "zoom_range": [0.1, 1.2],
            "width_shift_range": 0.1,
            "height_shift_range": 0.1,
            "shear_range": 0.1,
            "horizontal_flip": True,
            "fill_mode": 'nearest',
            "brightness_range": [0.8, 1.2],
            "channel_shift_range": 30.0
        }

        # augmentation_params = {
        # "rotation_range" :  15,
        # "zoom_range" : 0.15,
        # "width_shift_range": 0.1,
        # "height_shift_range": 0.1,
        # "shear_range" : 0.1,
        # "horizontal_flip" : True,
        # "fill_mode" : 'nearest',

        # }
        mlflow.log_params(augmentation_params)

        train_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_efficient,
            validation_split=0.2,
            **augmentation_params
        )

        val_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_efficient,
            validation_split=0.2
        )

        train_generator = train_datagen.flow_from_directory(
            directory=preprocess_train,
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='categorical',
            subset='training',
            shuffle=True,
            seed=42
        )

        val_generator = val_datagen.flow_from_directory(
            directory=preprocess_train,
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='categorical',
            subset='validation',
            shuffle=False,
            seed=42
        )

        class_weights = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(train_generator.classes),
            y=train_generator.classes)
        class_weights = dict(enumerate(class_weights))

        # -------------------- Callbacks --------------------
        lr_schedule = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)
        early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

        # -------------------- Training --------------------
        history = model.fit(
            x=train_generator,
            validation_data=val_generator,
            epochs=Epochs,
            verbose=Verbose,
            class_weight=class_weights,
            callbacks=[lr_schedule, early_stop]
        )

        end_time = time.time()
        training_duration = end_time - start_time

        # 📝 Log training time as metric in seconds or minutes
        mlflow.log_metric("training_time_seconds", training_duration)
        mlflow.log_metric("training_time_minutes", training_duration / 60)

        mlflow.log_metrics({
            "train_accuracy": history.history['accuracy'][-1],
            "val_accuracy": history.history['val_accuracy'][-1],
            "train_loss": history.history['loss'][-1],
            "val_loss": history.history['val_loss'][-1]
        })

        # -------------------- Save Model Summary --------------------
        with open("model_summary.txt", "w") as f:
            model.summary(print_fn=lambda x: f.write(x + '\n'))
        mlflow.log_artifact("model_summary.txt")

        # -------------------- Testing --------------------
        start_time_test = time.time()
        test_batches = ImageDataGenerator(preprocessing_function=preprocess_efficient).flow_from_directory(
            directory=preprocess_test,
            target_size=(224, 224),
            batch_size=10,
            shuffle=False
        )

        num_test_records = test_batches.n

        test_labels = test_batches.classes
        predictions = model.predict(x=test_batches, verbose=0)
        predicted_labels = np.argmax(predictions, axis=1)

        end_time_test = time.time()

        total_test_time = (end_time_test - start_time_test)
        test_time_per_image = total_test_time / num_test_records
        fps = num_test_records / total_test_time


        test_accuracy = accuracy_score(test_labels, predicted_labels)
        test_f1_weighted = f1_score(test_labels, predicted_labels, average='weighted')
        test_f1_macro = f1_score(test_labels, predicted_labels, average='macro')

        mlflow.log_metrics({
            "test_accuracy": test_accuracy,
            "test_f1_weighted": test_f1_weighted,
            "test_f1_macro": test_f1_macro,
            "total_test_duration": total_test_time,
            "test_time_per_image": test_time_per_image,
            "number_of_test_records": num_test_records,
            "FPS": fps

        })

        # -------------------- Classification Report --------------------
        class_report = classification_report(test_labels, predicted_labels, output_dict=False)
        with open("classification_report.txt", "w") as f:
            f.write(class_report)
        mlflow.log_artifact("classification_report.txt")

        # -------------------- Confusion Matrix --------------------
        cm = confusion_matrix(test_labels, predicted_labels)
        np.savetxt("confusion_matrix.csv", cm, delimiter=",", fmt="%d")
        mlflow.log_artifact("confusion_matrix.csv")


        # ----------------Model Logging
        model.save('effnet_best_model.h5')
        model.save_weights("effnet_model_saved_weights.h5")
        mlflow.log_artifact('effnet_best_model.h5')
        mlflow.log_artifact('effnet_model_saved_weights.h5')



    

    except Exception as e:
        mlflow.log_param("error", str(e))
        logging.error("Training failed: %s", str(e))


Found 22968 images belonging to 7 classes.
Found 5741 images belonging to 7 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 15: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 25: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 28: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 29/50
Found 7178 images belonging to 7 classes.


ERROR:root:Training failed: Unable to serialize [2.0896919 2.1128857 2.1081853] to JSON. Unrecognized type <class 'tensorflow.python.framework.ops.EagerTensor'>.


🏃 View run aged-owl-95 at: https://dagshub.com/varun966/EmotionRecognition.mlflow/#/experiments/1/runs/3d86dc5895ce4d35bc3d7b2b2ea97034
🧪 View experiment at: https://dagshub.com/varun966/EmotionRecognition.mlflow/#/experiments/1


In [10]:
model.save_weights("effnet_model_saved_weights.h5")

# Ran up until epoch 24, saved the model
# re load the weights and run the model again
