# In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import os
import matplotlib.pyplot as plt
import datetime
import time
import psutil
from tensorflow.keras.callbacks import ModelCheckpoint

# Read the dataset index from GCS and keep only the three emotions of interest
csv_path = 'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/data.csv'
df = pd.read_csv(csv_path)
wanted_rows = df['label'].isin(['Happy', 'Sad', 'Angry'])
df = df[wanted_rows]

# Distinct labels that remain after the filter, and how many there are
unique_labels = df['label'].unique()
total_unique_labels = len(unique_labels)

# Discard the first column of the CSV
first_column = df.columns[0]
df = df.drop(first_column, axis=1)

# Shuffle by sampling every row (no replacement), then renumber the index
dataset_size = len(df)
shuffled = df.sample(dataset_size)
df = shuffled.reset_index(drop=True)


def _to_dataset_path(original_path):
    """Rebuild a path under the GCS dataset folder from its last two components."""
    parts = original_path.split('/')
    return f"gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/{parts[-2]}/{parts[-1]}"


# Point every row at the image's location inside the bucket
df['path'] = df['path'].apply(_to_dataset_path)

# Turn the string labels into integer ids, then into one-hot vectors
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(df['label'])
one_hot_encoded_labels = to_categorical(integer_encoded)

# Dataset preparation function
def load_image(file_path, label):
    """Read one image file, decode it to 3 channels and resize it to 224x224.

    The decoder is chosen from the file extension (case-insensitively):
    ``.jpg``/``.jpeg`` use the JPEG decoder, ``.png`` uses the PNG decoder and
    anything else falls back to ``tf.image.decode_image``.

    Args:
        file_path: Scalar string tensor holding the path of the image.
        label: Label associated with the image; returned unchanged.

    Returns:
        ``(image, label)`` where ``image`` is the resized image, or
        ``(None, label)`` if an exception was raised while this function ran.
    """
    # NOTE(review): these handlers only see exceptions raised while this Python
    # body executes. When the function is passed to Dataset.map it is traced
    # into a graph, so read/decode failures that occur while the dataset is
    # iterated are presumably not caught here and must be handled by the
    # pipeline itself.
    try:
        image_data = tf.io.read_file(file_path)

        def decode_jpeg():
            return tf.image.decode_jpeg(image_data, channels=3)

        def decode_png():
            return tf.image.decode_png(image_data, channels=3)

        def decode_fallback():
            # expand_animations=False keeps the result 3-D even for GIF input.
            return tf.image.decode_image(image_data, channels=3, expand_animations=False)

        # Raw strings avoid invalid "\." escapes; (?i) makes ".JPG"/".PNG"
        # take the same branch as their lower-case spellings.
        is_jpeg = tf.strings.regex_full_match(file_path, r"(?i).*\.jpe?g")
        is_png = tf.strings.regex_full_match(file_path, r"(?i).*\.png")

        image = tf.cond(
            is_jpeg,
            true_fn=decode_jpeg,
            false_fn=lambda: tf.cond(
                is_png,
                true_fn=decode_png,
                false_fn=decode_fallback
            )
        )

        # Every branch yields a height x width x 3 tensor; pin that static
        # shape so resize never receives a tensor of unknown rank.
        image.set_shape([None, None, 3])

        image = tf.image.resize(image, [224, 224])
        return image, label
    except tf.errors.NotFoundError:
        print(f"Failed to load image at: {file_path}")
        return None, label
    except Exception as e:
        print(f"Error processing image at: {file_path}", str(e))
        return None, label

# Create TensorFlow datasets
BATCH_SIZE = 16

# One element per sample: (image path, one-hot label). Nothing is read yet.
path_label_dataset = tf.data.Dataset.from_tensor_slices((df['path'].tolist(), one_hot_encoded_labels))

# Split dataset into training and validation
dataset_size = len(df)
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size


def _build_pipeline(dataset):
    """Decode images, drop samples that fail to load, then batch and prefetch."""
    # Decode/resize in parallel; AUTOTUNE lets TensorFlow pick the parallelism.
    dataset = dataset.map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Skip elements whose read/decode raised an error while iterating, instead
    # of aborting the whole run on one unreadable file.
    dataset = dataset.apply(tf.data.experimental.ignore_errors())
    # Group samples into batches of BATCH_SIZE (incomplete last batch dropped)
    # and prefetch batches so input loading overlaps with training.
    return dataset.batch(BATCH_SIZE, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)


# take()/skip() count dataset elements, so the split is done on the unbatched
# dataset, where one element is one sample. The first train_size samples are
# used for training and the remaining ones for validation, keeping the two
# sets disjoint (df was shuffled before the dataset was created).
train_dataset = _build_pipeline(path_label_dataset.take(train_size))
val_dataset = _build_pipeline(path_label_dataset.skip(train_size))

# The complete prepared dataset, kept under its original name.
full_dataset = _build_pipeline(path_label_dataset)

for images, labels in val_dataset.take(5):  # Inspect up to five validation batches
    print("Images batch shape:", images.shape)
    print("Labels batch shape:", labels.shape)
    # Show the first image and label as an example
    if images.shape[0] > 0:  # Check if there are any images in the batch
        plt.imshow(images[0].numpy().astype("uint8"))
        plt.title(f"Sample Label: {labels[0].numpy()}")
        plt.axis('off')
        plt.show()
        
def tf_model(num_classes):
    """Build and compile the convolutional classifier.

    The network stacks three Conv2D -> LeakyReLU -> MaxPooling2D stages with
    16, 32 and 64 filters, followed by Flatten, a 64-unit Dense layer with
    LeakyReLU, and a softmax output layer.

    Args:
        num_classes: Number of units in the softmax output layer.

    Returns:
        The model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    print("num_classes value is:", num_classes)

    model = models.Sequential()

    # Convolutional feature extractor; only the first stage declares the input shape.
    for stage, filters in enumerate((16, 32, 64)):
        if stage == 0:
            conv = layers.Conv2D(filters, (3, 3), padding='same', input_shape=(224, 224, 3))
        else:
            conv = layers.Conv2D(filters, (3, 3), padding='same')
        model.add(conv)
        model.add(layers.LeakyReLU(alpha=0.1))
        model.add(layers.MaxPooling2D((2, 2)))

    # Classification head
    model.add(layers.Flatten())
    model.add(layers.Dense(64))
    model.add(layers.LeakyReLU(alpha=0.1))
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# This variant has about 12 million parameters; it trains to completion, but only with 500 records.
# def tf_model(num_classes):
#     print("num_classes value is :", num_classes)
#     model = models.Sequential([
#         layers.Conv2D(32, (3, 3), padding='same', input_shape=(224, 224, 3)),
#         layers.LeakyReLU(alpha=0.1),
#         layers.MaxPooling2D((2, 2)),
#         layers.Conv2D(64, (3, 3), padding='same'),
#         layers.LeakyReLU(alpha=0.1),
#         layers.MaxPooling2D((2, 2)),
#         layers.Conv2D(128, (3, 3), padding='same'),
#         layers.LeakyReLU(alpha=0.1),
#         layers.MaxPooling2D((2, 2)),
#         layers.Flatten(),
#         layers.Dense(128),
#         layers.LeakyReLU(alpha=0.1),
#         layers.Dense(num_classes, activation='softmax')
#     ])
#     model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#     return model

class PerformanceCallback(tf.keras.callbacks.Callback):
    """Keras callback that reports wall-clock time and process memory per epoch."""

    def on_epoch_begin(self, epoch, logs=None):
        """Record the epoch start time and a handle to the current process."""
        self.epoch_start = time.time()
        self.process = psutil.Process(os.getpid())

    def on_epoch_end(self, epoch, logs=None):
        """Print the epoch duration, resident memory and validation loss if present.

        Args:
            epoch: Zero-based epoch index (printed one-based).
            logs: Optional dict of metrics for the epoch; may be None.
        """
        # logs defaults to None, and `'val_loss' in None` would raise TypeError.
        logs = logs or {}
        epoch_time = time.time() - self.epoch_start
        memory_usage = self.process.memory_info().rss / (1024 ** 2)  # Memory in MB
        print(f"Epoch {epoch+1} ended. Time: {epoch_time:.2f}s, Memory Usage: {memory_usage:.2f} MB")
        if 'val_loss' in logs:
            print(f"Validation Loss: {logs['val_loss']}")

# File name of the saved model and where it is written on the local disk
model_name = "Emotion_Detection_AI_20240803_205050_v1.h5"
local_model_path = "/tmp/" + model_name

# Bucket folder that holds saved models, and the model's full path inside it
model_dir = "gs://storage_for_all/models"
full_model_path = model_dir + "/" + model_name

# One output unit per class known to the label encoder
num_classes = len(label_encoder.classes_)
model = tf_model(num_classes)

# Write the complete model (not only weights) to the local path after every
# epoch, whether or not it improved, and log each save.
checkpoint_callback = ModelCheckpoint(
    filepath=local_model_path,
    save_best_only=False,
    save_weights_only=False,
    verbose=1,
)

# Define a custom callback to copy the model to GCS
class CopyModelToGCS(tf.keras.callbacks.Callback):
    """Keras callback that saves the model locally and uploads it to GCS each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Save the model in HDF5 format and copy it to the models bucket.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics for the epoch (unused).
        """
        # Save the model locally
        self.model.save(local_model_path, save_format='h5')

        # Copy the model to Google Cloud Storage. tf.io.gfile.copy raises on
        # failure, so a failed upload cannot be mistaken for a success just
        # because an object from an earlier epoch already exists.
        gcs_model_path = f"gs://storage_for_all/models/{model_name}"
        try:
            tf.io.gfile.copy(local_model_path, gcs_model_path, overwrite=True)
            uploaded = tf.io.gfile.exists(gcs_model_path)
        except tf.errors.OpError as err:
            print(f"Copy error: {err}")
            uploaded = False

        # Report the outcome
        if uploaded:
            print(f"Model saved successfully to {gcs_model_path}")
        else:
            print("Failed to save the model to GCS")

# Create a list of callbacks
callbacks = [checkpoint_callback, CopyModelToGCS()]
# NOTE(review): the list above is not the one handed to fit() below, so the
# per-epoch GCS copy does not run during training. Confirm whether that is
# intended before passing `callbacks` to fit().

# Print the model summary
model.summary()

# Model training
# The use_multiprocessing/workers arguments were dropped: Keras documents them
# as applying to generator / keras.utils.Sequence input only, and the input
# here is a tf.data.Dataset that already parallelises and prefetches itself.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=val_dataset,
    callbacks=[checkpoint_callback, PerformanceCallback()],
    verbose=1
)


# Model training
# history = model.fit(
#     train_dataset,
#     epochs=10,
#     validation_data=val_dataset,
#     callbacks=[PerformanceCallback()],
#     use_multiprocessing=True,
#     workers=3,
#     verbose=1
# )

# Plot training and validation accuracy and loss
def plot_history(history):
    """Plot accuracy and loss curves from a Keras training history.

    Draws two side-by-side panels: accuracy on the left and loss on the
    right. Validation curves are added only when the matching key is present
    in ``history.history``.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists; must contain 'accuracy' and 'loss'.
    """
    metrics = history.history

    plt.figure(figsize=(12, 4))

    # Left panel: accuracy
    plt.subplot(1, 2, 1)
    plt.plot(metrics['accuracy'], label='Training Accuracy')
    if 'val_accuracy' in metrics:
        plt.plot(metrics['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Right panel: loss
    plt.subplot(1, 2, 2)
    plt.plot(metrics['loss'], label='Training Loss')
    # Guard on the key that is actually plotted ('val_loss', not 'val_accuracy').
    if 'val_loss' in metrics:
        plt.plot(metrics['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()

plot_history(history)

# Save the model locally
model.save(local_model_path, save_format='h5')

# Copy the model to Google Cloud Storage. tf.io.gfile.copy raises on failure,
# so an upload error is reported instead of being hidden by an object that
# already exists at the destination.
gcs_model_path = f"gs://storage_for_all/models/{model_name}"
try:
    tf.io.gfile.copy(local_model_path, gcs_model_path, overwrite=True)
    upload_ok = tf.io.gfile.exists(gcs_model_path)
except tf.errors.OpError as err:
    print(f"Copy error: {err}")
    upload_ok = False

# Report the outcome
if upload_ok:
    print(f"Model saved successfully to {gcs_model_path}")
else:
    print("Failed to save the model to GCS")

    
# Define the model name and GCS path
#model_name = "Emotion_Detection_AI_20240803_040927_v1.h5"  # Replace with your actual model name

gcs_model_path = f"gs://storage_for_all/models/{model_name}"
local_model_path = f"/tmp/{model_name}"


def predict_emotion(image_path):
    """Classify one image with the module-level model and display the result.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        ``(emotion_label, emotion_probabilities)``: the predicted class name
        and the model's output vector for the image.
    """
    image_data = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image_data, channels=3)
    image = tf.image.resize(image, [224, 224])
    image = tf.expand_dims(image, 0)  # Make batch of 1

    predictions = model.predict(image)
    emotion_probabilities = predictions[0]
    predicted_class = np.argmax(emotion_probabilities)
    emotion_label = label_encoder.inverse_transform([predicted_class])[0]

    # Display the image with the predicted emotion
    plt.figure(figsize=(4, 4))
    plt.imshow(image[0].numpy().astype("uint8"))
    plt.title(f"Predicted Emotion: {emotion_label}")
    plt.axis('off')
    plt.show()

    # classes_[i] is the label that inverse_transform maps index i back to,
    # so pairing it with the output vector avoids one lookup call per class.
    print("Emotion Probabilities:")
    for class_name, probability in zip(label_encoder.classes_, emotion_probabilities):
        print(f"{class_name}: {probability:.2f}")

    return emotion_label, emotion_probabilities


# Copy the model from GCS to local path. The copy raises on failure, so a
# local file left over from an earlier save cannot hide a failed download.
try:
    tf.io.gfile.copy(gcs_model_path, local_model_path, overwrite=True)
    model_downloaded = tf.io.gfile.exists(local_model_path)
except tf.errors.OpError as err:
    print(f"Copy error: {err}")
    model_downloaded = False

# Verify the model was copied
if not model_downloaded:
    print("Failed to load the model from GCS")
else:
    print(f"Model loaded successfully from {gcs_model_path}")

    # Load the model from the local copy that was just downloaded
    model = tf.keras.models.load_model(local_model_path)
    print("Model loaded into TensorFlow")
    try:
        # Evaluate the model on the validation dataset
        loss, accuracy = model.evaluate(val_dataset)

        # Print the evaluation results
        print(f"Test Loss: {loss:.3f}")
        print(f"Test Accuracy: {accuracy:.3f}")

    except Exception as e:
        print(f"Error evaluating model: {e}")

    # Example usage
    image_paths = [
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Happy/00a0084c2be84daa7b57c35c3e7563a62b716a71b6f1bc8c8aa577b5.jpg',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Happy/00a1e228b74ad9663ec8cb9aad62d0e1ed89d29f45c1a55efd73e0fa.JPG',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Happy/00b597a317f73e5832275a0a5f9aa8250f1a4450bd55ac20387f2c9d.jpg',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Sad/003b76ac33cfdfff858d3230ed1f3e56a75def52ae0d309a9f8cc169.jpg',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Sad/01b1763812bc6d9932343b0122aefff73ed1a0cce2f252f8b3a80546.jpg',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Angry/096f0eb818fd7021214c56995115cfa006a1893e5b9a9104f0c1fc96~angry.jpg',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Neutral/00aef13bc323b8abd5ec47a100132bc65c2a5e6e4136591e744f62b3f.jpg',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Neutral/00b1de6cda41141282f8a14da526cb2332eeb123c8185a7a9eaf30abf.jpg',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Neutral/00d6150365face753577d829406317a2c36cb11ff7be5146a9298cfcf.jpg',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Custom/img_sad.JPG',
        'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/dataset/Custom/img_happy.JPG'
    ]

    for image_path in image_paths:
        predicted_emotion, emotion_probabilities = predict_emotion(image_path)
        print("Image Path:", image_path)
        print("Predicted Emotion:", predicted_emotion)
        print("Emotion Probabilities:", emotion_probabilities)
        print()
