<a href="https://colab.research.google.com/github/ujjwalva29-crypto/dating-bot/blob/main/Lightweight_1D_CNN_GRU_Model_Builder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ==============================================================================

#
# Libraries required: TensorFlow, NumPy, scikit-learn
# ==============================================================================

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GRU, Dense, Dropout, BatchNormalization, MaxPooling1D, Flatten
import numpy as np
from sklearn.model_selection import train_test_split

# --- Configuration for Lightweight Model ---
# These parameters directly influence model size and complexity.
# We aim for small values to ensure embeddability (TFLite conversion).
INPUT_SHAPE = (200, 39)  # (Number of frames, Number of features per frame)
NUM_EMOTIONS = 6         # Example: Neutral, Happy, Sad, Angry, Fear, Disgust

# --- 1. Placeholder Data Generation (Simulating Step 1 Output) ---
# In a real scenario, this would be replaced by your loaded and processed MFCC data.
print("1. Generating Placeholder Data...")

# Create 1000 samples of mock feature data (200 frames x 39 MFCCs)
X = np.random.rand(1000, INPUT_SHAPE[0], INPUT_SHAPE[1]).astype('float32')
# Create corresponding mock one-hot encoded labels
y = tf.keras.utils.to_categorical(np.random.randint(0, NUM_EMOTIONS, 1000), num_classes=NUM_EMOTIONS)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"   X_train shape: {X_train.shape}")
print(f"   y_train shape: {y_train.shape}")
print("-" * 50)


# --- 2. Defining the Lightweight 1D CNN-GRU Hybrid Model ---

def build_lightweight_cnn_gru_model(input_shape, num_classes):
    """
    Constructs a lightweight 1D CNN-GRU hybrid model optimized for low parameter count.
    """
    model = Sequential([
        # --- CNN BLOCK 1: Local Feature Extraction ---
        # Low filter count (32) and small kernel size (3) for efficiency.
        Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape, padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=4), # Aggressively reduce temporal dimension (e.g., 200 -> 50 frames)
        Dropout(0.2),

        # --- CNN BLOCK 2: Deeper Feature Extraction ---
        Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2), # Further reduce temporal dimension (e.g., 50 -> 25 frames)
        Dropout(0.2),

        # --- GRU BLOCK: Temporal Context Capture ---
        # GRU is lighter than LSTM. Only use 128 units for low parameter count.
        GRU(128, return_sequences=False), # return_sequences=False outputs the final state (the summary vector)
        Dropout(0.3),

        # --- DENSE CLASSIFICATION BLOCK ---
        Dense(64, activation='relu'),
        Dropout(0.3),

        # --- OUTPUT LAYER ---
        # Outputs a probability distribution over the defined emotion classes
        Dense(num_classes, activation='softmax')
    ])
    return model

# --- 3. Build and Summarize the Model ---

print("2. Building Lightweight 1D CNN-GRU Model...")
model = build_lightweight_cnn_gru_model(INPUT_SHAPE, NUM_EMOTIONS)

# Display the model structure and parameter count
model.summary()
print("-" * 50)

# Check the total parameter count
total_params = model.count_params()
print(f"Total Trainable Parameters: {total_params}")

# --- 4. Compile and Mock Train (Setup for Phase 2) ---
print("3. Compiling Model (Setup for training)...")
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Perform a quick, mock training run to ensure the architecture is runnable
# In the next phase, we would run proper training with real data.
print("   Running quick mock fit to verify architecture...")
history = model.fit(X_train, y_train,
                    epochs=1, # Only 1 epoch for testing, replace with 50+ later
                    batch_size=32,
                    validation_data=(X_val, y_val),
                    verbose=1)

print("-" * 50)
print("Model definition and setup successfully completed. The architecture is ready for proper training with your real MFCC data.")
print(f"Model parameters ({total_params}) are reasonably low for subsequent quantization.")

1. Generating Placeholder Data...
   X_train shape: (800, 200, 39)
   y_train shape: (800, 6)
--------------------------------------------------
2. Building Lightweight 1D CNN-GRU Model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


--------------------------------------------------
Total Trainable Parameters: 93510
3. Compiling Model (Setup for training)...
   Running quick mock fit to verify architecture...
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 86ms/step - accuracy: 0.1763 - loss: 1.8817 - val_accuracy: 0.1600 - val_loss: 1.7957
--------------------------------------------------
Model definition and setup successfully completed. The architecture is ready for proper training with your real MFCC data.
Model parameters (93510) are reasonably low for subsequent quantization.


In [7]:
# ==============================================================================
#
# This script defines the lightweight model, trains it using callbacks,
# and converts it into a highly optimized, fully integer quantized TFLite model.
#
# Libraries required: TensorFlow, NumPy, scikit-learn
# ==============================================================================

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GRU, Dense, Dropout, BatchNormalization, MaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
from sklearn.model_selection import train_test_split
import os

# --- Configuration for Lightweight Model ---
INPUT_SHAPE = (200, 39)  # (Number of frames, Number of features per frame)
NUM_EMOTIONS = 6
MODEL_NAME = "emotion_detector_cnn_gru"
SAVED_MODEL_PATH = f"{MODEL_NAME}_best.keras"
TFLITE_MODEL_PATH = f"{MODEL_NAME}_float.tflite" # Change output name to reflect float model
EPOCHS = 50              # Set to a realistic number for training
PATIENCE = 10            # Early stopping patience

# --- 1. Placeholder Data Generation (Simulating Step 1 Output) ---
# NOTE: In a real project, replace this section with loading your actual MFCC data.
print("1. Generating Placeholder Data for Training and Calibration...")

X = np.random.rand(4000, INPUT_SHAPE[0], INPUT_SHAPE[1]).astype('float32') # Increased data size for realistic training
y = tf.keras.utils.to_categorical(np.random.randint(0, NUM_EMOTIONS, 4000), num_classes=NUM_EMOTIONS)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Reserve a small set for TFLite calibration
X_calib = X_train[:100]

print(f"   X_train shape: {X_train.shape}")
print(f"   X_calib (Calibration) shape: {X_calib.shape}")
print("-" * 70)


# --- 2. Defining the Lightweight 1D CNN-GRU Hybrid Model (Same as previous step) ---

def build_lightweight_cnn_gru_model(input_shape, num_classes):
    """
    Constructs a lightweight 1D CNN-GRU hybrid model optimized for low parameter count.
    """
    model = Sequential([
        # CNN BLOCK 1
        Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape, padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=4),
        Dropout(0.2),

        # CNN BLOCK 2
        Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # GRU BLOCK (Lighter than LSTM)
        GRU(128, return_sequences=False),
        Dropout(0.3),

        # DENSE CLASSIFICATION BLOCK
        Dense(64, activation='relu'),
        Dropout(0.3),

        # OUTPUT LAYER
        Dense(num_classes, activation='softmax')
    ])
    return model

model = build_lightweight_cnn_gru_model(INPUT_SHAPE, NUM_EMOTIONS)
model.summary()
print(f"Total Trainable Parameters: {model.count_params()}")
print("-" * 70)


# --- 3. Compile and Train the Model with Callbacks ---

print(f"3. Starting Model Training for {EPOCHS} epochs...")

# Define Callbacks for robust training:
# 1. EarlyStopping: Stops training if validation loss doesn't improve after PATIENCE epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

# 2. ModelCheckpoint: Saves the model's weights only when 'val_loss' achieves a new minimum.
model_checkpoint = ModelCheckpoint(SAVED_MODEL_PATH,
                                   monitor='val_loss',
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='min',
                                   verbose=1)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train,
                    epochs=EPOCHS,
                    batch_size=32,
                    validation_data=(X_val, y_val),
                    callbacks=[early_stopping, model_checkpoint],
                    verbose=1)

print("-" * 70)
print(f"Training finished. Best model saved to: {SAVED_MODEL_PATH}")


# --- 4. Post-Training TFLite Conversion (No Quantization) ---
# Let's try a standard TFLite conversion first to see if that works.

# Function to generate a representative dataset (still needed for some non-quantized conversions with certain ops)
def representative_dataset_gen():
    """Generator function to provide input data samples for TFLite converter."""
    for i in range(X_calib.shape[0]):
        yield [X_calib[i:i+1]]


print("4. Starting TFLite Conversion (No Quantization)...")

# 4a. Load the best Keras model saved during training
best_model = tf.keras.models.load_model(SAVED_MODEL_PATH)

# 4b. Initialize the TFLite Converter
converter = tf.lite.TFLiteConverter.from_keras_model(best_model)

# 4c. Set supported ops (using default or specifying built-ins and select TF ops)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
# converter.representative_dataset = representative_dataset_gen # Representative dataset not strictly needed for non-quantized, but can help with some ops. Leaving commented for now.
converter._experimental_lower_tensor_list_ops = False # Keep this disabled for now

# 4d. Convert the model
tflite_float_model = converter.convert()

# 4e. Save the TFLite model
with open(TFLITE_MODEL_PATH, 'wb') as f:
    f.write(tflite_float_model)

# 4f. Report results
original_size = os.path.getsize(SAVED_MODEL_PATH) / 1024 / 1024
float_size = os.path.getsize(TFLITE_MODEL_PATH) / 1024 / 1024
print("-" * 70)
print(f"TFLite Float Conversion Complete!")
print(f"Original Keras Model Size: {original_size:.2f} MB")
print(f"Float TFLite Model Size: {float_size:.2f} MB")
print(f"Model saved to: {TFLITE_MODEL_PATH}")

1. Generating Placeholder Data for Training and Calibration...
   X_train shape: (3200, 200, 39)
   X_calib (Calibration) shape: (100, 200, 39)
----------------------------------------------------------------------


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Total Trainable Parameters: 93510
----------------------------------------------------------------------
3. Starting Model Training for 50 epochs...
Epoch 1/50
[1m 99/100[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 36ms/step - accuracy: 0.1620 - loss: 1.8831
Epoch 1: val_loss improved from inf to 1.79844, saving model to emotion_detector_cnn_gru_best.keras
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - accuracy: 0.1621 - loss: 1.8824 - val_accuracy: 0.1587 - val_loss: 1.7984
Epoch 2/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.1751 - loss: 1.8075
Epoch 2: val_loss did not improve from 1.79844
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - accuracy: 0.1751 - loss: 1.8076 - val_accuracy: 0.1600 - val_loss: 1.8162
Epoch 3/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.1913 - loss: 1.7902
Epoch 3: val_loss did not improve fro

In [8]:
# ==============================================================================
# PHASE 2: STEP 3 - Natural Language Generation (NLG) Setup
#
# This script defines the structure for the NLG component and sets up a pipeline
# for generating high-quality, empathetic training data using the Gemini API.
#
# Libraries required: requests, json, time, numpy
# NOTE: The Gemini API call requires a running environment/key.
# ==============================================================================

import requests
import json
import time
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# --- Configuration and API Setup ---
API_KEY = "" # Leave the API key as an empty string for the environment to provide it
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-05-20:generateContent?key=" + API_KEY
# The six emotion classes from our SER model (Step 2)
EMOTION_CLASSES = ['Neutral', 'Happy', 'Sadness', 'Anger', 'Fear', 'Disgust']

# --- 1. Define the LLM Persona and Task for Generating Training Data ---
SYSTEM_PROMPT = """
You are a highly empathetic and clinically informed AI companion, specializing in anxiety and stress reduction.
Your goal is to provide a single, concise (under 20 words), friendly, and action-oriented response to the user's emotion and context.
Always acknowledge the emotion first, then offer a simple, immediate suggestion (like breathing, a change of focus, or validation).
DO NOT use flowery language. Maintain a calm, friendly, and firm tone.
"""

# --- 2. Function for Generating High-Quality Training Sample ---

def generate_training_sample(emotion, user_context):
    """
    Uses the Gemini API to generate an ideal empathetic response for a given emotion/context.
    This response will be used as the target (Y_NLG) for fine-tuning our lightweight model.
    """

    # Construct the user query for the LLM
    user_query = f"The user is expressing '{emotion}'. The user said: '{user_context}'. Please provide the friendly, supportive response."

    # Payload structure for the API call
    payload = {
        "contents": [{"parts": [{"text": user_query}]}],
        "systemInstruction": {"parts": [{"text": SYSTEM_PROMPT}]},
        "tools": [{"google_search": {}}], # Use grounding for general knowledge on crisis/coping
        "config": {
            "temperature": 0.6,
            "maxOutputTokens": 40 # Limit output to ensure conciseness
        }
    }

    # Implement Exponential Backoff for robust API calling
    for attempt in range(5):
        try:
            response = requests.post(API_URL, headers={'Content-Type': 'application/json'}, data=json.dumps(payload))
            response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)

            result = response.json()
            # Extract the generated text
            generated_text = result.get('candidates', [{}])[0].get('content', {}).get('parts', [{}])[0].get('text', 'Error: Generation failed.')

            return generated_text.strip()

        except requests.exceptions.RequestException as e:
            if attempt < 4:
                # Wait for 2^attempt seconds before retrying
                wait_time = 2 ** attempt
                # print(f"API call failed: {e}. Retrying in {wait_time}s...") # Uncomment for debugging
                time.sleep(wait_time)
            else:
                # print(f"API call failed after 5 attempts.") # Uncomment for debugging
                return "I'm having trouble connecting right now, but remember to breathe."
    return "Generation Error."

# --- 3. Mock Data Generation Example ---

print("3. Generating Empathy Training Data Sample...")

# Mock outputs from our SER model (Step 2)
mock_emotion = 'Fear'
mock_context = "I can't believe I have to do this presentation. My heart is racing and I feel sick."

# Generate the ideal, high-quality response
ideal_response = generate_training_sample(mock_emotion, mock_context)

print("-" * 70)
print(f"Detected Emotion: {mock_emotion}")
print(f"User Context: {mock_context}")
print(f"Ideal Training Target (NLG Response): {ideal_response}")
print("-" * 70)

# --- 4. Mockup of Lightweight NLG Model Input/Output ---

# The final lightweight NLG model will be fine-tuned on thousands of these (Context + Emotion -> Response) pairs.
# Input to the on-device NLG model: [Tokenized_Context] + [One_Hot_Emotion_Vector]
# Output from the on-device NLG model: [Tokenized_Response]

print("4. Final Data Structure for Lightweight Model Fine-Tuning:")

# Mock Input and Output for a small, fine-tuned Seq2Seq model
# (Note: Tokenization/Padding is required before training the small model)
mock_input = f"[{mock_emotion}] {mock_context}"
print(f"NLG Model Input (Source Sequence): {mock_input}")
print(f"NLG Model Output (Target Sequence): {ideal_response}")
print("-" * 70)
print("Next steps will involve using thousands of these generated pairs to fine-tune a small text model (e.g., T5-small) for on-device deployment.")


3. Generating Empathy Training Data Sample...
----------------------------------------------------------------------
Detected Emotion: Fear
User Context: I can't believe I have to do this presentation. My heart is racing and I feel sick.
Ideal Training Target (NLG Response): I'm having trouble connecting right now, but remember to breathe.
----------------------------------------------------------------------
4. Final Data Structure for Lightweight Model Fine-Tuning:
NLG Model Input (Source Sequence): [Fear] I can't believe I have to do this presentation. My heart is racing and I feel sick.
NLG Model Output (Target Sequence): I'm having trouble connecting right now, but remember to breathe.
----------------------------------------------------------------------
Next steps will involve using thousands of these generated pairs to fine-tune a small text model (e.g., T5-small) for on-device deployment.
