# Dynamic Emotion Prediction with LSTMs

This notebook focuses on training a dynamic emotion prediction model using a Long Short-Term Memory (LSTM) network. Unlike static models that predict a single emotion value for an entire track, dynamic models predict how emotion (valence and arousal) evolves over time.

### Pipeline:
1.  **Load Data**: Load time-series features and corresponding dynamic emotion annotations.
2.  **Preprocess Data**: Create sequences from the time-series data suitable for an LSTM.
3.  **Build Model**: Construct an LSTM model using Keras/TensorFlow.
4.  **Train Model**: Train the model on the sequenced data.
5.  **Evaluate Model**: Evaluate the model's performance on a test set.
6.  **Visualize Results**: Plot the predicted vs. actual emotion values over time for a sample song.

In [None]:
# 2. Setup: Import Libraries
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob

# Plot appearance: seaborn "whitegrid" theme first, then override the default
# figure size (the theme call resets rcParams, so the order matters).
sns.set_theme(style="whitegrid")
plt.rcParams.update({"figure.figsize": (15, 7)})

# Report which TensorFlow build this run is using.
print("TensorFlow Version:", tf.__version__)

In [None]:
# 3. Data Loading
# Dynamic annotation CSVs; both files are read and concatenated by
# load_dynamic_annotations(). The paths are relative, so they resolve against
# the current working directory.
ANNOTATIONS_PATH_1 = "../dataset/DEAM/annotations/annotations per song/dynamic_annotations_averaged_songs_1_2000.csv"
ANNOTATIONS_PATH_2 = "../dataset/DEAM/annotations/annotations per song/dynamic_annotations_averaged_songs_2000_2058.csv"
# Directory that process_all_songs() scans for per-song "*_selected.csv" feature files.
FEATURES_DIR = "../selected"

def load_dynamic_annotations(paths=None):
    """Load the dynamic annotation CSVs and merge them into one DataFrame.

    Args:
        paths: Optional iterable of CSV paths to read and concatenate, in
            order. When omitted, ``ANNOTATIONS_PATH_1`` and
            ``ANNOTATIONS_PATH_2`` are used.

    Returns:
        A DataFrame in which ``valence_mean`` / ``arousal_mean`` have been
        rescaled with ``(x - 5) / 4`` and renamed to ``valence`` /
        ``arousal``, and ``song_id`` has been converted to ``str``.
        Returns ``None`` (after printing the reason) when a file cannot be
        read or parsed, or an expected column is missing.
    """
    if paths is None:
        paths = (ANNOTATIONS_PATH_1, ANNOTATIONS_PATH_2)

    try:
        frames = []
        for path in paths:
            frame = pd.read_csv(path)
            # Clean column names: headers may carry stray whitespace.
            frame.columns = frame.columns.str.strip()
            frames.append(frame)

        annotations = pd.concat(frames, ignore_index=True)

        # Map the rating scale onto [-1, 1]; this assumes the raw values are
        # on a 1..9 scale, as the (x - 5) / 4 transform implies.
        for col in ['valence_mean', 'arousal_mean']:
            annotations[col] = (annotations[col] - 5.0) / 4.0

        annotations = annotations.rename(columns={
            'valence_mean': 'valence',
            'arousal_mean': 'arousal'
        })

        # Feature file names yield string ids, so compare like with like.
        annotations['song_id'] = annotations['song_id'].astype(str)
        return annotations
    except (OSError, ValueError, KeyError, TypeError) as e:
        # Covers missing/unreadable files (OSError), empty or malformed CSVs
        # (pandas parser errors derive from ValueError), missing columns
        # (KeyError) and non-numeric rating columns (TypeError). Anything else
        # is a programming error and is allowed to propagate.
        print(f"Error loading annotations: {e}")
        return None

dynamic_annotations_df = load_dynamic_annotations()

# The loader reports failures by printing and returning None. Stop here in that
# case: the sequence-building step needs the annotations, and carrying on with
# None would only fail further down with a far less helpful error.
if dynamic_annotations_df is None:
    raise RuntimeError(
        "Dynamic annotations could not be loaded; "
        "check ANNOTATIONS_PATH_1 and ANNOTATIONS_PATH_2."
    )

print("Dynamic annotations loaded successfully.")
display(dynamic_annotations_df.head())

In [None]:
# 4. Data Processing: Create Sequences for LSTM

def create_sequences(features, labels, sequence_length=10):
    """Create overlapping (stride-1) feature windows and their target labels.

    Window ``i`` holds ``features[i : i + sequence_length]`` and is paired
    with ``labels[i + sequence_length]``, i.e. the label of the frame that
    immediately follows the window.

    Args:
        features: Array-like with one row per time step.
        labels: Array-like with one entry per time step; must be at least as
            long as ``features``.
        sequence_length: Number of time steps per window (positive integer).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, sequence_length, *features.shape[1:])`` and ``y`` has
        shape ``(n_windows, *labels.shape[1:])`` where
        ``n_windows = max(len(features) - sequence_length, 0)``. When the
        input is too short for a single window, both arrays are empty but
        still carry the trailing dimensions.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
        IndexError: If ``labels`` is shorter than ``features``.
    """
    if sequence_length < 1:
        raise ValueError(
            f"sequence_length must be a positive integer, got {sequence_length}"
        )

    features = np.asarray(features)
    labels = np.asarray(labels)
    n_windows = len(features) - sequence_length

    if n_windows <= 0:
        # Keep the trailing dimensions so callers can still stack or inspect
        # the result instead of receiving a shapeless (0,) array.
        empty_X = np.empty((0, sequence_length) + features.shape[1:], dtype=features.dtype)
        empty_y = np.empty((0,) + labels.shape[1:], dtype=labels.dtype)
        return empty_X, empty_y

    X = np.array([features[start:start + sequence_length] for start in range(n_windows)])
    y = np.array([labels[start + sequence_length] for start in range(n_windows)])
    return X, y

def process_all_songs(feature_dir, annotations_df, sequence_length=10, max_songs=50):
    """Build one stacked dataset of LSTM windows from per-song feature files.

    Every ``*_selected.csv`` file in ``feature_dir`` is treated as one song
    whose id is the file-name prefix before the first underscore. Each feature
    frame is paired with the annotation whose ``time`` is closest to the
    frame's ``frameTime``; the aligned rows are then cut into windows with
    ``create_sequences``.

    Args:
        feature_dir: Directory containing the ``*_selected.csv`` feature files.
            Each file must have a ``frameTime`` column; all other columns are
            used as features.
        annotations_df: DataFrame with ``song_id``, ``time``, ``valence`` and
            ``arousal`` columns.
        sequence_length: Number of frames per window.
        max_songs: Maximum number of feature files to read (``None`` = all).
            Files are taken in sorted path order so the subset is reproducible.

    Returns:
        Tuple ``(X, y)``: the per-song windows stacked song after song, and
        the matching ``[valence, arousal]`` targets.

    Raises:
        ValueError: If ``annotations_df`` is ``None`` or no song yields a
            single window.
    """
    if annotations_df is None:
        raise ValueError(
            "annotations_df is None; load the dynamic annotations before building sequences."
        )

    label_cols = ['valence', 'arousal']
    all_X, all_y = [], []

    # glob() returns paths in arbitrary, filesystem-dependent order; sort so the
    # max_songs subset is the same on every run and machine.
    feature_files = sorted(glob(os.path.join(feature_dir, "*_selected.csv")))[:max_songs]

    # Ids parsed from file names are strings; normalise once, outside the loop.
    song_ids = annotations_df['song_id'].astype(str)

    for file_path in feature_files:
        song_id = os.path.basename(file_path).split('_')[0]

        # Keep only the columns needed for alignment so unrelated annotation
        # columns cannot collide with feature names during the merge, and drop
        # rows with missing time/labels (merge_asof rejects null keys, and NaN
        # targets would poison the training loss).
        song_labels_df = annotations_df.loc[song_ids == song_id, ['time'] + label_cols].dropna()
        if song_labels_df.empty:
            continue

        song_features_df = pd.read_csv(file_path)
        feature_cols = [col for col in song_features_df.columns if col != 'frameTime']

        # merge_asof requires both keys to be sorted and of the same dtype, so
        # cast them to float64 before aligning.
        frames_sorted = song_features_df.astype({'frameTime': 'float64'}).sort_values('frameTime')
        labels_sorted = song_labels_df.astype({'time': 'float64'}).sort_values('time')

        # Simplified alignment: every frame takes the nearest annotation in
        # time. No tolerance is set, so frames outside the annotated span are
        # still matched to the closest annotation; interpolation would be more
        # robust.
        merged_data = pd.merge_asof(frames_sorted,
                                    labels_sorted,
                                    left_on='frameTime',
                                    right_on='time',
                                    direction='nearest')

        features = merged_data[feature_cols].to_numpy()
        labels = merged_data[label_cols].to_numpy()

        # Create sequences for this song
        X_song, y_song = create_sequences(features, labels, sequence_length)

        # Songs shorter than one window contribute nothing.
        if X_song.shape[0] > 0:
            all_X.append(X_song)
            all_y.append(y_song)

    if not all_X:
        raise ValueError(
            f"No sequences could be built from '{feature_dir}': no '*_selected.csv' "
            f"file matched an annotated song with more than {sequence_length} frames."
        )

    return np.vstack(all_X), np.vstack(all_y)

# Build the windowed dataset.
# Only the first 50 feature files are read so the cell finishes in reasonable time.
X_seq, y_seq = process_all_songs(
    feature_dir=FEATURES_DIR,
    annotations_df=dynamic_annotations_df,
    sequence_length=10,
    max_songs=50,
)

print(f"Sequenced data shapes: X={X_seq.shape}, y={y_seq.shape}")

In [None]:
# 5. Model Training

# Split data
# X_seq is stacked song after song, and consecutive windows of a song overlap in
# all but one frame. A shuffled split would scatter near-identical windows of
# the same song across train and test and inflate the scores, so split
# contiguously instead: the first 80% of the windows train, the last 20% test.
# At most one song straddles the boundary.
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# Scale features
# StandardScaler expects 2-D input, so flatten
# (n_samples, n_timesteps, n_features) to (n_samples * n_timesteps, n_features),
# scale, then restore the original 3-D shape.
n_features = X_train.shape[2]

scaler = StandardScaler()
# Fit on the training windows only so no test-set statistics leak into the scaler.
X_train_scaled = scaler.fit_transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)

# Scale test data with the statistics learned from the training set
X_test_scaled = scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

print("Data scaled and ready for training.")
print(f"X_train_scaled shape: {X_train_scaled.shape}")

# Build LSTM model
# The input shape is declared with an explicit Input object; passing
# input_shape= to the first layer is deprecated in recent Keras versions.
model = Sequential([
    tf.keras.Input(shape=X_train_scaled.shape[1:]),  # (n_timesteps, n_features)
    LSTM(64, return_sequences=True),  # emits the full sequence for the next LSTM
    LSTM(32),                         # emits only its final state
    Dense(2)  # Output layer for valence and arousal
])

model.compile(optimizer='adam', loss='mse')
model.summary()

# Train the model
# validation_split holds out the last 20% of the (unshuffled) training windows,
# so the validation windows are contiguous as well; the remaining training
# windows are still shuffled every epoch by fit().
history = model.fit(X_train_scaled, y_train, epochs=20, batch_size=64, validation_split=0.2, verbose=1)

In [None]:
# 6. Evaluation
def _dimension_scores(actual, predicted):
    """Return ``(R², RMSE)`` for one emotion dimension."""
    r2 = r2_score(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    return r2, rmse


y_pred = model.predict(X_test_scaled)

# Column 0 holds valence, column 1 holds arousal (the label column order).
r2_valence, rmse_valence = _dimension_scores(y_test[:, 0], y_pred[:, 0])
r2_arousal, rmse_arousal = _dimension_scores(y_test[:, 1], y_pred[:, 1])

print("Model Evaluation:")
print(f"Valence - R²: {r2_valence:.4f}, RMSE: {rmse_valence:.4f}")
print(f"Arousal - R²: {r2_arousal:.4f}, RMSE: {rmse_arousal:.4f}")