# 05. Model LSTM
Builds, trains, and evaluates an LSTM model on the processed trip data to predict delays at each stop.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, BatchNormalization, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import utils
import os

# Load the processed trip data and split it into train/test sets by date.
processed_file = 'data/processed_data/processed_trip_data.csv'
if not os.path.exists(processed_file):
    # Later cells check `df_process is not None` before doing any work.
    print(f"File not found: {processed_file}. Please run 02_process_data.ipynb first.")
    df_process = None
else:
    df_process = pd.read_csv(processed_file)
    df_process['start_date'] = df_process['start_date'].astype(str)
    print(f"Loaded processed data from {processed_file}")

    # Split on whole dates: the first 80% of the sorted date strings go to
    # training, the remainder to testing, so no date appears in both sets.
    unique_dates = sorted(df_process['start_date'].unique())
    split_idx = int(len(unique_dates) * 0.8)
    train_dates, test_dates = unique_dates[:split_idx], unique_dates[split_idx:]

    in_train = df_process['start_date'].isin(train_dates)
    in_test = df_process['start_date'].isin(test_dates)
    df_train = df_process[in_train].copy()
    df_test = df_process[in_test].copy()

In [None]:
# Build the model inputs/targets from the train and test frames.
if df_process is not None:
    n_past_trips = 5

    # For every route/direction key, the sorted distinct stop sequences seen
    # anywhere in the full dataset; the same mapping is passed for both splits.
    route_keys = df_process['route_direction_key']
    stops_dict = {
        rd_key: sorted(df_process.loc[route_keys == rd_key, 'stop_sequence'].unique())
        for rd_key in route_keys.unique()
    }

    train_sequences = utils.create_trip_based_sequences_multi_route(
        df_train, n_past_trips, stops_dict=stops_dict
    )
    X_delays_train, X_features_train, X_agg_train, y_train, _, _, n_stops = train_sequences

    test_sequences = utils.create_trip_based_sequences_multi_route(
        df_test, n_past_trips, stops_dict=stops_dict
    )
    X_delays_test, X_features_test, X_agg_test, y_test, _, _, _ = test_sequences

In [None]:
# Standardize delays, targets and auxiliary features (fit on train only).
if df_process is not None:
    # Delay windows are flattened to rows of n_stops columns for the scaler,
    # then restored to their original shape.
    delay_scaler = StandardScaler()
    flat_delays_train = X_delays_train.reshape(-1, n_stops)
    flat_delays_test = X_delays_test.reshape(-1, n_stops)
    X_delays_train_scaled = delay_scaler.fit_transform(flat_delays_train).reshape(X_delays_train.shape)
    X_delays_test_scaled = delay_scaler.transform(flat_delays_test).reshape(X_delays_test.shape)

    # Targets reuse the delay scaler so predictions can be inverse-transformed
    # with the same statistics.
    y_train_scaled, y_test_scaled = (
        delay_scaler.transform(y_train),
        delay_scaler.transform(y_test),
    )

    # Per-trip features and aggregate features are joined column-wise and
    # scaled together with a separate scaler.
    X_combined_train = np.concatenate((X_features_train, X_agg_train), axis=1)
    X_combined_test = np.concatenate((X_features_test, X_agg_test), axis=1)

    feature_scaler = StandardScaler()
    X_combined_train_scaled = feature_scaler.fit_transform(X_combined_train)
    X_combined_test_scaled = feature_scaler.transform(X_combined_test)

In [None]:
# Build LSTM Model
def build_trip_lstm_model(n_past_trips, n_stops, n_features):
    """Build and compile the two-input delay prediction network.

    The ``delay_input`` branch runs two stacked LSTM layers over a window of
    shape ``(n_past_trips, n_stops)``. The ``feature_input`` branch passes a
    flat vector of length ``n_features`` through two dense layers. Both
    branches are concatenated, refined by a small dense head, and projected
    to a linear output of width ``n_stops``.

    Args:
        n_past_trips: Number of time steps in the delay window.
        n_stops: Width of each time step and of the output layer.
        n_features: Length of the auxiliary feature vector.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 0.001), MSE loss
        and an MAE metric.
    """
    weight_decay = 0.001

    # Recurrent branch over the past-trip delay window.
    delay_input = Input(shape=(n_past_trips, n_stops), name='delay_input')
    seq_branch = LSTM(64, return_sequences=True, kernel_regularizer=l2(weight_decay))(delay_input)
    seq_branch = Dropout(0.3)(seq_branch)
    seq_branch = LSTM(32, return_sequences=False, kernel_regularizer=l2(weight_decay))(seq_branch)
    seq_branch = Dropout(0.2)(seq_branch)

    # Dense branch over the auxiliary features.
    feature_input = Input(shape=(n_features,), name='feature_input')
    feat_branch = Dense(32, activation='relu')(feature_input)
    feat_branch = Dropout(0.2)(feat_branch)
    feat_branch = Dense(16, activation='relu')(feat_branch)

    # Fusion head.
    merged = Concatenate()([seq_branch, feat_branch])
    merged = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(merged)
    merged = BatchNormalization()(merged)
    merged = Dropout(0.3)(merged)
    merged = Dense(32, activation='relu')(merged)

    # One linear output unit per stop.
    output = Dense(n_stops, activation='linear')(merged)

    network = Model(inputs=[delay_input, feature_input], outputs=output)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae'],
    )
    return network

if df_process is not None:
    # The feature branch width is the column count of the combined
    # (per-trip + aggregate) feature matrix.
    n_features = X_combined_train.shape[1]
    model = build_trip_lstm_model(
        n_past_trips=n_past_trips,
        n_stops=n_stops,
        n_features=n_features,
    )
    model.summary()

In [None]:
# Train the model, then score it on the held-out test dates.
if df_process is not None:
    # Input order matches the model definition: delay window first,
    # auxiliary features second.
    train_inputs = [X_delays_train_scaled, X_combined_train_scaled]
    test_inputs = [X_delays_test_scaled, X_combined_test_scaled]

    # Keras carves validation_split off the end of the training arrays.
    history = model.fit(
        train_inputs,
        y_train_scaled,
        validation_split=0.2,
        epochs=20,
        batch_size=64,
        verbose=1
    )

    # Evaluate: predictions are mapped back to the original delay scale
    # before computing metrics against the unscaled targets.
    y_pred_scaled = model.predict(test_inputs)
    y_pred = delay_scaler.inverse_transform(y_pred_scaled)

    actual_flat = y_test.flatten()
    predicted_flat = y_pred.flatten()
    mae_lstm = mean_absolute_error(actual_flat, predicted_flat)
    r2_lstm = r2_score(actual_flat, predicted_flat)

    print(f"LSTM MAE: {mae_lstm:.2f}, R2: {r2_lstm:.4f}")