# LSTM Model Development With Adam Optimization

# Import Libraries and Root Configuration

In [1]:
""" Configure the utilities module path for imports """
import sys
import os
from pathlib import Path

# get project root as parent of current working directory
project_root = Path(os.getcwd()).parent

if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

In [2]:
""" Import libraries to develop XGBoost model """
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from src.utilities import StockDataProcessor, Evaluator, ModelPersister

# Feature and Training Setup

## Artifacts Setup

In [3]:
# read dataset from file
file = Path(r'../data/AAPL_preprocessed.csv')
data = pd.read_csv(file)

In [4]:
# split data into train, validation, and test sets
train, test = StockDataProcessor.time_based_split(data)

# feature scaling
scaler = MinMaxScaler()
y_scaled = scaler.fit_transform(data[['Close']])

SEQ_LEN = 10

In [5]:
# scale train and test data
train_scale = y_scaled[:len(train)]
test_scale = y_scaled[len(train):]

In [6]:
# create sequences
processor = StockDataProcessor()

x_train, y_train = processor.create_sequences(train_scale, seq_length=SEQ_LEN)
x_test, y_test = processor.create_sequences(test_scale, seq_length=SEQ_LEN)

In [7]:
# reshape sequences for LSTM input
x_train = x_train.reshape((x_train.shape[0], SEQ_LEN, 1))
x_test = x_test.reshape((x_test.shape[0], SEQ_LEN, 1))

# Model Training with Hyperparameter Optimization

In [8]:
# define different LSTM hyperparameter configurations for experimentation.
lstm_configs = [
    {'units': 50, 'dropout': 0.2, 'lr': 0.001},
    {'units': 100, 'dropout': 0.3, 'lr': 0.001},
    {'units': 50, 'dropout': 0.2, 'lr': 0.0005},
    {'units': 100, 'dropout': 0.3, 'lr': 0.0005}
]

best_val_loss = float('inf')
best_config = None
best_model = None

In [9]:
# iterate through each  LSTM configuration to train and evaluation models
for config in lstm_configs:
    model = Sequential([
        LSTM(config['units'], return_sequences=True, input_shape=(SEQ_LEN, 1)),
        Dropout(config['dropout']),
        LSTM(config['units'], return_sequences=False),
        Dropout(config['dropout']),
        Dense(1)
    ])

    model.compile(optimizer=Adam(learning_rate=config['lr']), loss='mse')
    history = model.fit(x_train, y_train, validation_data=(x_test, y_test),
                        epochs=50, batch_size=32, callbacks=[EarlyStopping(patience=10)], verbose=0)
    
    if min(history.history['val_loss']) < best_val_loss:
        best_val_loss = min(history.history['val_loss'])
        best_config = config
        best_model = model

## Apply Model to make predictions

In [10]:
model = best_model

# train set prediction
train_pred_scaled = model.predict(x_train).flatten()
train_pred = scaler.inverse_transform(train_pred_scaled.reshape(-1, 1)).flatten()

# test set prediction
test_pred_scaled = model.predict(x_test).flatten()
test_pred = scaler.inverse_transform(test_pred_scaled.reshape(-1, 1)).flatten()

[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


## Evaluating The Model Performance

In [11]:
# align with actual
y_train_al = data['Close'].iloc[SEQ_LEN:len(train)].values
y_test_al = data['Close'].iloc[len(train) + SEQ_LEN:].values

train_pred = train_pred[-len(y_train_al):]
test_pred = test_pred[-len(y_test_al):]

In [12]:
# return MSE, MAE, RMSE, R2 and MAPE results as a list
train_metrics = Evaluator.calculate_metrics(y_train_al, train_pred)
test_metrics = Evaluator.calculate_metrics(y_test_al, test_pred)

In [13]:
# unpack metrics
train_mse, train_mae, train_rmse, train_r2, train_mape = train_metrics
test_mse, test_mae, test_rmse, test_r2, test_mape = test_metrics

In [14]:
#  train vs test performance
model_performance = Evaluator.print_evaluation_tables("LSTM", train_metrics, test_metrics)
model_performance

--- Performance Comparison: Train vs Test (LSTM) ---


Unnamed: 0,Metric,Training,Test
0,MSE,7.029,75.864
1,MAE,1.478,7.595
2,RMSE,2.651,8.71
3,R2 Score,0.996,0.919
4,MAPE,2.503,4.117


# Cross-Validation, Summary and Overfitting Analysis

## Cross Validation with TimeSeriesSplit

In [15]:
# TimeSeriesSplit CV on training set as diagnostic.
tscv = TimeSeriesSplit(n_splits=5)

for fold, (train_idx, test_idx) in enumerate(tscv.split(x_train), start=1):
    x_tr, x_tst = x_train[train_idx], x_train[test_idx]
    y_tr, y_tst = y_train[train_idx], y_train[test_idx]

    pred = np.full_like(y_tst, y_tr[-1])
    cv_metrics = Evaluator.calculate_metrics(y_tst, pred)

In [16]:
# unpack metrics
cv_mse, cv_mae, cv_rmse, cv_r2, cv_mape = cv_metrics

# cross validation performance
model_cv = pd.DataFrame({
    'Model': ['LSTM'],
    'CV_MSE': [cv_mse],
    'CV_MAE': [cv_mae],
    'CV_RMSE':[cv_rmse],
    'CV_R2': [cv_r2],
    'CV_MAPE': [cv_mape]
}).round(3)

In [17]:
print("Cross-Validation Metrics (Training folds):")
model_cv

Cross-Validation Metrics (Training folds):


Unnamed: 0,Model,CV_MSE,CV_MAE,CV_RMSE,CV_R2,CV_MAPE
0,LSTM,0.007,0.07,0.086,-0.191,12.897


## Summary of the Model Performance

In [18]:
perf_summary = pd.DataFrame({
    'Metrics' : ['MSE', 'MAE', 'RMSE', 'R2-Score', 'MAPE'],
    'Train': train_metrics,
    'CV': cv_metrics,
    'Test': test_metrics
}).round(3)

In [19]:
print("=== Summary of The Model Evaluation ===")
perf_summary

=== Summary of The Model Evaluation ===


Unnamed: 0,Metrics,Train,CV,Test
0,MSE,7.029,0.007,75.864
1,MAE,1.478,0.07,7.595
2,RMSE,2.651,0.086,8.71
3,R2-Score,0.996,-0.191,0.919
4,MAPE,2.503,12.897,4.117


## Overfitting Analysis

In [20]:
# Overfitting analysis (compare CV_RMSE to Test_RMSE)
overfit = {
    'Model': 'LSTM',
    'CV_RMSE': float(cv_rmse),
    'Test_RMSE': float(test_rmse),
    'RMSE_Increase': float(test_rmse - cv_rmse) if (not np.isnan(cv_rmse) and not np.isnan(test_rmse)) else np.nan,
    'Overfitting_Ratio': float(test_rmse / (cv_rmse + 1e-8)) if not np.isnan(cv_rmse) else np.nan
}

overfit_df = pd.DataFrame([overfit]).round(3)

In [21]:
print("=== Overfitting Analysis (LSTM Model) ===")
overfit_df

=== Overfitting Analysis (LSTM Model) ===


Unnamed: 0,Model,CV_RMSE,Test_RMSE,RMSE_Increase,Overfitting_Ratio
0,LSTM,0.086,8.71,8.624,101.567


In [22]:
# aggrageted model performance
agg_perf = pd.DataFrame({
    'Model': ['LSTM'],
    'Test MAE' : test_mae,
    'Test R2-Score': test_r2,
    'Test MAPE' : test_mape,
    'CV MAE' : cv_mae,
    'CV R2' : cv_r2,
    'CV MAPE' : cv_mape,
    'RMSE Increase' : overfit.get('RMSE_Increase', np.nan),
    'Overfitting Ratio' : overfit.get('Overfitting_Ratio', np.nan)
}).round(3)

In [23]:
agg_perf

Unnamed: 0,Model,Test MAE,Test R2-Score,Test MAPE,CV MAE,CV R2,CV MAPE,RMSE Increase,Overfitting Ratio
0,LSTM,7.595,0.919,4.117,0.07,-0.191,12.897,8.624,101.567


# Model Performance and Persistence

In [24]:
# model persistor object
persister = ModelPersister(model_name="LSTM")

In [25]:
# aggregate model performance
persister.aggregated_performance(agg_perf)

Appended to aggregated performance: ..\artifacts\model-performance\a_ModelPerformance.csv


In [26]:
# save ariXGBoost model performance
persister.save_performance(perf_summary)

LSTM performance saved: ..\artifacts\model-performance\lstmPerformance.csv


In [27]:
# save overfitting analysis
persister.append_overfitting(overfit_df)

Appended to overfitting analysis: ..\artifacts\model-performance\a_overfittingAnalysis.csv


In [28]:
# save model
persister.save_model(model)

Model saved: ..\artifacts\models/lstm.pkl
