# Model Performance Analysis

This notebook evaluates the performance of the forecasting models and analyzes their prediction accuracy.

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from scipy.stats import pearsonr

# Add src directory to path
import sys
sys.path.append('../')

# Import our modules
from src.data_loader import fetch_data, prepare_dataset
from src.model import build_model

## Load Data

In [None]:
# Load the notebook configuration.
# The file is decoded explicitly as UTF-8 so the parsed values do not depend
# on the platform's default locale encoding.
import yaml
with open('../config/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Pick the instrument and date range from the `data` section of the config.
# NOTE(review): index 1 is expected to be the S&P500 entry — confirm against
# the ordering of `data.tickers` in config.yaml.
ticker = config['data']['tickers'][1]  # S&P500
start_date = config['data']['start_date']
end_date = config['data']['end_date']

# Fetch the series via the project's data loader and report its dimensions
print(f"Fetching data for {ticker} from {start_date} to {end_date}")
data = fetch_data(ticker, start_date, end_date)
print(f"Data shape: {data.shape}")

# Display the first few rows (kept as the final expression so the notebook
# renders it as the cell output)
data.head()

## Prepare Dataset

In [None]:
# Turn the fetched data into model samples; prepare_dataset receives the
# sliding-window length and returns the inputs X and the targets y
window_size = 60
X, y = prepare_dataset(data, window_size)

# Positional 80/20 split: the leading 80% of the samples are used for
# training and the trailing 20% are held out for testing (no shuffling)
split_index = int(len(X) * 0.8)
X_train, y_train = X[:split_index], y[:split_index]
X_test, y_test = X[split_index:], y[split_index:]

# Report the shapes of both partitions
print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Testing set: {X_test.shape}, {y_test.shape}")

## Train Model

In [None]:
# Hyper-parameters come from the `model` section of the loaded config
model_config = config['model']

# Build the LSTM variant of the project model; the input shape is taken from
# axes 1 and 2 of the training inputs
model = build_model(
    model_type='LSTM',
    input_shape=(X_train.shape[1], X_train.shape[2]),
    lstm_units=model_config['lstm_units'],
    dropout_rate=model_config['dropout'],
)

# Fit on the training split. The held-out test split is passed as the
# validation data, so the reported val_loss is measured on the test samples.
fit_options = dict(
    epochs=model_config['epochs'],
    batch_size=model_config['batch_size'],
    validation_data=(X_test, y_test),
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

## Plot Training History

In [None]:
# Plot the per-epoch training and validation loss recorded in `history`
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Model Training History')
ax.legend()
ax.grid(True)
plt.show()

## Evaluate Model Performance

In [None]:
# Ground truth is the held-out target array; predictions are flattened to
# one dimension before being compared against it
y_true = y_test
y_pred = model.predict(X_test).flatten()

# Error metrics (RMSE is derived from MSE rather than recomputed)
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mse)

# Goodness-of-fit metrics; only the correlation coefficient is kept from
# pearsonr, its second return value is discarded
r2 = r2_score(y_true, y_pred)
corr, _ = pearsonr(y_true, y_pred)

# Report every metric on its own line, rounded to four decimals
print(
    f"Mean Squared Error (MSE): {mse:.4f}",
    f"Root Mean Squared Error (RMSE): {rmse:.4f}",
    f"Mean Absolute Error (MAE): {mae:.4f}",
    f"R-squared (R²): {r2:.4f}",
    f"Correlation Coefficient: {corr:.4f}",
    sep="\n",
)

## Visualize Predictions

In [None]:
# Create a DataFrame with actual and predicted values.
# The rows are selected with a single positional slice and copied *afterwards*,
# so `test_data` is an independent frame: adding columns to it (here and in
# later cells) neither raises SettingWithCopyWarning nor touches `data`.
# NOTE(review): assumes prepare_dataset pairs sample i with the row at
# position i + window_size — confirm against src/data_loader.
test_start = split_index + window_size
test_data = data.iloc[test_start:test_start + len(y_pred)].copy()

# Fail early with a clear message if the rows and predictions do not line up
if len(test_data) != len(y_pred):
    raise ValueError(
        f"Cannot align predictions with data: {len(test_data)} rows available "
        f"from position {test_start}, but {len(y_pred)} predictions were produced"
    )
test_data['Predicted'] = y_pred

# Plot actual vs predicted prices
plt.figure(figsize=(12, 6))
plt.plot(test_data.index, test_data['Price'], label='Actual Price')
plt.plot(test_data.index, test_data['Predicted'], label='Predicted Price', alpha=0.7)
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Actual vs Predicted Price')
plt.legend()
plt.grid(True)
plt.show()

## Prediction Error Analysis

In [None]:
# Signed error (actual minus predicted) and the same error expressed as a
# percentage of the actual price
test_data['Error'] = test_data['Price'].sub(test_data['Predicted'])
test_data['Percent_Error'] = test_data['Error'].div(test_data['Price']).mul(100)

# Two stacked panels: error over time on top, error distribution below
fig, (ax_error, ax_hist) = plt.subplots(2, 1, figsize=(12, 10))

# Top panel: error per row, with a red zero line as the reference
ax_error.plot(test_data.index, test_data['Error'])
ax_error.axhline(y=0, color='r', linestyle='-')
ax_error.set_xlabel('Date')
ax_error.set_ylabel('Error')
ax_error.set_title('Prediction Error Over Time')
ax_error.grid(True)

# Bottom panel: histogram of the percentage error with a KDE overlay
sns.histplot(test_data['Percent_Error'], kde=True, ax=ax_hist)
ax_hist.set_xlabel('Percent Error (%)')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Prediction Error Distribution')
ax_hist.grid(True)

fig.tight_layout()
plt.show()

## Direction Prediction Accuracy

In [None]:
# Calculate row-to-row changes of the actual and the predicted price
test_data['Actual_Change'] = test_data['Price'].diff()
test_data['Predicted_Change'] = test_data['Predicted'].diff()

# A direction is predicted correctly when both changes have the same sign
# (their product is strictly positive)
test_data['Direction_Match'] = (test_data['Actual_Change'] * test_data['Predicted_Change']) > 0

# diff() leaves NaN in the first row (there is no previous value), and a NaN
# comparison evaluates to False. Restrict the statistics to rows where both
# changes are defined so that row is not counted as a wrong prediction.
has_change = test_data[['Actual_Change', 'Predicted_Change']].notna().all(axis=1)

# Calculate accuracy over the rows that actually have a direction
# (evaluates to NaN when fewer than two rows are available)
direction_accuracy = test_data.loc[has_change, 'Direction_Match'].mean()
print(f"Direction Prediction Accuracy: {direction_accuracy:.4f} ({direction_accuracy*100:.2f}%)")

# Confusion matrix for direction prediction.
# A zero change is treated as "not up" here, whereas Direction_Match above
# counts a zero change as a miss.
actual_up = test_data['Actual_Change'] > 0
predicted_up = test_data['Predicted_Change'] > 0

true_up = (has_change & actual_up & predicted_up).sum()
false_up = (has_change & ~actual_up & predicted_up).sum()
true_down = (has_change & ~actual_up & ~predicted_up).sum()
false_down = (has_change & actual_up & ~predicted_up).sum()

print("\nDirection Prediction Confusion Matrix:")
print(f"True Up: {true_up}, False Up: {false_up}")
print(f"True Down: {true_down}, False Down: {false_down}")

## Save the Model

In [None]:
# Persist the trained model under ../models, creating the directory if needed.
# NOTE(review): the target path has no file extension; newer Keras releases
# may require a `.keras` or `.h5` suffix — confirm against the installed
# TensorFlow/Keras version.
model_path = '../models/lstm_model'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)
print(f"Model saved to {model_path}")