# ARIMA Model Development With Auto Arima Optimization

# Import Libraries and Root Configuration

In [1]:
""" Configure the utilities module path for imports """
import sys
import os
from pathlib import Path

# The project root is the directory one level above the current working
# directory; put it at the front of sys.path (once) so that project-local
# packages can be imported from this notebook.
project_root = Path.cwd().parent
_root_entry = str(project_root)

if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

In [2]:
""" Import libraries to develop ARIMA model """
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from pmdarima import auto_arima
from sklearn.model_selection import TimeSeriesSplit
from src.utilities import StockDataProcessor, Evaluator, ModelPersister
import joblib

# Model Training and Evaluation

## Artifacts Setup

In [3]:
# Load the preprocessed AAPL dataset from the data directory that sits
# next to the notebook's working directory
file = Path('..') / 'data' / 'AAPL_preprocessed.csv'
data = pd.read_csv(filepath_or_buffer=file)

In [4]:
# Time-based split of the dataset into train and test partitions
train, test = StockDataProcessor.time_based_split(data)

# Independent copies of the 'Close' target column for each partition
y_train, y_test = (part['Close'].copy() for part in (train, test))

## Train Model

In [5]:
# Fit a non-seasonal ARIMA on the training closes; auto_arima runs its
# stepwise order search with p, d and q each capped at 5
model = auto_arima(
    y_train.values,
    max_p=5,
    max_d=5,
    max_q=5,
    seasonal=False,
    stepwise=True,
    error_action='ignore',
    suppress_warnings=True,
    random_state=42,
)

In [6]:
# model summary: tables[1] selects the coefficient table of the fitted model
# (columns: coef, std err, z, P>|z| and the [0.025, 0.975] bounds)
model.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0490,0.024,2.046,0.041,0.002,0.096
ma.L1,-0.0687,0.009,-7.695,0.000,-0.086,-0.051
sigma2,1.7148,0.018,97.208,0.000,1.680,1.749


## Apply Model to Make Prediction

### Make Prediction on Train Set

In [7]:
# In-sample predictions for the training window.
# Resolution order: predict_in_sample() -> fitted values of the underlying
# results object -> all-NaN placeholder with the length of y_train.
try:
    sample_pred = np.asarray(model.predict_in_sample(), dtype=float)

except Exception as e:
    # Best-effort by design: a prediction failure must not abort the notebook
    print("predict_in_sample() failed with:", e)

    try:
        # fittedvalues may be available on the wrapped results object
        sample_pred = np.asarray(model.arima_res_.fittedvalues, dtype=float)

    except Exception as fallback_err:
        # Report the second failure as well, so that NaN-only predictions
        # (and the metrics derived from them) can be traced to their cause
        print("fittedvalues fallback failed with:", fallback_err, "- using NaN predictions.")
        sample_pred = np.full(len(y_train), np.nan, dtype=float)

In [8]:
# ALIGNMENT: ensure we compare true and predicted with the same length and order.
# NOTE: despite its name, `train_pred` holds the ACTUAL training targets; the
# model output stays in `sample_pred`. The name is kept because the
# evaluation cell that follows reads it.
n_true, n_pred = len(y_train), len(sample_pred)

if n_pred == n_true:
    train_pred = y_train.values

else:
    # Tail-align on the common length. Start offsets are used instead of
    # negative slices because `[-0:]` would return the whole array, and both
    # arrays are trimmed so a longer prediction vector cannot leave a mismatch.
    n_common = min(n_true, n_pred)
    train_pred = y_train.values[n_true - n_common:]
    sample_pred = sample_pred[n_pred - n_common:]
    print(f"In-sample prediction length {n_pred} differs from train length {n_true}; aligned on tail.")

In [9]:
# In-sample evaluation with the project Evaluator (truth first, preds second):
# `train_pred` holds the actual training targets taken from y_train in the
# alignment cell, `sample_pred` holds the model's in-sample predictions
train_metrics = Evaluator.calculate_metrics(train_pred, sample_pred)

### Make Prediction on Test Set

In [10]:
# Forecast as many periods ahead as there are test observations
n_test = len(y_test)
test_pred = np.asarray(model.predict(n_periods=n_test), dtype=float)

# Label the forecast with the test index when y_test is an indexed Series,
# otherwise fall back to a default positional index
test_pred_series = (
    pd.Series(test_pred, index=y_test.index[:len(test_pred)])
    if isinstance(y_test, pd.Series) and y_test.index is not None
    else pd.Series(test_pred)
)

In [11]:
# test metrics evaluation: the actual values are cut to at most the forecast
# length before being compared with the forecast
test_metrics = Evaluator.calculate_metrics(y_test.values[:len(test_pred)], test_pred)

In [12]:
# Side-by-side table of actual test values and the forecast
comparison = pd.DataFrame(
    dict(
        y_actual=y_test.values[:len(test_pred)],
        y_pred=test_pred,
    )
)

# preview the first ten rows
comparison.head(n=10)

Unnamed: 0,y_actual,y_pred
0,137.987091,140.370557
1,138.144409,140.419509
2,141.349289,140.468461
3,147.110229,140.517413
4,146.323761,140.566365
5,146.195969,140.615318
6,148.653702,140.66427
7,142.922241,140.713222
8,143.669403,140.762174
9,146.195969,140.811126


In [13]:
# Train vs Test Performance: the Evaluator prints a comparison header for the
# named model and returns the table, which is displayed as the cell result
model_performance = Evaluator.print_evaluation_tables("ARIMA", train_metrics, test_metrics)
model_performance

--- Performance Comparison: Train vs Test (ARIMA) ---


Unnamed: 0,Train,Test
MSE,1.772,1013.019
MAE,0.694,24.663
RMSE,1.331,31.828
R2,0.999,-0.071
MAPE,1.297,12.54


## Time-Series Cross-Validation on Training Set

In [14]:
# TimeSeriesSplit CV on training set as diagnostic.
# Every fold refits auto_arima on the fold's training indices and forecasts
# as many periods as the fold's validation slice contains.
# NOTE(review): the folds search max_p=3 / max_d=2 / max_q=3 with
# error_action='warn', while the final model above uses 5 / 5 / 5 and
# 'ignore' - confirm the narrower search is intentional.
tscv = TimeSeriesSplit(n_splits=5)
cv_metrics_list = []

for fold, (train_idx, val_idx) in enumerate(tscv.split(y_train.values), start=1):
    y_trn = y_train.values[train_idx]
    # Named y_val (not y_test) so the hold-out test Series `y_test` created
    # during the train/test split is not overwritten by the last fold
    y_val = y_train.values[val_idx]

    try:
        cv_model = auto_arima(
            y_trn,
            seasonal=False,
            stepwise=True,
            suppress_warnings=True,
            error_action='warn',
            max_p=3, max_d=2, max_q=3,
            random_state=42
        )

        cv_pred = cv_model.predict(n_periods=len(y_val))

    except Exception as e:
        # Log the error and use persistence forecast as fallback, but keep running:
        # the last training value of the fold is repeated over the validation horizon
        print(f"Fold {fold}: auto_arima failed with {e}; using persistence fallback.")
        cv_pred = np.full(shape=len(y_val), fill_value=float(y_trn[-1]), dtype=float)

    metrics = Evaluator.calculate_metrics(y_val, cv_pred)
    cv_metrics_list.append(metrics)

In [15]:
# Aggregate CV metrics: NaN-aware mean of each metric across all folds
cv_agg = {k: float(np.nanmean([m[k] for m in cv_metrics_list])) for k in cv_metrics_list[0]}
model_cv = pd.DataFrame(cv_agg, index=["ARIMA"]).round(3)

# Prefix the existing labels instead of assigning a fixed positional list, so
# every column keeps the name of its own metric whatever order or number of
# keys the Evaluator returns (MSE -> CV_MSE, MAE -> CV_MAE, ...)
model_cv = model_cv.add_prefix("CV_")

In [16]:
# print a caption, then display the aggregated CV table as the cell result
print("Cross-Validation Metrics (Training folds):")
model_cv

Cross-Validation Metrics (Training folds):


Unnamed: 0,CV_MSE,CV_MAE,CV_RMSE,CV_R2,CV_MAPE
ARIMA,3911.595,32.545,37.569,-12.031,36.658


## Summary of The Model Evaluation

In [17]:
# One column per evaluation stage (Train / CV / Test), values rounded to 3 decimals
perf_summary = pd.DataFrame(
    dict(Train=train_metrics, CV=cv_agg, Test=test_metrics)
).round(3)

In [18]:
# print a caption, then display the Train / CV / Test summary as the cell result
print("=== Summary of The Model Evaluation ===")
perf_summary

=== Summary of The Model Evaluation ===


Unnamed: 0,Train,CV,Test
MSE,1.772,3911.595,1013.019
MAE,0.694,32.545,24.663
RMSE,1.331,37.569,31.828
R2,0.999,-12.031,-0.071
MAPE,1.297,36.658,12.54


# Overfitting Analysis

In [19]:
# Overfitting analysis (compare CV_RMSE to Test_RMSE)
# A missing 'RMSE' entry resolves to NaN rather than None, so the float()
# conversions below cannot fail on an absent key.
cv_rmse = float(cv_agg.get('RMSE', np.nan))

if isinstance(test_metrics, dict):
    test_rmse = test_metrics.get('RMSE', np.nan)
elif hasattr(test_metrics, '__getitem__'):
    # non-dict containers that still support item lookup
    test_rmse = test_metrics['RMSE']
else:
    test_rmse = np.nan
test_rmse = float(test_rmse)

# NaN propagates through subtraction and division, so both derived fields
# become NaN on their own whenever either RMSE is NaN - no explicit guards needed
overfit = {
    'Model': 'ARIMA',
    'CV_RMSE': cv_rmse,
    'Test_RMSE': test_rmse,
    'RMSE_Increase': test_rmse - cv_rmse,
    # the small epsilon keeps the ratio defined when the CV RMSE is exactly zero
    'Overfitting_Ratio': test_rmse / (cv_rmse + 1e-8),
}

overfit_df = pd.DataFrame([overfit]).round(3)

In [20]:
# print a caption, then display the overfitting table as the cell result
print("=== Overfitting Analysis (ARIMA Model) ===")
overfit_df

=== Overfitting Analysis (ARIMA Model) ===


Unnamed: 0,Model,CV_RMSE,Test_RMSE,RMSE_Increase,Overfitting_Ratio
0,ARIMA,37.569,31.828,-5.741,0.847


# Model Performance and Persistence

In [21]:
# Flatten each single-row DataFrame into a plain {column: value} dict for
# persisting: to_dict() yields {column: {row_label: value}}, and the first
# (only) value of every inner dict is kept
model_cv_flat = {col: next(iter(rows.values())) for col, rows in model_cv.to_dict().items()}
overfit_df_flat = {col: next(iter(rows.values())) for col, rows in overfit_df.to_dict().items()}

In [22]:
# Flat records handed to the persister in the cells below. Keys unpacked
# later win on a name clash: test_metrics entries would override same-named
# CV entries (the CV keys carry a CV_ prefix), and overfit_df_flat already
# contains its own 'Model' entry with the same value.
model_perf = {'Model': 'ARIMA', **model_cv_flat, **test_metrics}
overfit_perf = {'Model': 'ARIMA', **overfit_df_flat}

# model persister object
persister = ModelPersister(model_name="Arima")

In [23]:
# aggregate model performance: pass the combined CV + test record to the
# persister (the logged output reports an append to a_ModelPerformance.csv)
persister.aggregated_performance(model_perf)

Appended to aggregated performance: ..\artifacts\model-performance\a_ModelPerformance.csv


In [24]:
# save ARIMA model performance (the Train / CV / Test summary table)
persister.save_performance(perf_summary)

Arima performance saved: ..\artifacts\model-performance\arimaPerformance.csv


In [25]:
# save overfitting analysis: pass the flattened overfitting record to the
# persister (the logged output reports an append to a_overfittingAnalysis.csv)
persister.append_overfitting(overfit_perf)

Appended to overfitting analysis: ..\artifacts\model-performance\a_overfittingAnalysis.csv


In [26]:
# save model: persist the model fitted on the full training set
# (the logged output reports the file arima.pkl)
persister.save_model(model)

Model saved: ..\artifacts\models/arima.pkl
