In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
import mlflow
import mlflow.sklearn

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Read the dataset CSV from the working directory, then display its
# (rows, columns) shape as the cell output.
df = pd.read_csv("completed_Apples_stock price dataset.csv")
df.shape

(50592, 12)

In [5]:
import json

# Load the scores saved by earlier runs so this notebook can add its own entry;
# if the file has not been created yet, begin with an empty mapping.
try:
    scores_file = open("model_scores.json", "r")
except FileNotFoundError:
    final_scores = {}
else:
    with scores_file:
        final_scores = json.load(scores_file)

In [6]:
# Exogenous regressor columns, kept as a DataFrame.
x = df[
    [
        "nasdaq_index",
        "sp500_index",
        "inflation_rate",
        "unemployment_rate",
        "interest_rate",
        "market_sentiment",
    ]
]
# Target series.
y = df["stock_price"]

In [7]:
# Ordered (unshuffled) hold-out split: the first 80% of rows are used for
# fitting and the remaining 20% for evaluation.
train_size = int(0.8 * len(y))

train = y.iloc[:train_size]
test = y.iloc[train_size:]
train_exog = x.iloc[:train_size]
test_exog = x.iloc[train_size:]

# Display the split point and the resulting shapes as the cell output.
train_size, train.shape, test.shape, train_exog.shape, test_exog.shape

(40473, (40473,), (10119,), (40473, 6), (10119, 6))

In [8]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, root_mean_squared_error, r2_score, mean_absolute_error
import mlflow.statsmodels  # MLflow flavor for persisting fitted statsmodels results

# Candidate (p, d, q) orders to compare; adjust p and q after inspecting ACF/PACF.
orders = [(1,1,1), (2,1,2), (3,1,1)]

# Metrics of the candidate with the lowest AIC seen so far.
# best_rmse / best_r2 / best_mae are the scores of that AIC-selected model;
# they are not optimised independently of each other.
best_config = None
best_aic = float("inf")
best_rmse = float("inf")
best_r2 = float("-inf")   # higher R2 is better, so the "nothing fitted" sentinel is -inf
best_mae = float("inf")

for i, order in enumerate(orders):
    # Fit on the training window with the exogenous regressors (ARIMAX).
    arima_model = ARIMA(train, order=order, exog=train_exog)
    arima_model_fit = arima_model.fit()

    # Forecast the whole test window; the matching exogenous rows must be supplied.
    arima_forecast = arima_model_fit.forecast(steps=len(test), exog=test_exog)

    # In-sample information criteria and out-of-sample error metrics.
    AIC = arima_model_fit.aic
    BIC = arima_model_fit.bic
    RMSE = root_mean_squared_error(test, arima_forecast)
    MAE = mean_absolute_error(test, arima_forecast)
    r2 = r2_score(test, arima_forecast)
    print(i, order, AIC, BIC, RMSE, r2)

    # Track the best model (selection criterion: lowest AIC).
    if AIC < best_aic:
        best_aic = AIC
        best_rmse = RMSE
        best_r2 = r2
        best_mae = MAE
        best_config = order

    # --- Plot comparison ---
    # Build the file name once so the saved figure and the logged artifact
    # always refer to the same path.
    plot_path = f"ARIMAX_{order}_{AIC}_{r2}.png"
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(train.index, train, label="Train Data")
    ax.plot(test.index, test, label="Test Data", color="black")
    ax.plot(test.index, arima_forecast, label="ARIMAX Forecast", color="red")
    ax.legend()
    ax.set_title(f" ARIMAX Forecast_{order}_{AIC}_{r2} ")
    fig.savefig(plot_path)
    plt.close(fig)  # release the figure; one is created per candidate order

    # One MLflow run per candidate order.
    with mlflow.start_run(run_name=f"ARIMAX_{order}"):
        mlflow.log_param("order", order)
        mlflow.log_metric("AIC", float(AIC))
        mlflow.log_metric("BIC", float(BIC))
        mlflow.log_metric("RMSE", float(RMSE))
        mlflow.log_metric("MAE", float(MAE))
        mlflow.log_metric("r2Square", float(r2))
        mlflow.log_artifact(plot_path)
        # Log the *fitted* results with the statsmodels flavor. The sklearn
        # flavor applied to the unfitted ARIMA specification stores an object
        # that carries no estimated parameters.
        mlflow.statsmodels.log_model(arima_model_fit, name="ARIMAX")

# Every candidate ARIMAX order has now been fitted, evaluated, plotted and logged.

0 (1, 1, 1) 266619.7236578196 266697.1989487234 35.51936301606545 0.13265723445059785


2026/01/31 08:33:59 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/31 08:33:59 INFO mlflow.store.db.utils: Updating database tables
2026/01/31 08:33:59 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/31 08:33:59 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/01/31 08:33:59 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/31 08:33:59 INFO alembic.runtime.migration: Will assume non-transactional DDL.
The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exce

1 (2, 1, 2) 266623.4031579351 266718.0951801508 35.495303155840894 0.13383186576008532
2 (3, 1, 1) 266616.79921800364 266711.49124021933 35.350584786374995 0.14088039866336077


In [9]:
# Store the metrics of the AIC-selected ARIMAX model under the "ARIMAX" key,
# replacing any entry loaded from a previous run.
final_scores["ARIMAX"] = dict(
    AIC=best_aic,
    RMSE=best_rmse,
    R2=best_r2,
    MAE=best_mae,
)

In [10]:
import json

# Save model scores.
# Serialise to a string *before* opening the file: mode "w" truncates the file
# as soon as it is opened, so a serialisation error raised while the file is
# already open would wipe the scores that were previously stored in it.
scores_json = json.dumps(final_scores, indent=4)
with open("model_scores.json", "w") as f:
    f.write(scores_json)