Reference: [pmdarima stocks use case](https://alkaline-ml.com/pmdarima/usecases/stocks.html)

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pmdarima as pm
from pmdarima.arima import ndiffs
from pmdarima.metrics import smape
from collections import defaultdict

In [2]:
# Make plot directories.
# exist_ok=True makes the cell safe to re-run and avoids the gap between an
# isdir() check and the makedirs() call; a FileExistsError is still raised if
# the path exists but is not a directory.
for dirname in ['arima/']:
    os.makedirs(dirname, exist_ok=True)

In [17]:
# Load the price table (no header row; fields are separated by three spaces,
# which needs the python parsing engine).
df = pd.read_csv('prices.txt', sep='   ', header=None, engine='python')
nt, nInst = df.shape

# The first 500 rows form the training window; the remaining rows are held out.
df_train = df.iloc[:500]
df_test = df.iloc[500:]
print(df_train.shape, df_test.shape, sep='\n')

(500, 50)
(250, 50)


In [66]:
# Fitted models keyed by stock name (e.g. "stock27").
# A plain dict replaces defaultdict(None): with no default factory the
# defaultdict already raised KeyError for unknown stocks, it only *looked* as
# if a missing entry would come back as None.
model_cache = {}

def arima_train(train_prices: pd.Series, stock_name: str) -> None:
    """Fit an auto-ARIMA model on ``train_prices`` and cache it under ``stock_name``.

    The differencing order ``d`` is fixed up front as the larger of the orders
    estimated by the KPSS and ADF tests (alpha=0.05, at most 20 differences).
    ``pm.auto_arima`` then runs its stepwise search with that ``d``, allowing
    ``p`` and ``q`` up to 10 and printing each candidate (``trace=True``).

    Parameters
    ----------
    train_prices : pd.Series
        Price history to fit on.
    stock_name : str
        Key under which the fitted model is stored in ``model_cache``.
    """
    # KPSS first, then ADF; only the maximum of the two estimates is used.
    diff_orders = [
        ndiffs(train_prices, alpha=0.05, test=test_name, max_d=20)
        for test_name in ('kpss', 'adf')
    ]

    fitted = pm.auto_arima(
        train_prices,
        d=max(diff_orders),
        seasonal=True,
        stepwise=True,
        suppress_warnings=True,
        error_action='ignore',
        max_p=10,
        max_q=10,
        max_order=None,
        trace=True,
    )
    # Item assignment only mutates the module-level cache, so no `global` is needed.
    model_cache[stock_name] = fitted

def arima_update(observations: pd.Series, stock_name: str) -> None:
    """Feed newly observed prices to the cached model for ``stock_name``.

    The cached model's ``update`` method is called with ``observations``.
    Raises ``KeyError`` if no model has been cached under ``stock_name``.
    """
    cached_model = model_cache[stock_name]
    cached_model.update(observations)

def arima_forecast(stock_name: str, window: int = 10) -> float:
    """Return the forecast price ``window`` periods ahead for ``stock_name``.

    The cached model predicts ``window`` periods and only the final value,
    i.e. the prediction for the end of the window, is returned.

    Parameters
    ----------
    stock_name : str
        Key of a model previously stored in ``model_cache``.
    window : int, default 10
        Number of periods to forecast ahead.

    Returns
    -------
    float
        The last of the ``window`` predicted values.

    Raises
    ------
    KeyError
        If no model has been cached under ``stock_name``.
    """
    predictions = model_cache[stock_name].predict(n_periods=window)
    # np.asarray strips any pandas index so [-1] is always positional, and it
    # avoids building a full Python list just to read the final element.
    return float(np.asarray(predictions)[-1])

stock = 27
stock_name = f"stock{stock}"
window = 10
n_steps = 3  # number of consecutive forecast windows to evaluate

arima_train(df_train[stock], stock_name)

# Walk forward through the held-out prices one window at a time: forecast the
# price `window` periods ahead, compare the predicted and realised change
# relative to the latest known price, then reveal that window to the model
# before forecasting the next one.
test_prices = df_test[stock]
latest = df_train[stock].iloc[-1]
for start in range(0, n_steps * window, window):
    if start > 0:
        # The previous window has now been observed; feed it to the model.
        arima_update(test_prices.iloc[start - window:start], stock_name)
    forecast_price = arima_forecast(stock_name, window=window)
    actual_price = test_prices.iloc[start + window - 1]
    print(f"forecast={forecast_price - latest} | actual={actual_price - latest}")
    latest = actual_price

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-2846.542, Time=0.22 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-2805.526, Time=0.07 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-2852.508, Time=0.06 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-2847.567, Time=0.09 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-2773.538, Time=0.02 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=-2850.556, Time=0.07 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-2850.528, Time=0.05 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=-2848.567, Time=0.25 sec
 ARIMA(1,1,0)(0,0,0)[0]             : AIC=-2837.783, Time=0.04 sec

Best model:  ARIMA(1,1,0)(0,0,0)[0] intercept
Total fit time: 0.869 seconds
forecast=-0.040903453140511914 | actual=-0.10000000000000142
forecast=-0.03319842070084533 | actual=-0.019999999999999574
forecast=-0.03727812789778184 | actual=-0.07000000000000028


In [18]:
def get_all_smape_error():
    """Score every cached model's forecast over the test window with SMAPE.

    For each instrument column ``i`` of ``df_test``, the model cached under
    ``f"stock{i}"`` forecasts ``len(df_test)`` periods and that forecast is
    compared with the column's actual prices using ``smape``.

    Returns
    -------
    np.ndarray
        One SMAPE value per instrument column; ``nan`` for instruments that
        have no cached model.

    Notes
    -----
    The previous version referenced an undefined ``prcSoFar`` frame, looked
    models up by integer instead of by ``f"stock{i}"``, and passed the model
    object itself to ``smape`` instead of a forecast.

    NOTE(review): a model's forecast presumably starts right after the last
    observation it has seen. If a model was already advanced with
    ``arima_update``, its forecast would no longer line up with the start of
    ``df_test`` — confirm, and call this on freshly trained models.
    """
    n_instruments = df_test.shape[1]
    smapes = np.full(n_instruments, np.nan)
    for i in range(n_instruments):
        model = model_cache.get(f"stock{i}")
        if model is None:
            # No model trained for this instrument yet; leave the score as nan.
            continue
        window_actual = df_test[i]
        window_forecast = model.predict(n_periods=len(window_actual))
        smapes[i] = smape(window_actual, window_forecast)

    return smapes

def plot_arima(stock_name, forecasts, actuals):
    """Save a forecast-vs-actual chart for one stock as ``arima/<stock_name>.png``.

    Forecast (red) and actual (blue) prices are drawn against the left axis.
    Their difference (forecast minus actual) is drawn as a faint dashed line
    against a second, right-hand axis with fixed ticks from -1.0 to 1.0 in
    steps of 0.5. The figure is closed once it has been written to disk.
    """
    error = np.subtract(forecasts, actuals)

    fig, price_ax = plt.subplots(figsize=(10, 5))
    for series, colour, label in (
        (forecasts, 'red', 'Forecast'),
        (actuals, 'blue', 'Actual'),
    ):
        price_ax.plot(series, color=colour, label=label)
    price_ax.set_ylabel('Price')

    # Secondary y-axis so the small forecast error is readable next to prices.
    error_ax = price_ax.twinx()
    error_ax.plot(error, color='black', label='Difference', linestyle='--', alpha=0.2)
    error_ax.set_yticks(np.arange(-1.0, 1.5, 0.5))

    plt.title(stock_name)
    # A figure-level legend gathers the labelled lines from both axes.
    fig.legend(fancybox=True, framealpha=0.5)
    fig.tight_layout()
    plt.savefig(f'arima/{stock_name}.png')
    plt.close(fig)