Reference: [pmdarima use case: stocks](https://alkaline-ml.com/pmdarima/usecases/stocks.html)

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pmdarima as pm
from pmdarima.arima import ndiffs
from pmdarima.metrics import smape
from collections import defaultdict

In [2]:
# Make plot directories (plot_arima saves its PNGs under 'arima/').
# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of testing os.path.isdir first.
for dirname in ['arima/']:
    os.makedirs(dirname, exist_ok=True)

In [3]:
# Import price data.
# The file has no header row and its fields are split on three spaces;
# a multi-character separator is why the python parsing engine is requested.
df = pd.read_csv('prices.txt', sep='   ', header=None, engine='python')
nt, nInst = df.shape

# The first 500 rows are used for fitting; every later row is held out.
df_train = df.iloc[:500]
df_test = df.iloc[500:]
print(df_train.shape, df_test.shape, sep='\n')

(500, 50)
(250, 50)


In [12]:
# Fitted models keyed by the stock identifier passed to arima_train.
# A plain dict is used on purpose: the previous defaultdict(None) had no
# default factory, so it already raised KeyError for unknown stocks.
model_cache: dict = {}

def arima_train(stock_name, train_prices: pd.Series) -> None:
    """Fit an auto-ARIMA model on ``train_prices`` and cache it.

    The differencing order is estimated with both the KPSS and the ADF
    test (alpha=0.05, at most 20 differences) and the larger estimate is
    used. ``pm.auto_arima`` then runs a stepwise search with ``p`` and
    ``q`` capped at 10; ``trace=True`` makes it print every candidate.

    Args:
        stock_name: Key under which the fitted model is stored in
            ``model_cache``.
        train_prices: Price history to fit on.

    Returns:
        None. The fitted model is written to ``model_cache[stock_name]``.
    """
    # Run both stationarity tests and keep the more conservative answer.
    diff_estimates = [
        ndiffs(train_prices, alpha=0.05, test=test_name, max_d=20)
        for test_name in ('kpss', 'adf')
    ]

    search_options = {
        'd': max(diff_estimates),
        'seasonal': True,
        'stepwise': True,
        'suppress_warnings': True,
        'error_action': 'ignore',
        'max_p': 10,
        'max_q': 10,
        'max_order': None,
        'trace': True,
    }
    model_cache[stock_name] = pm.auto_arima(train_prices, **search_options)

def arima_update(stock_name, observations) -> None:
    """Feed newly observed prices to the cached model for ``stock_name``.

    Args:
        stock_name: Key of a model previously stored in ``model_cache``.
        observations: New data handed unchanged to the model's ``update``.

    Raises:
        KeyError: If no model is cached under ``stock_name``.
    """
    fitted_model = model_cache[stock_name]
    fitted_model.update(observations)

def arima_forecast(stock_name, window=10) -> list[float]:
    """Forecast the next ``window`` values with the cached model.

    Args:
        stock_name: Key of a model previously stored in ``model_cache``.
        window: Number of future periods to predict.

    Returns:
        The model's ``predict(n_periods=window)`` result converted with
        ``.tolist()``, i.e. a plain Python list of length ``window`` whose
        last element is the forecast furthest ahead.

    Raises:
        KeyError: If no model is cached under ``stock_name``.
    """
    return model_cache[stock_name].predict(n_periods=window).tolist()

def get_all_smape_error(prcSoFar, start=500, end=750):
    """Compute one SMAPE score per instrument over rows ``start:end``.

    For every column of ``prcSoFar`` the actual prices in the evaluation
    window are compared with a forecast of the same length produced by the
    model cached under that column's label.

    NOTE(review): this assumes each cached model has been fit on the rows
    before ``start`` and has NOT yet been updated with the evaluation rows,
    so that ``predict`` lines up with ``start:end`` - confirm against the
    intended call order.

    Args:
        prcSoFar: DataFrame of prices, one column per instrument.
        start: First row (positional) of the evaluation window.
        end: Row (positional, exclusive) at which the window stops.

    Returns:
        1-D numpy array with one SMAPE value per column, in column order.

    Raises:
        KeyError: If a column has no model in ``model_cache``.
    """
    smapes = np.zeros(prcSoFar.shape[1])
    for i, stock in enumerate(prcSoFar.columns):
        # Score this instrument's own column, not the whole price table.
        window_actual = prcSoFar.iloc[start:end, i]
        # The cache holds fitted models, so a forecast has to be requested
        # from the model before it can be scored.
        window_forecast = model_cache[stock].predict(n_periods=len(window_actual))
        smapes[i] = smape(window_actual, window_forecast)

    return smapes

def plot_arima(stock_name, forecasts: np.ndarray, actuals: np.ndarray):
    """Save a forecast-versus-actual chart as ``arima/<stock_name>.png``.

    The left axis carries the forecast (red) and actual (blue) prices.
    A twin right axis carries ``forecasts - actuals`` as a faint dashed
    black line, with ticks fixed at -1.0, -0.5, 0.0, 0.5 and 1.0.

    Args:
        stock_name: Used as the plot title and as the PNG file name.
        forecasts: Predicted prices.
        actuals: Observed prices, element-wise comparable to ``forecasts``.
    """
    # Computed before any figure exists so a shape mismatch fails early.
    forecast_error = np.subtract(forecasts, actuals)

    fig, price_ax = plt.subplots(figsize=(10,5))
    price_series = (
        (forecasts, 'red', 'Forecast'),
        (actuals, 'blue', 'Actual'),
    )
    for values, colour, label in price_series:
        price_ax.plot(values, color=colour, label=label)
    price_ax.set_ylabel('Price')

    # Second y-axis sharing the x-axis, used only for the error curve.
    error_ax = price_ax.twinx()
    error_ax.plot(
        forecast_error,
        color='black',
        label='Difference',
        linestyle='--',
        alpha=0.2,
    )
    error_ax.set_yticks(np.arange(-1.0, 1.5, 0.5))

    plt.title(stock_name)
    fig.legend(fancybox=True, framealpha=0.5)
    fig.tight_layout()
    plt.savefig(f'arima/{stock_name}.png')
    # Release the figure; this function is called once per stock in a loop.
    plt.close(fig)

In [7]:
# Plot ARIMA forecast against actual data
def compare_arima_forecasts(stock):
    """Walk forward through the test data one step at a time and plot it.

    A model is fit on ``df_train[stock]``. Then, for each price in
    ``df_test[stock]``, a one-step forecast is recorded *before* the model
    is updated with that price. The forecasts and the test prices are
    finally passed to ``plot_arima``.

    Args:
        stock: Column label in ``df_train`` / ``df_test``; also used as the
            model-cache key and the plot name.
    """
    arima_train(stock, df_train[stock])

    one_step_forecasts = []
    for observed_price in df_test[stock]:
        # Predict first, reveal the observation afterwards.
        next_price = arima_forecast(stock, window=1)[-1]
        one_step_forecasts.append(next_price)
        arima_update(stock, observed_price)

    actual_prices = df_test[stock].values
    plot_arima(stock, np.array(one_step_forecasts), actual_prices)

# Fit, walk forward and save a chart for every column of the price table.
# arima_train calls auto_arima with trace=True, so each column prints its
# full model search to the cell output.
for stock in df.columns:
    compare_arima_forecasts(stock)

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-1019.492, Time=0.56 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-1018.602, Time=0.05 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-1017.723, Time=0.05 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-1017.930, Time=0.07 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-1020.601, Time=0.07 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-1016.929, Time=0.14 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 0.939 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=525.545, Time=0.58 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=520.944, Time=0.05 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=522.702, Time=0.03 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=522.695, Time=0.05 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=519.171, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=523.031, Time=0.20 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]         

In [23]:
# Plot ARIMA forecast over a window against actual data
def compare_arima_forecasts_window(stock, window=1, duration=250):
    """Forecast the test data in blocks of ``window`` steps and plot it.

    A model is fit on ``df_train[stock]``. The first ``duration`` test
    prices are then processed block by block: before each block the model
    is updated with the previous block's actual prices, then it forecasts
    ``window`` steps ahead. The collected forecasts are printed and plotted
    against the actual prices via ``plot_arima``.

    Args:
        stock: Column label in ``df_train`` / ``df_test``; also the cache
            key and plot name.
        window: Number of steps forecast between model updates (>= 1).
        duration: Maximum number of test rows to evaluate. If fewer rows
            are available, only those are used.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    # Reject a bad window before paying for the model search.
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    arima_train(stock, df_train[stock])

    # Clamp to the rows that exist so forecasts and actuals always end up
    # with the same length, even when duration exceeds the test set.
    actuals = df_test[stock].iloc[:max(duration, 0)]
    n_steps = len(actuals)

    forecasts = []
    for block_start in range(0, n_steps, window):
        if block_start > 0:
            # Reveal the block that was just forecast before predicting on.
            seen = actuals.iloc[block_start - window:block_start].tolist()
            arima_update(stock, seen)
        forecasts += arima_forecast(stock, window=window)

    print(forecasts)
    # The final block may run past the evaluated rows; trim the overshoot.
    plot_arima(stock, np.array(forecasts[:n_steps]), actuals.values)

# Column 0, refreshed after every step (window=1), over the first 50 test rows.
compare_arima_forecasts_window(0, window=1, duration=50)
# Alternative kept for reference (one-step walk over the whole test set):
# compare_arima_forecasts(0)

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-1019.492, Time=0.63 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-1018.602, Time=0.07 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-1017.723, Time=0.07 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-1017.930, Time=0.11 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-1020.601, Time=0.08 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-1016.929, Time=0.23 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 1.188 seconds
[13.4, 13.29, 13.25, 13.42, 13.51, 13.47, 13.47, 13.56, 13.59, 13.45, 13.58, 13.57, 13.59, 13.73, 13.62, 13.46, 13.41, 13.43, 13.55, 13.46, 13.38, 13.42, 13.21, 13.21, 13.14, 13.12, 13.15, 13.24, 13.1, 13.01, 13.04, 12.93, 12.94, 12.89, 12.95, 12.94, 12.9, 12.8, 12.89, 12.81, 12.77, 12.91, 12.91, 12.94, 12.9, 12.84, 12.86, 12.82, 12.76, 12.75]


In [19]:
# Walk-forward check for one instrument: compare the forecast and actual
# price change over successive blocks of `window` test rows.
stock = 27
stock_name = f"stock{stock}"
window = 10
n_rounds = 3

# arima_train / arima_update take the cache key first and the data second.
arima_train(stock_name, df_train[stock])

test_prices = df_test[stock]
latest = df_train[stock].iloc[-1]
for round_start in range(0, n_rounds * window, window):
    if round_start > 0:
        # Move the baseline to the end of the previous block and show the
        # model that block's actual prices before forecasting again.
        latest = actual_price
        arima_update(stock_name, test_prices.iloc[round_start - window:round_start])

    # arima_forecast returns a list; its last element is the price `window`
    # steps ahead, which lines up with the actual taken at the block's end.
    forecast_price = arima_forecast(stock_name, window=window)[-1]
    actual_price = test_prices.iloc[round_start + window - 1]
    print(f"forecast={forecast_price - latest} | actual={actual_price - latest}")