Reference: [pmdarima stocks use case](https://alkaline-ml.com/pmdarima/usecases/stocks.html)

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pmdarima as pm
from pmdarima.arima import ndiffs
from pmdarima.metrics import smape
from collections import defaultdict

In [2]:
# Make plot directories.
# exist_ok=True makes the call a no-op when the directory is already there,
# which avoids the check-then-create race of a separate os.path.isdir() test.
# A non-directory file at the same path still raises FileExistsError.
for dirname in ['arima/']:
    os.makedirs(dirname, exist_ok=True)

In [3]:
# Load the price table: the file has no header row, so the 50 columns are
# named stock0 .. stock49 here. A multi-character separator needs the
# python parsing engine.
stock_columns = [f"stock{idx}" for idx in range(50)]
df = pd.read_csv(
    'prices.txt',
    sep='   ',
    header=None,
    names=stock_columns,
    engine='python',
)
df

Unnamed: 0,stock0,stock1,stock2,stock3,stock4,stock5,stock6,stock7,stock8,stock9,...,stock40,stock41,stock42,stock43,stock44,stock45,stock46,stock47,stock48,stock49
0,13.46,71.65,48.46,50.52,52.10,13.00,18.98,47.71,69.49,49.96,...,32.64,55.76,14.46,58.94,36.71,52.62,49.33,36.22,49.00,56.09
1,13.48,72.10,48.52,50.50,52.06,12.95,18.95,47.84,69.73,49.93,...,32.52,55.97,14.44,59.81,36.64,52.58,49.20,36.27,48.84,56.08
2,13.47,72.35,48.48,50.62,51.80,12.79,18.98,47.98,69.60,49.33,...,32.48,56.34,14.50,59.04,36.89,52.49,49.48,36.39,48.56,55.90
3,13.53,72.51,48.42,50.75,51.66,12.66,18.96,48.74,69.54,49.67,...,32.59,56.32,14.40,58.73,36.94,52.40,49.42,36.41,49.00,56.14
4,13.64,71.99,48.40,50.65,51.97,12.62,18.89,48.88,69.68,49.46,...,32.64,56.32,14.36,59.01,37.03,52.44,49.79,36.42,48.14,55.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,10.32,63.28,45.54,43.35,52.64,7.91,17.30,55.30,67.83,58.73,...,28.81,79.78,10.85,61.76,30.12,48.51,82.09,34.95,35.80,52.57
746,10.32,63.34,45.56,43.28,52.73,7.87,17.26,54.92,67.95,58.62,...,28.78,80.46,10.77,61.49,29.85,48.40,81.43,34.99,35.58,53.10
747,10.32,63.23,45.55,43.25,52.66,7.83,17.32,54.67,67.94,59.15,...,28.73,81.15,10.75,60.36,29.77,48.41,81.90,35.00,35.45,53.21
748,10.20,63.12,45.56,43.19,52.51,7.71,17.33,55.18,67.99,59.64,...,28.73,82.22,10.81,59.24,29.61,48.42,81.33,35.19,35.57,53.01


In [13]:
nInst = 50
# Fitted models keyed by stock name, shared across calls to apply_arima().
# NOTE: defaultdict(None) has no default factory, so it behaves like a plain
# dict (a missing key raises KeyError and is never auto-created).
cache = defaultdict(None)

def apply_arima(prices, stock_name):
    """
    Produce a one-step-ahead ARIMA forecast for a single stock.

    The first call for a given ``stock_name`` picks the differencing order as
    the larger of the KPSS and ADF estimates from ``ndiffs``, runs a stepwise
    ``pm.auto_arima`` search on ``prices`` and stores the fitted model in the
    module-level ``cache``. Every later call for the same ``stock_name``
    reuses the cached model and feeds it ONLY the last observation of
    ``prices``; callers are therefore expected to extend the history by
    exactly one observation per call, and to remove the cache entry before
    starting over from a shorter history.

    Parameters:
    - prices: pandas Series or single-column DataFrame with the price history
      observed so far
    - stock_name: hashable key identifying the stock in ``cache``

    Returns:
    - forecast: the first (and only) predicted value for the next period
    - conf_int: list holding the confidence-interval bounds reported by
      ``model.predict(..., return_conf_int=True)`` for that forecast
    """
    if stock_name in cache:
        model = cache[stock_name]
        # Slice with -1: (not -1) so the newest observation is always handed
        # over as a 1-D, length-1 array. ``prices.iloc[-1]`` is a bare scalar
        # when ``prices`` is a Series, which is not a valid sample array.
        latest_obs = np.asarray(prices.iloc[-1:]).ravel()
        model.update(latest_obs)
    else:
        # Use the more conservative (larger) differencing order of the two tests.
        kpss_diffs = ndiffs(prices, alpha=0.05, test='kpss', max_d=20)
        adf_diffs = ndiffs(prices, alpha=0.05, test='adf', max_d=20)
        n_diffs = max(adf_diffs, kpss_diffs)

        model = pm.auto_arima(prices, d=n_diffs, seasonal=True, stepwise=True,
                            suppress_warnings=True, error_action='ignore',
                            max_p=10, max_q=10,
                            max_order=None, trace=True)
        cache[stock_name] = model

    fc, conf_int = model.predict(n_periods=1, return_conf_int=True)

    # n_periods=1, so row 0 is the only forecast / interval.
    return fc.tolist()[0], np.asarray(conf_int).tolist()[0]

# arima_pred = apply_arima(df['stock0'], 'stock0')
# print(arima_pred)
# print(df['stock0'].iloc[-1])

In [14]:
smapes = []

# Walk-forward evaluation window: the model is first fitted on observations
# [0, TRAIN_END) and then asked for a one-step-ahead forecast of every index
# in [TRAIN_END, TEST_END).
TRAIN_END = 500
TEST_END = 750

# for stock_name in df.columns:
for stock_name in ['stock0']:
    prcSoFar = pd.DataFrame(df[stock_name])

    # Never walk past the data that actually exists; otherwise iloc[:t] keeps
    # returning the same history and the last row is fed to the model again.
    test_end = min(TEST_END, len(prcSoFar))
    if test_end <= TRAIN_END:
        raise ValueError(
            f"{stock_name}: need more than {TRAIN_END} observations, got {len(prcSoFar)}"
        )

    # apply_arima() keeps fitted models in the module-level `cache` and only
    # feeds the newest observation to a cached model. Drop any model left over
    # from an earlier run of this cell so the walk-forward starts from a fresh
    # fit on the first TRAIN_END observations.
    cache.pop(stock_name, None)

    # Apply ARIMA predictions
    forecasts, confidence_intervals = [], []
    for t in range(TRAIN_END, test_end):
        prcHistSoFar = prcSoFar.iloc[:t]
        new_fc, new_conf_int = apply_arima(prcHistSoFar, stock_name)
        forecasts.append(new_fc)
        confidence_intervals.append(new_conf_int)

    # One slice of actuals for both the metric and the plot, so the two can
    # never disagree about which observations the forecasts are compared to.
    forecasts = np.array(forecasts)
    actuals = prcSoFar.iloc[TRAIN_END:test_end].to_numpy().ravel()

    smape_error = smape(actuals, forecasts)
    smapes.append(smape_error)

    difference = np.subtract(forecasts, actuals)

    fig, ax1 = plt.subplots(figsize=(10,5))
    ax1.plot(forecasts, color='red', label='Forecast')
    ax1.plot(actuals, color='blue', label='Actual')
    ax1.set_xlabel('Forecast step')
    ax1.set_ylabel('Price')

    # Forecast error on a secondary axis so it does not flatten the price curves.
    ax2 = ax1.twinx()
    ax2.plot(difference, color='black', label='Difference', linestyle='--', alpha=0.2)
    ax2.set_yticks(np.arange(-1.0, 1.5, 0.5))
    ax2.set_ylabel('Difference')

    plt.title(stock_name)
    fig.legend(fancybox=True, framealpha=0.5)
    fig.tight_layout()
    plt.savefig(f'arima/{stock_name}.png')
    # Close explicitly: figures created in a loop otherwise accumulate in memory.
    plt.close(fig)

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-1019.497, Time=0.62 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-1018.602, Time=0.04 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-1017.723, Time=0.04 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-1017.930, Time=0.09 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-1020.601, Time=0.06 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-1016.929, Time=0.15 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 1.007 seconds


In [9]:
# Put the collected SMAPE scores into a frame and show the column-wise maximum
smapes = pd.DataFrame(data=smapes)
smapes.max(axis=0)

0    0.789703
dtype: float64