In [17]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import pandas_datareader as pdr
import plotly.graph_objects as go
from pandas.tseries.offsets import BDay
from pmdarima import ARIMA

In [18]:
# Notebook parameters: edit the values below, then run all cells.
# The result is pickled into the models folder in a file named after `ticker`.

# ticker symbol to model, e.g. Apple: AAPL, Google: GOOG, Amazon: AMZN
ticker = 'AAPL'

# first and last date of the price history to request
date_start, date_end = '2005-06-01', '2020-06-01'

# how many periods past the end of the training data to forecast
predict_days = 28

In [19]:
# Download the price history for `ticker` from the 'yahoo' data source and
# order the rows by their 'Date' label.
df = pdr.DataReader(
    ticker,
    data_source='yahoo',
    start=date_start,
    end=date_end,
).sort_values(by='Date')

# Normalise the column labels: lower-case, spaces replaced by underscores
# (e.g. 'Adj Close' -> 'adj_close').
df.columns = [label.lower().replace(' ', '_') for label in df.columns]

# The adjusted close is the series the model is fitted on.
train = df['adj_close']

In [20]:
# Fit an ARIMA model with order (2, 1, 4) to the training series.
# Should a ConvergenceWarning show up, try a larger maxiter in the fit() call.
arima_order = (2, 1, 4)
model1 = ARIMA(order=arima_order)
model1.fit(train, maxiter=100)


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Non-invertible starting MA parameters found. Using zeros as starting parameters.



ARIMA(maxiter=50, method='lbfgs', order=(2, 1, 4), out_of_sample_size=0,
      scoring='mse', scoring_args=None, seasonal_order=(0, 0, 0, 0),
      with_intercept=True)

In [21]:
# Predictions over the training sample, followed by a forecast of
# `predict_days` periods past its end (confidence intervals not requested).
pred_vals_insample = model1.predict_in_sample(
    dynamic=False,
    return_conf_int=False,
)
pred_vals = model1.predict(
    n_periods=predict_days,
    return_conf_int=False,
)

In [22]:
# Build one series holding the in-sample predictions followed by the forecast.

# index: the training dates extended by the business days that follow them
last_train_day = train.index[-1]
forecast_days = pd.bdate_range(
    start=last_train_day + BDay(1),
    end=last_train_day + BDay(predict_days),
)
time_range_all = train.index.append(forecast_days)

# values: both prediction arrays flattened and joined end to end
pred_vals_all = np.concatenate(
    (np.ravel(pred_vals_insample), np.ravel(pred_vals))
)

preds = pd.Series(pred_vals_all, index=time_range_all)
train.head()

Date
2005-06-01    4.983943
2005-06-02    4.951789
2005-06-03    4.729182
2005-06-06    4.689605
2005-06-07    4.518939
Name: adj_close, dtype: float64

In [23]:
# Sanity check: draw the predictions and the training data in one figure.
fig = go.Figure(
    data=[
        go.Scatter(x=preds.index, y=preds, name="pred"),
        go.Scatter(x=train.index, y=train, name="train"),
    ]
)
fig.show()

In [24]:
# Serialize the fitted model together with its predictions, training series
# and ticker symbol into models/<ticker>.pkl.
# The output folder is created first so this cell also works when it does not
# exist yet (a plain open() would raise FileNotFoundError in that case).
# NOTE: unpickling can execute arbitrary code -- only load these files from
# a trusted source.
models_dir = Path('models')
models_dir.mkdir(parents=True, exist_ok=True)

pickle_data = {"model": model1, 'preds_df': preds, 'train_df': train, 'ticker': ticker}
with open(models_dir / (ticker + '.pkl'), 'wb') as pkl:
    pickle.dump(pickle_data, pkl)