In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

# Time Series Modeling (ARIMA Methods) Assignment

In [80]:
import pandas as pd
import numpy as np
import plotly.express as px
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

<IPython.core.display.Javascript object>

In [23]:
def ilinechart(df, x, y, groups=None, title=""):
    fig = px.line(df, x=x, y=y, color=groups, title=title, template="none").update(
        layout=dict(title=dict(x=0.5))
    )
    fig.show()

<IPython.core.display.Javascript object>

### Import the Amazon stock price data set (AMZN_data.csv) and plot a line chart with the observed daily closing prices.

In [4]:
data = pd.read_csv(
    "https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%204/AMZN_data.csv"
)

<IPython.core.display.Javascript object>

In [5]:
data.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,261.4,265.25,260.555,261.95,3879078,AMZN
1,2013-02-11,263.2,263.25,256.6,257.21,3403403,AMZN
2,2013-02-12,259.19,260.16,257.0,258.7,2938660,AMZN
3,2013-02-13,261.53,269.96,260.3,269.47,5292996,AMZN
4,2013-02-14,267.37,270.65,265.4,269.24,3462780,AMZN


<IPython.core.display.Javascript object>

In [7]:
data["date"] = pd.to_datetime(data["date"])

<IPython.core.display.Javascript object>

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1259 non-null   datetime64[ns]
 1   open    1259 non-null   float64       
 2   high    1259 non-null   float64       
 3   low     1259 non-null   float64       
 4   close   1259 non-null   float64       
 5   volume  1259 non-null   int64         
 6   Name    1259 non-null   object        
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 69.0+ KB


<IPython.core.display.Javascript object>

### Run an Autoregression (AR) model on the series and add the results to a column in the dataframe.

In [22]:
ar_model = AutoReg(data["close"], lags=10).fit()
data["close AR"] = ar_model.predict()
data.tail()

Unnamed: 0,date,open,high,low,close,volume,Name,close AR
1254,2018-02-01,1445.0,1459.88,1385.14,1390.0,9113808,AMZN,1451.520275
1255,2018-02-02,1477.39,1498.0,1414.0,1429.95,11125722,AMZN,1394.020921
1256,2018-02-05,1402.62,1458.98,1320.72,1390.0,11494985,AMZN,1429.977517
1257,2018-02-06,1361.46,1443.99,1351.79,1442.84,11066819,AMZN,1398.315987
1258,2018-02-07,1449.0,1460.99,1415.15,1416.78,7162741,AMZN,1444.128294


<IPython.core.display.Javascript object>

### Plot a multi-line chart comparing the AR model's results with the observed values.

In [25]:
melted = pd.melt(data, id_vars="date", value_vars=["close", "close AR"])
melted

Unnamed: 0,date,variable,value
0,2013-02-08,close,261.950000
1,2013-02-11,close,257.210000
2,2013-02-12,close,258.700000
3,2013-02-13,close,269.470000
4,2013-02-14,close,269.240000
...,...,...,...
2513,2018-02-01,close AR,1451.520275
2514,2018-02-02,close AR,1394.020921
2515,2018-02-05,close AR,1429.977517
2516,2018-02-06,close AR,1398.315987


<IPython.core.display.Javascript object>

In [26]:
ilinechart(melted, "date", "value", groups="variable")

<IPython.core.display.Javascript object>

### Run a Moving Average (MA) model on the series and add the results to a column in the dataframe.

In [39]:
ma_model = ARMA(data["close"], order=(0, 1)).fit()
data["MA"] = ma_model.predict()

<IPython.core.display.Javascript object>

### Print the model summary and add the model's results to the line chart with observations and the AR model results.

In [40]:
print(ma_model.summary())

                              ARMA Model Results                              
Dep. Variable:                  close   No. Observations:                 1259
Model:                     ARMA(0, 1)   Log Likelihood               -8047.769
Method:                       css-mle   S.D. of innovations            144.300
Date:                Thu, 04 Jun 2020   AIC                          16101.538
Time:                        11:10:46   BIC                          16116.952
Sample:                             0   HQIC                         16107.331
                                                                              
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const         576.9836      8.063     71.561      0.000     561.181     592.786
ma.L1.close     0.9834      0.004    226.362      0.000       0.975       0.992
                                    Roots       

<IPython.core.display.Javascript object>

In [41]:
melted = pd.melt(data, id_vars="date", value_vars=["close", "close AR", "MA"])
melted

Unnamed: 0,date,variable,value
0,2013-02-08,close,261.950000
1,2013-02-11,close,257.210000
2,2013-02-12,close,258.700000
3,2013-02-13,close,269.470000
4,2013-02-14,close,269.240000
...,...,...,...
3772,2018-02-01,MA,1038.634605
3773,2018-02-02,MA,922.505592
3774,2018-02-05,MA,1075.988931
3775,2018-02-06,MA,885.772487


<IPython.core.display.Javascript object>

In [42]:
ilinechart(melted, "date", "value", groups="variable")

<IPython.core.display.Javascript object>

### Run an Autoregressive Moving Average (ARMA) model on the series and add the results to a column in the dataframe.

Hint: You will need to make the time series stationary in order to run the ARMA model on it. The most common way to do this is by differncing, or subtracting the previous observed value from the current one.

In [46]:
data["close difference"] = data["close"].diff()
data = data.iloc[1:]
data.head()

Unnamed: 0,date,open,high,low,close,volume,Name,close AR,MA,close difference
1,2013-02-11,263.2,263.25,256.6,257.21,3403403,AMZN,,419.488939,-4.74
2,2013-02-12,259.19,260.16,257.0,258.7,2938660,AMZN,,468.822973,1.49
3,2013-02-13,261.53,269.96,260.3,269.47,5292996,AMZN,,419.443041,10.77
4,2013-02-14,267.37,270.65,265.4,269.24,3462780,AMZN,,457.055734,-0.23
5,2013-02-15,267.63,268.92,263.11,265.09,3979832,AMZN,,420.551104,-4.15


<IPython.core.display.Javascript object>

In [48]:
arma_model = ARMA(data["close difference"], order=(0, 1)).fit()
data["ARMA"] = arma_model.predict()

<IPython.core.display.Javascript object>

### Print the model summary and generate a multi-line chart that compares the ARMA model's results with those from the AR and MA models.

Hint: You will need to de-difference the model results by adding the previous observed values to them.

In [50]:
print(arma_model.summary())

                              ARMA Model Results                              
Dep. Variable:       close difference   No. Observations:                 1258
Model:                     ARMA(0, 1)   Log Likelihood               -4734.799
Method:                       css-mle   S.D. of innovations             10.431
Date:                Thu, 04 Jun 2020   AIC                           9475.598
Time:                        11:15:36   BIC                           9491.010
Sample:                             0   HQIC                          9481.390
                                                                              
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      0.9182      0.292      3.147      0.002       0.346       1.490
ma.L1.close difference    -0.0081      0.027     -0.294      0.769      -0.062       0.046
    

<IPython.core.display.Javascript object>

In [51]:
data.head()

Unnamed: 0,date,open,high,low,close,volume,Name,close AR,MA,close difference,ARMA
1,2013-02-11,263.2,263.25,256.6,257.21,3403403,AMZN,,419.488939,-4.74,
2,2013-02-12,259.19,260.16,257.0,258.7,2938660,AMZN,,468.822973,1.49,0.96391
3,2013-02-13,261.53,269.96,260.3,269.47,5292996,AMZN,,419.443041,10.77,0.913942
4,2013-02-14,267.37,270.65,265.4,269.24,3462780,AMZN,,457.055734,-0.23,0.838553
5,2013-02-15,267.63,268.92,263.11,265.09,3979832,AMZN,,420.551104,-4.15,0.926827


<IPython.core.display.Javascript object>

### Run an Autoregressive Integrated Moving Average (ARIMA) model on the series and add the results to a column in the dataframe.

In [53]:
arima_model = ARIMA(data["close"], order=(1, 1, 1)).fit()
data["ARIMA"] = arima_model.predict(typ="levels")

<IPython.core.display.Javascript object>

### Print the model summary and generate a multi-line chart that compares the ARIMA model's results to the observed values.

In [54]:
print(arima_model.summary())

                             ARIMA Model Results                              
Dep. Variable:                D.close   No. Observations:                 1257
Model:                 ARIMA(1, 1, 1)   Log Likelihood               -4730.780
Method:                       css-mle   S.D. of innovations             10.429
Date:                Thu, 04 Jun 2020   AIC                           9469.560
Time:                        11:20:35   BIC                           9490.106
Sample:                             1   HQIC                          9477.282
                                                                              
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.9237      0.290      3.187      0.001       0.356       1.492
ar.L1.D.close    -0.6758      0.276     -2.446      0.014      -1.217      -0.134
ma.L1.D.close     0.6512      0.283     

<IPython.core.display.Javascript object>

In [55]:
melted = pd.melt(data, id_vars="date", value_vars=["close", "close AR", "MA", "ARIMA"])
melted

Unnamed: 0,date,variable,value
0,2013-02-11,close,257.210000
1,2013-02-12,close,258.700000
2,2013-02-13,close,269.470000
3,2013-02-14,close,269.240000
4,2013-02-15,close,265.090000
...,...,...,...
5027,2018-02-01,ARIMA,1451.697314
5028,2018-02-02,ARIMA,1392.522688
5029,2018-02-05,ARIMA,1428.870776
5030,2018-02-06,ARIMA,1393.235218


<IPython.core.display.Javascript object>

In [56]:
ilinechart(melted, "date", "value", groups="variable")

<IPython.core.display.Javascript object>

### Run an Seasonal Autoregressive Integrated Moving Average (SARIMA) model on the series and add the results to a column in the dataframe.

In [67]:
sarima_model = SARIMAX(
    data["close"], order=(1, 1, 1), seasonal_order=(2, 2, 2, 2)
).fit()
data["SARIMA"] = sarima_model.predict()


Non-invertible starting seasonal moving average Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals



<IPython.core.display.Javascript object>

### Print the model summary and generate a multi-line chart that compares the SARMA model's results with the observed values.

In [63]:
sarima_model.summary()

0,1,2,3
Dep. Variable:,close,No. Observations:,1258.0
Model:,"SARIMAX(1, 1, 1)x(2, 2, [1, 2], 2)",Log Likelihood,-4766.95
Date:,"Thu, 04 Jun 2020",AIC,9547.9
Time:,11:26:02,BIC,9583.834
Sample:,0,HQIC,9561.407
,- 1258,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,-0.9460,0.017,-54.570,0.000,-0.980,-0.912
ma.L1,0.9986,0.017,60.189,0.000,0.966,1.031
ar.S.L2,0.0280,0.026,1.095,0.273,-0.022,0.078
ar.S.L4,-0.0334,0.024,-1.373,0.170,-0.081,0.014
ma.S.L2,-1.8851,0.016,-117.455,0.000,-1.917,-1.854
ma.S.L4,0.8971,0.016,57.068,0.000,0.866,0.928
sigma2,115.7583,1.571,73.679,0.000,112.679,118.838

0,1,2,3
Ljung-Box (Q):,50.69,Jarque-Bera (JB):,20691.87
Prob(Q):,0.12,Prob(JB):,0.0
Heteroskedasticity (H):,4.54,Skew:,1.16
Prob(H) (two-sided):,0.0,Kurtosis:,22.77


<IPython.core.display.Javascript object>

In [64]:
melted = pd.melt(
    data, id_vars="date", value_vars=["close", "close AR", "MA", "ARIMA", "SARIMA"]
)
melted

Unnamed: 0,date,variable,value
0,2013-02-11,close,257.210000
1,2013-02-12,close,258.700000
2,2013-02-13,close,269.470000
3,2013-02-14,close,269.240000
4,2013-02-15,close,265.090000
...,...,...,...
6285,2018-02-01,SARIMA,1461.946741
6286,2018-02-02,SARIMA,1398.336841
6287,2018-02-05,SARIMA,1436.468756
6288,2018-02-06,SARIMA,1399.381657


<IPython.core.display.Javascript object>

In [65]:
ilinechart(melted, "date", "value", groups="variable")

<IPython.core.display.Javascript object>

### Run an Seasonal Autoregressive Integrated Moving Average with Exogenous Factors (SARIMAX) model on the series and add the results to a column in the dataframe.

Use the daily Open prices as the exogenous factors.

### Print the model summary and generate a multi-line chart that compares the SARMAX model's results with the SARIMA model's values.

### Evaluate the performance of all the models and compare to each other, using Mean Absolute Error and Root Mean Squared Error as your evaluation metrics.

In [76]:
data.columns

Index(['date', 'open', 'high', 'low', 'close', 'volume', 'Name', 'close AR',
       'MA', 'close difference', 'ARMA', 'ARIMA', 'SARIMA'],
      dtype='object')

<IPython.core.display.Javascript object>

In [87]:
models = ["MA", "ARMA", "ARIMA", "SARIMA"]

for model in models:
    diff = data["close"] - data[model]
    mae = diff.abs().mean()
    rmse = np.sqrt(np.mean(diff ** 2))
    print("Model", ":", model, "|", "MAE", mae, "|", "RMSE", rmse)
    print("~" * 64, "\n")

Model : MA | MAE 124.42659103151567 | RMSE 144.41532243724936
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

Model : ARMA | MAE 576.4668711946457 | RMSE 641.8945891696234
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

Model : ARIMA | MAE 6.600041420406552 | RMSE 10.433777985446154
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

Model : SARIMA | MAE 7.384696801764013 | RMSE 14.752152002092995
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 



<IPython.core.display.Javascript object>