# Import Libraries

In [1]:
import os
import sys
import numpy as np
import pandas as pd

# Make the parent of the current working directory importable so the local
# `models` package can be resolved from this notebook.
module_path = os.path.abspath("..")
if module_path not in sys.path:
    sys.path.append(module_path)

# Import the project helpers unconditionally. Nested under the `if` above they
# were skipped whenever the path was already registered (e.g. via PYTHONPATH),
# which left both names undefined for the rest of the notebook.
from models.ts_forecaster import TimeSeriesForecaster
from models.ts_preprocess import generate_future_dataframe

# Load Dataset

In [2]:
# Read the sample data and key the rows by the "date" column in one expression.
df = pd.read_csv("sample_data.csv").set_index("date")
df

Unnamed: 0_level_0,x1,x2,x3,y
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-01,4.559453,2.980033,4.055621,4.214337
2016-01-02,4.210408,3.029542,3.152499,3.224680
2016-01-03,3.370900,3.370900,3.940435,5.128299
2016-01-04,3.598311,3.745475,3.153416,2.850409
2016-01-05,-8.227478,2.717077,4.155089,4.545156
...,...,...,...,...
2021-12-27,3.131693,23.821462,4.663600,21.241504
2021-12-28,3.226848,2.706707,3.980070,3.131693
2021-12-29,3.221490,3.199063,3.596113,3.422970
2021-12-30,3.838340,2.721457,4.174123,3.513204


# Define Steps

In [3]:
# Number of trailing rows held out of training; the same value is passed as
# the forecast horizon in the cells below.
steps = 3

# Keep only the target column "y", then drop the last `steps` rows.
df_train = df[["y"]].iloc[:-steps].copy()

# Show the last date that remains in the training frame.
df_train.index.max()

'2021-12-28'

In [4]:
# Build the frame of dates to forecast and index it by "date".
# The cutoff is taken from the training frame instead of being hard-coded, so
# it stays correct when `steps` or the input data changes.
# NOTE(review): whether the cutoff date itself is included in the result is
# decided inside generate_future_dataframe — confirm against that helper.
df_future = generate_future_dataframe(
    cutoff=df_train.index.max(),
    freq="D",
    steps=steps,
)
df_future = df_future.set_index("date")

# Forecast

In [5]:
# Single forecaster instance, reused by both the ARIMA and Prophet sections below.
tsf = TimeSeriesForecaster()

## ARIMA

In [6]:
# Fit the ARIMA variant on the target column of the training frame.
tsf.train(df=df_train, target="y", algo="arima")

# Forecast `steps` daily periods over the prepared future frame, passing alpha=0.2.
tsf.forecast(
    df_future=df_future,
    steps=steps,
    freq="D",
    alpha=0.2,
)

                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 2189
Model:               SARIMAX(3, 0, 3)   Log Likelihood               -5848.897
Date:                Sun, 12 Feb 2023   AIC                          11713.793
Time:                        21:41:39   BIC                          11759.323
Sample:                    01-01-2016   HQIC                         11730.434
                         - 12-28-2021                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      7.0104      0.375     18.706      0.000       6.276       7.745
ar.L1          0.1366      0.028      4.806      0.000       0.081       0.192
ar.L2          0.1817      0.027      6.736      0.0

In [7]:
# Display the forecast frame held on the forecaster after the ARIMA run
# (presumably populated by forecast() — confirm in TimeSeriesForecaster).
tsf.df_fcst

Unnamed: 0,date,yhat,yhat_lower,yhat_upper
0,2021-12-28,,-0.246171,8.7539
1,2021-12-29,4.253865,-0.954069,8.05619
2,2021-12-30,3.551061,0.591901,9.606067


## Prophet

In [8]:
# Refit the same forecaster object, this time with the Prophet variant.
tsf.train(df=df_train, target="y", algo="prophet")

# Forecast the same horizon as the ARIMA section.
# NOTE(review): no `alpha` is passed here, unlike the ARIMA call, so the
# interval width comes from the forecaster's default — confirm the two
# sections are comparable.
tsf.forecast(
    df_future=df_future,
    steps=steps,
    freq="D",
)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)


Initial log joint probability = -9.18744
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      83       4454.98   4.88248e-05       72.9941    8.86e-07       0.001      142  LS failed, Hessian reset 
      99       4454.99   2.92613e-06        72.556      0.7876      0.7876      163   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     115       4454.99   3.78094e-07       77.2845      0.3107           1      186   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance


In [9]:
# Display the forecast frame held on the forecaster after the Prophet run; it
# is read from the same attribute as the ARIMA result shown earlier.
tsf.df_fcst

Unnamed: 0,date,yhat,yhat_lower,yhat_upper
0,2021-12-28,4.306813,-0.033598,8.811585
1,2021-12-29,4.869196,0.047983,9.260378
2,2021-12-30,4.616652,0.070039,9.094439
