In [None]:
# --- Run configuration ------------------------------------------------------
# Input CSV; it is read with a 'timestamp' column that becomes the index.
DATA_PATH = 'data/data_total_extended.csv'

# pandas frequency alias the series is aligned to ('h' = hourly).
FREQ = 'h'

# Calendar year that is held out and forecast; the four years before it are
# used for training.
YEAR = 2021

# Forecasts for YEAR are written here.
OUTPUT_PATH = f'data/predictions_aft_{YEAR}.csv'

# Seasonal cycle lengths, counted in samples of FREQ. For hourly data these
# are one day, one week and one 365-day year.
SEASONAL_PERIODS = [24, 24 * 7, 24 * 365]

# Harmonics 1..FOURIER_ORDER (one sine and one cosine each) are generated for
# every seasonal period.
# NOTE(review): harmonic 7 of the 168-sample period has the same frequency as
# harmonic 1 of the 24-sample period (7/168 == 1/24), so those regressor
# columns duplicate each other. Consider lowering the order or de-duplicating.
FOURIER_ORDER = 7

# `order` argument handed to the ARIMA model.
ORDER = (4, 1, 4)

In [None]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Load the raw series, index it by timestamp and align it to a regular FREQ
# grid. asfreq() inserts NaN rows for timestamps missing from the file and
# dropna() removes them again (along with any other row containing a NaN),
# so the resulting index can contain gaps.
data = pd.read_csv(DATA_PATH, parse_dates=['timestamp'])
data = data.set_index('timestamp').asfreq(FREQ).dropna()
data['year'] = data.index.year

# Train on the four calendar years preceding YEAR; YEAR itself is held out.
train = data[(data['year'] < YEAR) & (data['year'] > YEAR - 5)]
test = data[data['year'] == YEAR]
if train.empty:
    raise ValueError(
        f'no training rows found for the years {YEAR - 4}-{YEAR - 1} in {DATA_PATH}')

ts = train['value']

# Time axis for the Fourier terms: the position of every observation on the
# regular FREQ grid that starts at the first training timestamp. Counting rows
# instead (np.arange(len(ts))) silently shifts the phase of every seasonal
# term after each gap left by dropna(); with no gaps both give 0..len(ts)-1.
train_grid = pd.date_range(ts.index[0], ts.index[-1], freq=FREQ)
t = train_grid.get_indexer(ts.index)

# Exogenous regressors: for each seasonal period p and harmonic i, one sine
# and one cosine column. Column order is all sines first, then all cosines;
# the forecast regressors must use the same order.
exo = pd.DataFrame({f'{p}_sin_{i}': np.sin(2*np.pi*i*t/p)
                    for p in SEASONAL_PERIODS for i in range(1,FOURIER_ORDER+1)},
                   index=ts.index)
exo = exo.assign(**{f'{p}_cos_{i}': np.cos(2*np.pi*i*t/p)
                    for p in SEASONAL_PERIODS for i in range(1,FOURIER_ORDER+1)})


In [None]:
# Fit a regression-with-ARIMA-errors model: the Fourier columns in `exo`
# carry the seasonality, ORDER describes the remaining dynamics.
m = ARIMA(ts, order=ORDER, exog=exo).fit()

if len(test) == 0:
    raise ValueError(f'no rows for YEAR={YEAR} to forecast')

# Time axis for the forecast regressors, measured on the same regular FREQ
# grid as the training axis (origin = first training timestamp). Continuing
# by row count from len(ts) would put the seasonal terms out of phase whenever
# rows were dropped from the training or the test period; with no gaps this
# equals np.arange(len(ts), len(ts) + len(test)).
forecast_grid = pd.date_range(ts.index[0], test.index[-1], freq=FREQ)
tf = forecast_grid.get_indexer(test.index)

# Same sine/cosine regressors as used for fitting, evaluated on the test rows.
exo_f = pd.DataFrame({f'{p}_sin_{i}': np.sin(2*np.pi*i*tf/p)
                      for p in SEASONAL_PERIODS for i in range(1,FOURIER_ORDER+1)},
                     index=test.index)
exo_f = exo_f.assign(**{f'{p}_cos_{i}': np.cos(2*np.pi*i*tf/p)
                         for p in SEASONAL_PERIODS for i in range(1,FOURIER_ORDER+1)})
# The regressors are matched to the fitted coefficients by position, so force
# the training column order (fails loudly if a column is missing).
exo_f = exo_f[exo.columns]

# One forecast step per test row, returned as a plain array.
y_pred = m.forecast(steps=len(test), exog=exo_f).values


In [None]:
# Write the forecast in the shape of the held-out rows, with the observed
# 'value' replaced by the prediction.
out = test.copy()
out['value'] = y_pred
# The timestamps live in the index, so it must be written: rows may have been
# dropped from the test period, and without the timestamp column a prediction
# can no longer be matched to the time it refers to.
out.to_csv(OUTPUT_PATH, index=True)