In [17]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skfda import FDataBasis
from skfda.preprocessing.dim_reduction import FPCA
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.arima.model import ARIMA

from utils.helpers import (forecast, monthly_vol,  # ARIMA(0, 1, 1) forecast
                           rrmse)
from utils.transformations import CODA

In [18]:
warnings.filterwarnings('ignore')

In [19]:
# Inputs of the evaluation, presumably written by an earlier processing
# step (directory name "03-processed" - verify against the pipeline).
#   fd_df: one entry per label in ``fd_df.index``; each entry is sliced by
#          position with the training indices further down.
#   y:     table whose columns are looked up with those same labels and
#          whose index supplies the labels of the held-out observations.
# Unpickling runs arbitrary code: only load files this project produced.
fd_df_path = "data/03-processed/fd_df.pickle"
y_path = "data/03-processed/y.pickle"

fd_df, y = pd.read_pickle(fd_df_path), pd.read_pickle(y_path)

In [20]:
# Walk-forward evaluation scheme: 30 successive folds, each holding out
# exactly one observation that follows its training window.
N_FOLDS = 30
HOLDOUT_SIZE = 1

tscv = TimeSeriesSplit(n_splits=N_FOLDS, test_size=HOLDOUT_SIZE)

In [21]:
# Index of the forecasts: the label of the (single) held-out observation
# of every fold, in fold order.
held_out_positions = [test_pos[0] for _, test_pos in tscv.split(y)]
pred_index = pd.PeriodIndex(y.index[held_out_positions].to_list())

In [22]:
def _forecast_next_vol(train_curves, target_pos):
    """Produce the one-step-ahead forecast for a single fold.

    The training curves are passed through ``CODA``, reduced to 7 FPCA
    scores, every score series is extrapolated with ``forecast`` (an
    ARIMA(0, 1, 1) forecast according to the import comment), and the
    forecast scores are mapped back through both inverse transforms.
    ``monthly_vol`` then turns the forecast curve into the monthly
    volatility.

    Parameters
    ----------
    train_curves
        Training slice of one ``fd_df`` entry (a functional data object;
        its exact type is not visible in this notebook).
    target_pos : int
        Positional index of the held-out observation, forwarded unchanged
        to ``monthly_vol``.

    Returns
    -------
    numpy.ndarray
        ``monthly_vol``'s result wrapped with ``np.array``.
    """
    # Transform data through CODA algorithm
    coda_step = CODA()
    transformed_curves = coda_step.fit_transform(train_curves)

    # Fit FPCA on the transformed data
    fpca_step = FPCA(n_components=7)
    train_scores = fpca_step.fit_transform(transformed_curves)

    # Forecast every FPCA score series (one call of `forecast` per column)
    next_scores = np.apply_along_axis(forecast, axis=0, arr=train_scores)

    # Undo FPCA first, then the CODA transform
    next_transformed = fpca_step.inverse_transform(next_scores)
    next_curve = coda_step.inverse_transform(next_transformed)

    # Monthly volatility from the forecast curve
    return np.array(monthly_vol(next_curve, target_pos))


# Result containers: one score per column of `y`, plus the realised values
# and the forecasts aligned on the held-out periods.
frame_layout = dict(index=pred_index, columns=y.columns, dtype=float)
y_on_pred_index = pd.DataFrame(**frame_layout)
coda_preds = pd.DataFrame(**frame_layout)
coda_scores = pd.Series(index=y.columns, dtype=float)

for col_name in fd_df.index:
    actual_values, predicted_values = [], []

    for train_pos, test_pos in tscv.split(y):
        held_out_pos = test_pos[0]
        held_out_label = y.index[test_pos][0]

        train_curves = fd_df[col_name][train_pos]
        actual_value = y.iloc[held_out_pos][col_name]
        predicted_value = _forecast_next_vol(train_curves, held_out_pos)

        actual_values.append(actual_value)
        predicted_values.append(predicted_value)

        y_on_pred_index.loc[held_out_label, col_name] = actual_value
        coda_preds.loc[held_out_label, col_name] = predicted_value

    # Score of this column over all folds
    coda_scores[col_name] = rrmse(np.array(actual_values),
                                  np.array(predicted_values))

In [24]:
# Save data and results
# `to_pickle` does not create missing directories, so make sure the target
# exists first; otherwise a fresh checkout would run the whole evaluation
# loop and only then fail while writing the results.
output_dir = Path("data/05-coda")
output_dir.mkdir(parents=True, exist_ok=True)

coda_scores.to_pickle(output_dir / "coda_scores.pickle")
y_on_pred_index.to_pickle(output_dir / "y_on_pred_index.pickle")
coda_preds.to_pickle(output_dir / "coda_preds.pickle")