In [286]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skfda import FDataGrid
from statsmodels.nonparametric.kde import KDEUnivariate

In [287]:
# Apply matplotlib's "ggplot" style sheet to any figures drawn in this notebook
plt.style.use("ggplot")

In [288]:
# Load the preprocessed returns and coerce the row labels to datetimes in a
# single expression (the monthly resampling further down needs a
# datetime-like index).
df = (
    pd.read_pickle("data/02-preprocessed/return_df.pickle")
    .pipe(lambda raw: raw.set_axis(pd.to_datetime(raw.index), axis="index"))
)

In [289]:
def centre(arr):
    """Return ``arr`` with its mean along axis 0 subtracted."""
    return arr - np.mean(arr, axis=0)


def to_sample(arr):
    """Return the values of ``arr`` as a plain Python list."""
    return arr.values.tolist()


# Two monthly passes over the returns:
#   1. demean each month's observations -- centring before or after the KDE
#      gives the same result, but doing it first is easier and more efficient;
#   2. collapse each month of observations for each currency pair into a
#      single list-valued sample.
df1 = (
    df.resample("M", group_keys=False)
    .apply(centre)
    .resample("M", group_keys=False)
    .agg(to_sample)
)

In [290]:
# Relabel the monthly rows by calendar period instead of by timestamp.
# The conversion is guarded so that this cell can be re-run safely: once the
# index is a PeriodIndex it no longer has a ``to_period`` method, and repeating
# the assignment unguarded would raise AttributeError.
if not isinstance(df1.index, pd.PeriodIndex):
    df1.index = df1.index.to_period("M")

In [291]:
# Keyword arguments unpacked into ``kde.fit(**params)`` for every monthly
# sample; ``gridsize`` is also reused as the number of evaluation points of
# the shared density grid.
params = dict(
    kernel="gau",
    bw="silverman",
    fft=True,
    gridsize=1024,
    cut=3,
    adjust=1,
)

In [292]:
# Container for the functional data: an object-dtype Series with one slot per
# column of df1, filled in by the KDE loop below.
fd_df = pd.Series(dtype="object", index=df1.columns)

In [293]:
# Create discrete domain for density functions: params["gridsize"] evenly
# spaced points shared by every estimated density.
# NOTE(review): the +/-0.04 bounds are hard-coded; presumably they are meant to
# cover the range of the centred returns -- confirm against the data.
grid_points = np.linspace(-0.04, 0.04, params["gridsize"])

for column_name, column in df1.items():
    # Compute the KDEs, discretized onto grid_points.
    # df1 already holds exactly one list of observations per month (built by
    # the monthly resample/agg step), so iterate over those lists directly.
    # Re-resampling the period-indexed column and then taking ``sample[0]``
    # relied on two deprecated pandas behaviours: resampling a PeriodIndex and
    # treating an integer key as a position on a non-integer index.
    data_matrix = []
    for sample in column:
        kde = KDEUnivariate(sample)
        kde.fit(**params)

        # Evaluate the fitted density on the common grid so that every row of
        # data_matrix is defined on the same domain.
        func = kde.evaluate(grid_points)
        data_matrix.append(func)
    data_matrix = np.array(data_matrix)

    # Threshold values for use in CODA - small enough to not affect integral.
    # Note that 10e-40 is 1e-39; this keeps every density value strictly positive.
    data_matrix[data_matrix <= 10e-40] = 10e-40

    # Create FDataGrid: one functional observation per month for this column
    fd_column = FDataGrid(grid_points=grid_points, data_matrix=data_matrix)

    fd_df[column_name] = fd_column

In [294]:
def monthly_vol(arr):
    """Return ``np.std(arr)`` scaled by the square root of the row count.

    ``np.std`` is called with its default arguments and the row count is
    ``arr.shape[0]``.
    """
    return np.std(arr) * np.sqrt(arr.shape[0])


# Monthly volatility target: apply monthly_vol to each calendar month of the
# returns, then label the rows by monthly period.
y = (
    df.resample("M")
    .apply(monthly_vol)
    .pipe(lambda vol: vol.set_axis(vol.index.to_period("M"), axis="index"))
)

In [295]:
# Persist the functional data and the volatility target as pickles under
# data/03-processed/
pd.to_pickle(fd_df, "data/03-processed/fd_df.pickle")
pd.to_pickle(y, "data/03-processed/y.pickle")