# Spectral methods / digital signal processing for noise removal

In this notebook we explore Fourier and wavelet methods for denoising financial time series data. The goal is to determine whether a denoised signal provides improved forecasting results. Some considerations and difficulties with this approach:
- You can't denoise the entire signal and then split the denoised signal into testing and training sets. This would cause information leakage, i.e. using knowledge of the testing set in the transformation.
    - A potential workaround is to denoise the testing set only and then predict on the noisy set.
    - What would be the result of this? Perhaps smoother predictions, but these smoother predictions may not result in improved evaluation metrics such as RMSE or DA (directional accuracy).
- perhaps we can simply add the denoised signal as another feature to create a multivariate problem; feature 1 = original signal, feature 2 = denoised signal

In [None]:
# import scipy fft functions
from scipy.fft import fft, ifft, fftfreq

# interactive figures
%matplotlib widget 

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# ml training code
from one_dimensional_time_series_forecasting import time_series_prediction
from one_dimensional_time_series_forecasting import hit_rate
from one_dimensional_time_series_forecasting import invert_scaling

# model evaluation metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

# data preprocessing
from sklearn.preprocessing import normalize
from sklearn.preprocessing import MinMaxScaler

import random

# testing environment

In [None]:
# Baseline forecasting run on the min-max scaled 'In-Game' series:
# scale -> frame as supervised problem -> fit models -> plot/tabulate -> hit rate.
window_length = 200  # lag window length handed to time_series_prediction
split = 600          # size of the trailing segment; also passed to train_test_split

# ----------------------------------------------------------------------------------
# Scaling
# ----------------------------------------------------------------------------------
# The scaler is fitted on everything except the last `split` observations and that
# same fit is then applied to the last `split` observations, so the trailing segment
# never contributes to the min/max used for scaling.
scaler = MinMaxScaler()
in_game_series = df['In-Game']
training_column = in_game_series[0:-split].to_numpy().reshape(-1, 1)
test_column = in_game_series[-split:].to_numpy().reshape(-1, 1)

scaled_training_data = scaler.fit_transform(training_column).flatten()
scaled_test_data = scaler.transform(test_column).flatten()

# Stitch both scaled segments back into a single 1-D series, in original order.
data = np.concatenate([scaled_training_data, scaled_test_data])

# ----------------------------------------------------------------------------------
# Modelling
# ----------------------------------------------------------------------------------
normal = time_series_prediction(
    df['DateTime'],  # time series dates
    data,            # univariate time series
    window_length,   # lag window length
    1,               # number of steps ahead to predict
)
normal.sliding_window_1(verbose=0)                   # time series -> supervised ML problem
normal.train_test_split(split=split)                 # testing / training dataset split
normal.test_train_plot(steps=150, ylabel='In Game')  # visualize the split

# Fit each forecaster in turn; the SVM and the MLP are run with tuning switched on.
normal.naive_model()
normal.linear_regression()
normal.support_vector_machine(model_tunning=True)
normal.neural_net_mlp(model_tunning=True)

# ----------------------------------------------------------------------------------
# Results: plot, then tabulate
# ----------------------------------------------------------------------------------
normal.vis_results_time_series(second_plot='error', ylabel='In Game')

tabulated_results_0 = normal.results()
tabulated_results_0.plot(figsize=(10, 4))
display(tabulated_results_0)

# ----------------------------------------------------------------------------------
# Directional accuracy (hit rate)
# ----------------------------------------------------------------------------------
# Every column fed to hit_rate is cut at the same position: rows from
# `split + window_length` onwards.
evaluation_rows = slice(split + window_length, None)

dates = tabulated_results_0['date'].iloc[evaluation_rows]
original_values = tabulated_results_0['Value'].iloc[evaluation_rows]
lin_predictions = tabulated_results_0['Linear'].iloc[evaluation_rows]
svm_predictions = tabulated_results_0['SVM'].iloc[evaluation_rows]
nn_predictions = tabulated_results_0['NN'].iloc[evaluation_rows]
naive_predictions = tabulated_results_0['Naive'].iloc[evaluation_rows]

# Printed heading -> predictions to score, in the order they are reported.
hit_rate_runs = {
    'Linear Regression:': lin_predictions,
    'SVM:': svm_predictions,
    'NN:': nn_predictions,
    'Naive:': naive_predictions,
}

hit_rate_tables = []
for heading, predictions in hit_rate_runs.items():
    print(heading)
    hit_rate_tables.append(hit_rate(dates, original_values, predictions))

# Keep one named result per model, matching the reporting order above.
df_lin, df_svm, df_nn, df_naive = hit_rate_tables