In [None]:
import pandas as pd
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate, Bidirectional, GRU 
from keras import Model
from keras import optimizers
from tensorflow import keras

In [None]:
# Load the raw dataset; the path is resolved relative to the working directory.
dataset_path = 'Hourly Volatility Dataset.csv'
data_full = pd.read_csv(dataset_path)

In [None]:
# Keep the rows in file order instead of shuffling them.
# Later cells compute centred rolling medians and build `backcandles`-long
# look-back windows over *consecutive* rows; after a random shuffle, neighbouring
# rows are no longer neighbouring observations, so both steps would operate on
# unrelated samples.
# NOTE(review): assumes the CSV is already sorted chronologically - confirm.
# The variable keeps its historical name because the cells below reference it.
shuffled_data_full = data_full.copy()

In [None]:
# Train-test split: the leading 80% of the rows are used for training,
# the remaining rows are held back as test data.
train_fraction = 0.8
splitlimit = int(len(data_full) * train_fraction)
training_features = shuffled_data_full.iloc[:splitlimit]
test_data = shuffled_data_full.iloc[splitlimit:]

In [None]:
# Feature matrix, label vector and the combined frame, all taken from the full data.
feature_columns = ["Return_Squared", "Hourly Volatility"]
label_column = "target"
X = shuffled_data_full[feature_columns]
Y = shuffled_data_full[label_column]
data_set = shuffled_data_full[feature_columns + [label_column]]

In [None]:
#Outlier Detection in training_data_features
# A row is treated as an outlier when its absolute deviation from the centred
# rolling median exceeds OUTLIER_FACTOR times the median absolute deviation.
# NOTE(review): the rolling median is only meaningful if the rows of
# `training_features` are in chronological order - confirm the ordering.

ROLLING_WINDOW = 41   # number of rows in the centred rolling-median window
OUTLIER_FACTOR = 5    # multiple of the median deviation above which a row is dropped

# `training_features` was produced by slicing another DataFrame; take an explicit
# copy so the column assignments below write to an independent frame instead of
# triggering pandas' chained-assignment (SettingWithCopy) behaviour.
training_features = training_features.copy()

training_features["hourly_volatility_rolling_median"] = training_features["Hourly Volatility"].rolling(window=ROLLING_WINDOW, center=True, min_periods=1).median()
training_features["return_squared_rolling_median"] = training_features["Return_Squared"].rolling(window=ROLLING_WINDOW, center=True, min_periods=1).median()
training_features["volatility minus median"] = (training_features["Hourly Volatility"] - training_features["hourly_volatility_rolling_median"]).abs()
training_features["return minus median"] = (training_features["Return_Squared"] - training_features["return_squared_rolling_median"]).abs()

# First pass: drop volatility outliers. `~(a > b)` (rather than `a <= b`) keeps
# rows whose deviation is NaN, exactly as the comparison would evaluate to False.
volatility_threshold = OUTLIER_FACTOR * training_features['volatility minus median'].median()
volatility_outliers_removed = training_features[~(training_features['volatility minus median'] > volatility_threshold)]

# Second pass: drop return outliers; the threshold is computed on the rows that
# survived the first pass, not on the full training frame.
return_threshold = OUTLIER_FACTOR * volatility_outliers_removed['return minus median'].median()
both_outliers_removed = volatility_outliers_removed[~(volatility_outliers_removed['return minus median'] > return_threshold)]

In [None]:
# Split the outlier-filtered rows into features, labels and the combined frame.
cleaned_feature_columns = ["Return_Squared", "Hourly Volatility"]
data_set_cleaned = both_outliers_removed[cleaned_feature_columns + ["target"]]
X_cleaned = data_set_cleaned[cleaned_feature_columns]
Y_cleaned = data_set_cleaned["target"]

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Use one scaler per array. Re-fitting a single MinMaxScaler on the 3-column
# data set would overwrite the min/max learned from the 2-column feature
# matrix, making it impossible to later transform unseen feature rows with the
# training statistics.
feature_scaler = MinMaxScaler()
training_data_features_scaled = feature_scaler.fit_transform(X_cleaned)

# `scaler` ends up fitted on features + target, as before.
scaler = MinMaxScaler()
data_set_scaled = scaler.fit_transform(data_set_cleaned)

In [None]:
#Reconstructing training data
# Turn the scaled feature matrix into overlapping look-back windows and pair
# every window with one label.

backcandles = 15  # number of consecutive rows in each window

n_rows = training_data_features_scaled.shape[0]

# One list of windows per feature column: entry [col][k] holds rows
# k .. k + backcandles - 1 of that column.
Z = [
    [training_data_features_scaled[stop - backcandles:stop, col]
     for stop in range(backcandles, n_rows)]
    for col in range(2)
]

# Move the feature axis to the end: (feature, window, step) -> (window, step, feature).
Z = np.array(np.moveaxis(Z, [0], [2]))

# Labels come from the last column of the scaled data set, starting at the
# last row of the first window.
yi = np.array(data_set_scaled[backcandles - 1:, -1])

# Column vector without its final entry, so that the number of labels matches
# the number of windows when both scaled arrays have the same row count.
y_final = yi.reshape(len(yi), 1)[:-1]

In [None]:
#LSTM model
# Single-layer LSTM binary classifier over windows of shape (backcandles, 2).
lstm_input = Input(shape = (backcandles, 2), name = 'lstm_input')

inputs = LSTM(80, name='first_layer')(lstm_input)

inputs = Dense(1, name='dense_layer')(inputs)

# Sigmoid maps the single dense output into (0, 1) for binary_crossentropy.
output = Activation('sigmoid', name = 'output')(inputs)

model = Model(inputs = lstm_input, outputs = output)

model.compile(optimizer="adam", loss = "binary_crossentropy", metrics = ["accuracy"])

# Hold out part of the windows for validation instead of validating on the very
# data the model is trained on (which makes val_accuracy a copy of the training
# signal). Keras takes the held-out fraction from the END of x/y, before any
# per-epoch shuffling.
validation_fraction = 0.2
history = model.fit(x=Z, y=y_final, epochs = 30, validation_split = validation_fraction)

In [None]:
# Plot validation and training accuracy per epoch from the fit history.
fig, ax = plt.subplots()
for metric_key, line_colour, line_label in (
    ('val_accuracy', 'aqua', "Validation Accuracy"),
    ('accuracy', 'royalblue', "Training Accuracy"),
):
    ax.plot(history.history[metric_key], color = line_colour, label = line_label)
ax.set_xlabel('Number of Epochs', fontsize = 12)
ax.set_ylabel('Accuracy', fontsize = 12)
ax.legend()
#fig.savefig('GRU on hourly data.png')
plt.show()