In [1]:
import pandas as pd
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate, Bidirectional, GRU 
from keras import Model
from keras import optimizers
from tensorflow import keras

In [2]:
data_full = pd.read_csv('Intraday Volatility Dataset.csv')

In [3]:
X = data_full[["Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility"]]
Y = data_full["target"]
data_set = data_full[["Date", "Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility", "target"]]

In [4]:
splitlimit = int(len(data_set)*0.8)
training_features, test_data = data_set[:splitlimit], data_set[splitlimit:]

In [5]:

training_features["volatility_rolling_median"] = training_features["Daily Volatility"].rolling(window=41, center=True, min_periods=1).median()
training_features["return_squared_rolling_median"] = training_features["Return_Squared"].rolling(window=41, center=True, min_periods=1).median()
training_features["return_rolling_median"] = training_features["Return"].rolling(window=41, center=True, min_periods=1).median()
training_features["EMAF_rolling_median"] = training_features["EMAF"].rolling(window=41, center=True, min_periods=1).median()
training_features["volume_rolling_median"] = training_features["Volume"].rolling(window=41, center=True, min_periods=1).median()

training_features["volatility minus median"] = (training_features["Daily Volatility"] - training_features["volatility_rolling_median"]).abs()
training_features["return_squared minus median"] = (training_features["Return_Squared"] - training_features["return_squared_rolling_median"]).abs()
training_features["return minus median"] = (training_features["Return"] - training_features["return_rolling_median"]).abs()
training_features["EMAF minus median"] = (training_features["EMAF"] - training_features["EMAF_rolling_median"]).abs()
training_features["volume minus median"] = (training_features["Volume"] - training_features["volume_rolling_median"]).abs()

volatility_outliers_removed = training_features[~(training_features['volatility minus median'] > 5 * training_features['volatility minus median'].median())]
all_outliers_removed = volatility_outliers_removed[~(volatility_outliers_removed['return_squared minus median'] > 5 * volatility_outliers_removed['return_squared minus median'].median())]
all_outliers_removed = all_outliers_removed[~(all_outliers_removed['return minus median'] > 5 * volatility_outliers_removed['return minus median'].median())]
all_outliers_removed = all_outliers_removed[~(all_outliers_removed['EMAF minus median'] > 5 * volatility_outliers_removed['EMAF minus median'].median())]
all_outliers_removed = all_outliers_removed[~(all_outliers_removed['volume minus median'] > 5 * volatility_outliers_removed['volume minus median'].median())]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  training_features["volatility_rolling_median"] = training_features["Daily Volatility"].rolling(window=41, center=True, min_periods=1).median()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  training_features["return_squared_rolling_median"] = training_features["Return_Squared"].rolling(window=41, center=True, min_periods=1).median()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydat

In [6]:
X_cleaned = all_outliers_removed[["Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility"]]
Y_cleaned = all_outliers_removed["target"]
data_set_cleaned = all_outliers_removed[["Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility", "target"]]

In [7]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
training_data_features_scaled = scaler.fit_transform(X_cleaned)
data_set_scaled = scaler.fit_transform(data_set_cleaned)

In [8]:
Z = []

backcandles = 10

for j in range(5):
    Z.append([])
    for i in range(backcandles, training_data_features_scaled.shape[0]):
        Z[j].append(training_data_features_scaled[i-backcandles:i, j])

In [9]:
Z = np.moveaxis(Z, [0], [2])
Z, yi = np.array(Z), np.array(data_set_scaled[backcandles-1:, -1])
y_final = np.reshape(yi,(len(yi),1))
y_final = y_final[1:]

In [10]:
#Random Search and Walk-Forward Cross-Validation


from keras.models import load_model
from sklearn.model_selection import ParameterSampler
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import TimeSeriesSplit
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Activation
from sklearn.model_selection import cross_val_score, RandomizedSearchCV
import numpy as np
from scipy.stats import randint, uniform


def create_model(units=80):
    lstm_input = Input(shape=(backcandles, 5), name='lstm_input')
    inputs = GRU(units, name='first_layer')(lstm_input)
    inputs = Dense(1, name='dense_layer')(inputs)
    output = Activation('sigmoid', name='output')(inputs)
    model = Model(inputs=lstm_input, outputs=output)
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model



#hyperparameter space
param_dist = {
    'units': randint(50, 150),  
    'batch_size': [16, 32, 64],  
    'epochs': randint(10,30), 
}


best_score = -np.inf  # Initialize best score
best_params = None  # Initialize best parameters
best_model_path = "best_model.h5"  # Path to save the best model

n_iter = 50  # Number of iterations for random search
tscv = TimeSeriesSplit(n_splits=5)
best_score = -np.inf 
best_params = None  

for params in ParameterSampler(param_dist, n_iter=n_iter):
    fold_scores = []  # Store scores for each fold
    
    for train_index, test_index in tscv.split(Z):
        # Split data
        X_train_fold, X_val_fold = Z[train_index], Z[test_index]
        y_train_fold, y_val_fold = y_final[train_index], y_final[test_index]
        
        # Create model
        model = create_model(units=params['units'])
        
        # Compile model with the chosen hyperparameters
        model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
        
        # Fit the model
        model.fit(X_train_fold, y_train_fold, epochs=params['epochs'], batch_size=params['batch_size'], verbose=1)
        
        # Evaluate the model
        _, score = model.evaluate(X_val_fold, y_val_fold, verbose=0)
        fold_scores.append(score)
    
    # Compute the average score across all folds
    avg_score = np.mean(fold_scores)
    
    # If the current model's score is better, update best score, parameters, and save the model
    if avg_score > best_score:
        best_score = avg_score
        best_params = params
        
        # Save the current best model
        model.save(best_model_path)
        print(f"New best model saved with score: {avg_score}")
        
# After the search
print(f"Best Score: {best_score}")
print(f"Best Parameters: {best_params}")


2024-04-19 18:11:56.671233: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2024-04-19 18:11:56.671358: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/23
Epoch 2/23
Epoch 3/23
Epoch 4/23
Epoch 5/23
Epoch 6/23
Epoch 7/23
Epoch 8/23
Epoch 9/23
Epoch 10/23
Epoch 11/23
Epoch 12/23
Epoch 13/23
Epoch 14/23
Epoch 15/23
Epoch 16/23
Epoch 17/23
Epoch 18/23
Epoch 19/23
Epoch 20/23
Epoch 21/23
Epoch 22/23
Epoch 23/23
Epoch 1/23
Epoch 2/23
Epoch 3/23
Epoch 4/23
Epoch 5/23
Epoch 6/23
Epoch 7/23
Epoch 8/23
Epoch 9/23
Epoch 10/23
Epoch 11/23
Epoch 12/23
Epoch 13/23
Epoch 14/23
Epoch 15/23
Epoch 16/23
Epoch 17/23
Epoch 18/23
Epoch 19/23
Epoch 20/23
Epoch 21/23
Epoch 22/23
Epoch 23/23
Epoch 1/23
Epoch 2/23
Epoch 3/23
Epoch 4/23
Epoch 5/23
Epoch 6/23
Epoch 7/23
Epoch 8/23
Epoch 9/23
Epoch 10/23
Epoch 11/23
Epoch 12/23
Epoch 13/23
Epoch 14/23
Epoch 15/23
Epoch 16/23
Epoch 17/23
Epoch 18/23
Epoch 19/23
Epoch 20/23
Epoch 21/23
Epoch 22/23
Epoch 23/23
Epoch 1/23
Epoch 2/23
Epoch 3/23
Epoch 4/23
Epoch 5/23
Epoch 6/23
Epoch 7/23
Epoch 8/23
Epoch 9/23
Epoch 10/23
Epoch 11/23
Epoch 12/23
Epoch 13/23
Epoch 14/23
Epoch 15/23
Epoch 16/23
Epoch 17/23
Epoc

Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/22
Epoch 2/22
Epoch 3/22
Epoch 4/22
Epoch 5/22
Epoch 6/22
Epoch 7/22
Epoch 8/22
Epoch 9/22
Epoch 10/22
Epoch 11/22
Epoch 12/22
Epoch 13/22
Epoch 14/22
Epoch 15/22
Epoch 16/22
Epoch 17/22
Epoch 18/22
Epoch 19/22
Epoch 20/22
Epoch 21/22
Epoch 22/22
Epoch 1/22
Epoch 2/22
Epoch 3/22
Epoch 4/22
Epoch 5/22
Epoch 6/22
Epoch 7/22
Epoch 8/22
Epoch 9/22
Epoch 10/22
Epoch 11/22
Epoch 12/22
Epoch 13/22
Epoch 14/22
Epoch 15/22
Epoch 16/22
Epoch 17/22
Epoch 18/22
Epoch 19/22
Epoch 20/22
Epoch 21/22
Epoch 22/22
Epoch 1/22
Epoch 2/22
Epoch 3/22
Epoch 4/22
Epoch 5/22
Epoch 6/22
Epoch 7/22
Epoch 8/22
Epoch 9/22
Epoch 10/22
Epoch 11/22
Epoch 12/22
Epoch 13/22
Epoch 14/22
Epoch 15/22
Epoch 16/22
Epoch 17/22
Epoch 18/22
Epoch 19/22
Epoch 20/22
Epoch 21/22
Epoch 22/22
Epoch 1/22
Epoch 2/22
Epoch 3/22
Epoch 4/22
Epoch 5/22
Epoch 6/22
Epoch 7/22
Epoch 8/22
Epoch 9/22
Epoch 10/22
Epoch 11/22
Epoch 12/2

Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoc

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21


Epoch 21/21
Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/27
Epoch 2/27
Epoch 3/27
Epoch 4/27
Epoch 5/27
Epoch 6/27
Epoch 7/27
Epoch 8/27
Epoch 9/27
Epoch 10/27
Epoch 11/27
Epoch 12/27
Epoch 13/27
Epoch 14/27
Epoch 15/27
Epoch 16/27
Epoch 1

Epoch 16/27
Epoch 17/27
Epoch 18/27
Epoch 19/27
Epoch 20/27
Epoch 21/27
Epoch 22/27
Epoch 23/27
Epoch 24/27
Epoch 25/27
Epoch 26/27
Epoch 27/27
Epoch 1/27
Epoch 2/27
Epoch 3/27
Epoch 4/27
Epoch 5/27
Epoch 6/27
Epoch 7/27
Epoch 8/27
Epoch 9/27
Epoch 10/27
Epoch 11/27
Epoch 12/27
Epoch 13/27
Epoch 14/27
Epoch 15/27
Epoch 16/27
Epoch 17/27
Epoch 18/27
Epoch 19/27
Epoch 20/27
Epoch 21/27
Epoch 22/27
Epoch 23/27
Epoch 24/27
Epoch 25/27
Epoch 26/27
Epoch 27/27
Epoch 1/27
Epoch 2/27
Epoch 3/27
Epoch 4/27
Epoch 5/27
Epoch 6/27
Epoch 7/27
Epoch 8/27
Epoch 9/27
Epoch 10/27
Epoch 11/27
Epoch 12/27
Epoch 13/27
Epoch 14/27
Epoch 15/27
Epoch 16/27
Epoch 17/27
Epoch 18/27
Epoch 19/27
Epoch 20/27
Epoch 21/27
Epoch 22/27
Epoch 23/27
Epoch 24/27
Epoch 25/27
Epoch 26/27
Epoch 27/27
Epoch 1/27
Epoch 2/27
Epoch 3/27
Epoch 4/27
Epoch 5/27
Epoch 6/27
Epoch 7/27
Epoch 8/27
Epoch 9/27
Epoch 10/27
Epoch 11/27
Epoch 12/27
Epoch 13/27
Epoch 14/27
Epoch 15/27
Epoch 16/27
Epoch 17/27
Epoch 18/27
Epoch 19/27
Epoch 2

Epoch 20/27
Epoch 21/27
Epoch 22/27
Epoch 23/27
Epoch 24/27
Epoch 25/27
Epoch 26/27
Epoch 27/27
Best Score: 0.6454545378684997
Best Parameters: {'batch_size': 32, 'epochs': 23, 'units': 80}


In [11]:
# Load the best model
best_model = load_model(best_model_path)

In [12]:
X_test = test_data[["Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility"]]
Y_test = test_data["target"]
test_dataset = test_data[["Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility", "target"]]

In [13]:
#Scaling test data
test_scaled = scaler.fit_transform(test_dataset)
X_test_scaled = scaler.fit_transform(X_test)

In [14]:
#reconstructing test data 


T = []

backcandles = 10

for j in range(5):
    T.append([])
    for i in range(backcandles, X_test_scaled.shape[0]):
        T[j].append(X_test_scaled[i-backcandles:i, j])
        
        
T = np.moveaxis(T, [0], [2])
T, yi_test = np.array(T), np.array(test_scaled[backcandles-1:, -1])
y_final_test = np.reshape(yi_test,(len(yi_test),1))
y_final_test = y_final_test[1:]

In [15]:
#GRU in sample 
from sklearn.metrics import confusion_matrix

validation_predictions = best_model.predict(Z)
validation_predicted_classes = (validation_predictions > 0.5).astype(int)
dataframe_val = pd.DataFrame(y_final, columns = ["target"])
dataframe_val["predicted"] = validation_predicted_classes
cm = confusion_matrix(dataframe_val['predicted'], dataframe_val['target'])
print(cm)

[[739 348]
 [191 440]]


In [16]:
#GRU out of sample
test_predictions = best_model.predict(T)
test_predicted_classes = (test_predictions > 0.5).astype(int)
dataframe = pd.DataFrame(y_final_test, columns = ["target"])
dataframe["predicted"] = test_predicted_classes
cm = confusion_matrix(dataframe['predicted'], dataframe['target'])
print(cm)

[[392 217]
 [ 94 242]]


In [1]:
#ROC Curve 
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from matplotlib import cm
import numpy as np


# Calculate ROC curve, AUC, and thresholds
fpr, tpr, thresholds = roc_curve(dataframe['target'], test_predictions)

roc_auc = auc(fpr, tpr)

roc_auc = auc(fpr, tpr)

# Print the AUC
print(f"AUC: {roc_auc:.4f}")

# Create the plot
plt.figure(figsize=(10, 8))
cmap = cm.get_cmap('viridis')  # Choose a colormap

# The scatter plot for the ROC points
sc = plt.scatter(fpr, tpr, c=thresholds, cmap=cmap, edgecolor='none', s =70)

# Optionally, print the thresholds alongside FPR and TPR for inspection
for f, t, thresh in zip(fpr, tpr, thresholds):
    print(f"Threshold: {thresh:.2f}, 1-Specificity: {f:.2f}, Sensitivity: {t:.2f}")

# Plotting the ROC curve
plt.plot(fpr, tpr, color='black', lw=1, label=f'AUC = {roc_auc:.2f}')
plt.plot([0, 1], [0, 1], color='black', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.gca().tick_params(axis='x', labelsize=15)
plt.gca().tick_params(axis='y', labelsize=15)
plt.xlabel('1-Specificity', fontsize=20)
plt.ylabel('Sensitivity', fontsize=20)


# Adding colorbar with custom font for the label
cbar = plt.colorbar(sc)
cbar.set_label('Threshold', size=18)
cbar.ax.tick_params(labelsize=15)

plt.legend(loc="lower right", fontsize= 15)
plt.savefig('ROC GRU.jpg', format='jpg', dpi=300, bbox_inches='tight')
plt.show()


NameError: name 'dataframe' is not defined