In [None]:
import pandas as pd
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate, Bidirectional 
from keras import Model
from keras import optimizers
from tensorflow import keras

In [None]:
# Load the full dataset from a CSV file addressed relative to the current
# working directory. Later cells read the columns "Volume", "Return",
# "Return_Squared", "EMAF", "Daily Volatility" and "target" from this frame.
data_full = pd.read_csv('Daily Volatility Dataset.csv')

In [None]:
# Column groups: the five model inputs and the label column.
input_columns = ["Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility"]
label_column = "target"

# Inputs only, label only, and inputs followed by the label (label last).
X = data_full[input_columns]
Y = data_full[label_column]
data_set = data_full[input_columns + [label_column]]

In [None]:
# Ordered 80/20 split with no shuffling: the first 80% of rows are used for
# training, the remaining rows are held out for testing.
splitlimit = int(0.8 * len(data_set))
training_features = data_set[:splitlimit]
test_data = data_set[splitlimit:]

In [None]:
#Scaling the data
from sklearn.preprocessing import MinMaxScaler
# scaler1 sees only the five input columns; scaler2 sees every column of the
# training split (inputs plus "target"). Both are fitted on training rows only
# and are reused later to transform the held-out rows.
training_inputs = training_features[["Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility"]]

scaler1 = MinMaxScaler().fit(training_inputs)
training_data_features_scaled = scaler1.transform(training_inputs)

scaler2 = MinMaxScaler().fit(training_features)
dataset_scaled = scaler2.transform(training_features)

In [None]:
# Length of the look-back window: each sample holds this many consecutive rows.
backcandles = 10

n_training_rows = training_data_features_scaled.shape[0]

# Nested list indexed [feature][sample]; each entry is the slice of
# `backcandles` consecutive scaled values of that feature that ends just
# before row `end`.
Z = [
    [
        training_data_features_scaled[end - backcandles:end, column]
        for end in range(backcandles, n_training_rows)
    ]
    for column in range(5)
]

In [None]:
# Z arrives as a nested list indexed [feature][sample][timestep]; move the
# feature axis (0) to the last position (2) so the array is indexed
# [sample][timestep][feature], matching the (backcandles, 5) model input.
# NOTE: not idempotent — re-running this cell on its own moves the axes again.
Z = np.moveaxis(Z, [0], [2])

In [None]:
# Materialise the window tensor as its own ndarray.
Z = np.array(Z)
# Last column of the scaled training frame (the "target" column), starting at
# the row that closes the first look-back window.
yi = np.array(dataset_scaled[backcandles - 1:, -1])

In [None]:
# Column vector of labels; the final entry is dropped so there is exactly one
# label per window in Z.
y_final = yi.reshape(-1, 1)[:-1]

In [None]:
#Random Search and Walk-Forward Cross-Validation


from keras.models import load_model
from sklearn.model_selection import ParameterSampler
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import TimeSeriesSplit
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Activation
from sklearn.model_selection import cross_val_score, RandomizedSearchCV
import numpy as np
from scipy.stats import randint, uniform


def create_model(units=80):
    """Build and compile a single-layer LSTM binary classifier.

    The network takes windows of shape (backcandles, 5), where `backcandles`
    is read from the notebook's global scope, feeds them through one LSTM
    layer and a one-unit dense layer, and applies a sigmoid to the result.

    Args:
        units: Number of units in the LSTM layer.

    Returns:
        A model compiled with the Adam optimizer, binary cross-entropy loss
        and accuracy as the tracked metric.
    """
    sequence_in = Input(shape=(backcandles, 5), name='lstm_input')
    hidden_state = LSTM(units, name='first_layer')(sequence_in)
    logit = Dense(1, name='dense_layer')(hidden_state)
    probability = Activation('sigmoid', name='output')(logit)

    network = Model(inputs=sequence_in, outputs=probability)
    network.compile(
        loss="binary_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return network



#hyperparameter space
# `units` and `epochs` are drawn from scipy integer distributions (upper bound
# exclusive); `batch_size` is drawn uniformly from the listed values.
param_dist = {
    'units': randint(50, 150),
    'batch_size': [16, 32, 64],
    'epochs': randint(10, 30),
}

best_score = -np.inf  # Best mean validation accuracy seen so far
best_params = None  # Hyperparameters that produced best_score
best_model_path = "best_model.h5"  # Path to save the best model

n_iter = 200  # Number of iterations for random search
tscv = TimeSeriesSplit(n_splits=5)

# A fixed random_state makes the sampled candidates reproducible across runs.
for params in ParameterSampler(param_dist, n_iter=n_iter, random_state=42):
    fold_scores = []  # Validation accuracy for each fold

    for train_index, test_index in tscv.split(Z):
        # Release the previous fold's Keras state; without this, building
        # n_iter * n_splits models in one kernel keeps accumulating memory.
        keras.backend.clear_session()

        # Split data
        X_train_fold, X_val_fold = Z[train_index], Z[test_index]
        y_train_fold, y_val_fold = y_final[train_index], y_final[test_index]

        # create_model() already compiles the network, so no second compile
        # is needed here.
        model = create_model(units=params['units'])

        # verbose=0: per-epoch logs for every fit would flood the cell output.
        model.fit(
            X_train_fold,
            y_train_fold,
            epochs=params['epochs'],
            batch_size=params['batch_size'],
            verbose=0,
        )

        # evaluate() returns [loss, accuracy]; keep the accuracy.
        _, score = model.evaluate(X_val_fold, y_val_fold, verbose=0)
        fold_scores.append(score)

    # Compute the average score across all folds
    avg_score = np.mean(fold_scores)

    # If the current candidate's score is better, update best score and
    # parameters, and save the model.
    if avg_score > best_score:
        best_score = avg_score
        best_params = params

        # `model` is the network fitted on the last fold iterated above, so
        # that fold's model is what gets written to disk.
        model.save(best_model_path)
        print(f"New best model saved with score: {avg_score}")

# After the search
print(f"Best Score: {best_score}")
print(f"Best Parameters: {best_params}")


In [None]:
# Load the best model
# Reads back the file at best_model_path, which the random-search cell writes
# with model.save() each time a candidate improves the best mean score.
best_model = load_model(best_model_path)

In [None]:
#reconstructing test data

# Transform the held-out rows with the scalers that were fitted on the
# training split. The results get their own names: storing them in
# `training_data_features_scaled` overwrote the training array, so re-running
# the training window cell afterwards would silently build Z from test rows.
test_input_columns = ["Volume", "Return", "Return_Squared", "EMAF", "Daily Volatility"]
test_features_scaled = scaler1.transform(test_data[test_input_columns])
test_dataset_scaled = scaler2.transform(test_data)

# `backcandles` is deliberately not reassigned here, so the test windows always
# use the same look-back length as the training windows.
n_test_rows = test_features_scaled.shape[0]
if n_test_rows <= backcandles:
    raise ValueError(
        f"Need more than {backcandles} test rows to build a window, got {n_test_rows}"
    )

# Stack one (backcandles, n_features) slice per sample, giving an array indexed
# [sample][timestep][feature]. Slicing whole rows covers every feature column,
# so the number of features is not hard-coded.
T = np.stack([
    test_features_scaled[end - backcandles:end]
    for end in range(backcandles, n_test_rows)
])

# Last column of the scaled test frame (the "target" column), starting at the
# row that closes the first window; the final entry is dropped so there is
# exactly one label per window in T.
yi_test = np.array(test_dataset_scaled[backcandles - 1:, -1])
y_final_test = yi_test.reshape(-1, 1)[:-1]

In [None]:
# Predicted probabilities for the held-out windows (T) and for the windows the
# search was run on (Z).
test_predictions = best_model.predict(T)
validation_predictions = best_model.predict(Z)

# Hard class labels: 1 where the probability is strictly above 0.5, else 0.
test_predicted_classes = np.greater(test_predictions, 0.5).astype(int)
validation_predicted_classes = np.greater(validation_predictions, 0.5).astype(int)

In [None]:
# Ground-truth labels aligned row-for-row with the windowed predictions.
# Window k covers rows k .. k+backcandles-1 and is paired with the label on
# row k+backcandles-1, so the labels run from row backcandles-1 up to (but not
# including) the last row. Slicing relative to the frame itself replaces the
# hard-coded row counts (955 / 3817), which only held for one dataset size.
# .copy() gives an independent frame so adding a column does not write into a
# slice; .ravel() flattens the (n, 1) prediction array to one value per row.
y_frame = test_data[["target"]].iloc[backcandles - 1:-1].copy()
y_frame['predicted'] = test_predicted_classes.ravel()

y_frame_v = training_features[["target"]].iloc[backcandles - 1:-1].copy()
y_frame_v['predicted'] = validation_predicted_classes.ravel()


In [None]:
#out-of-sample confusion matrix
from sklearn.metrics import confusion_matrix
# confusion_matrix expects (y_true, y_pred): rows are the actual classes and
# columns the predicted classes. The arguments were previously passed in the
# opposite order, which transposed the matrix and swapped the false-positive
# and false-negative cells.
cm = confusion_matrix(y_frame['target'], y_frame['predicted'])
print(cm)

In [None]:
#ROC Curve 
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from matplotlib import cm
import numpy as np


# Calculate ROC curve, AUC, and thresholds
fpr, tpr, thresholds = roc_curve(y_frame['target'], test_predictions)
roc_auc = auc(fpr, tpr)

# Print the AUC
print(f"AUC: {roc_auc:.4f}")

# Create the plot
plt.figure(figsize=(10, 8))
# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which is no longer
# available in recent Matplotlib releases.
cmap = plt.get_cmap('viridis')  # Choose a colormap

# roc_curve prepends a sentinel threshold that lies above every score (np.inf
# in recent scikit-learn releases). Left as-is it wrecks the colour
# normalisation, so the values used for colouring are clipped to the
# probability range [0, 1]; the raw thresholds are still printed below.
threshold_colors = np.clip(thresholds, 0.0, 1.0)

# The scatter plot for the ROC points
sc = plt.scatter(fpr, tpr, c=threshold_colors, cmap=cmap, edgecolor='none', s=70)

# Optionally, print the thresholds alongside FPR and TPR for inspection
for f, t, thresh in zip(fpr, tpr, thresholds):
    print(f"Threshold: {thresh:.2f}, 1-Specificity: {f:.2f}, Sensitivity: {t:.2f}")

# Plotting the ROC curve and the chance diagonal
plt.plot(fpr, tpr, color='black', lw=1, label=f'AUC = {roc_auc:.2f}')
plt.plot([0, 1], [0, 1], color='black', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.gca().tick_params(axis='x', labelsize=15)
plt.gca().tick_params(axis='y', labelsize=15)
plt.xlabel('1-Specificity', fontsize=20)
plt.ylabel('Sensitivity', fontsize=20)


# Adding colorbar with custom font for the label
cbar = plt.colorbar(sc)
cbar.set_label('Threshold', size=18)
cbar.ax.tick_params(labelsize=15)

plt.legend(loc="lower right", fontsize=15)
#plt.savefig('ROC LSTM.pdf', format='pdf', dpi=300, bbox_inches='tight')
plt.show()
