In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from sklearn import preprocessing as pre
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import VarianceThreshold

# Read the full table from disk.
data = pd.read_csv('final-dataset.csv')

# Target vector: the 'Label' column.
y = data['Label']

# Model inputs: every column except the target and the two columns
# excluded alongside it ('Signal' and 'Hurst Component').
X = data.drop(columns=['Signal', 'Label', 'Hurst Component'])

# Feature selection using VarianceThreshold
def variance_threshold_selector(data: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """Return the columns of ``data`` kept by ``VarianceThreshold(threshold)``.

    Args:
        data: DataFrame of features; it is passed to ``VarianceThreshold.fit``
            unchanged and is not modified.
        threshold: Variance cut-off handed to ``VarianceThreshold``.

    Returns:
        A DataFrame containing only the retained columns, selected from
        ``data`` by label so the original column names are preserved.
    """
    selector = VarianceThreshold(threshold)
    # Only the fitted support mask is needed here; fit_transform would also
    # build a transformed array that was immediately thrown away.
    selector.fit(data)
    return data[data.columns[selector.get_support(indices=True)]]

# Drop low-variance columns. p*(1-p) is the variance of a Bernoulli(p)
# variable, used here as the cut-off.
# NOTE(review): the selector is fitted on all rows before the split below,
# and on unscaled values - confirm both are intended.
p = 0.8
X = variance_threshold_selector(X, p*(1-p))

# 70% train / 30% hold-out, then the hold-out is halved into validation and
# test (15% each of the full data).
# NOTE(review): after these two calls `X_test` (capital X) still refers to
# the whole 30% hold-out frame, while `x_test` / `y_test` are the final test
# split - easy to mix up further down.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)
x_val, x_test, y_val, y_test = train_test_split(
    X_test, y_test, test_size=0.5, random_state=0
)

# Standardise the features: statistics are learned from the training split
# only and then reused for the test and validation splits.
scaler = pre.StandardScaler()
X_train = scaler.fit_transform(X_train.values)
x_test = scaler.transform(x_test.values)
x_val = scaler.transform(x_val.values)

# Insert a length-1 timestep axis so each array is
# (samples, timesteps, features), as the RNN input expects.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))
x_val = x_val.reshape((x_val.shape[0], 1, x_val.shape[1]))

# Define RNN model
def create_rnn_model(optimizer='adam', init='glorot_uniform', dropout_rate=0.2):
    """Build and compile the binary-classification RNN.

    The network is SimpleRNN(128) -> Dropout -> Dense(64) -> Dropout ->
    Dense(1, sigmoid), compiled with binary cross-entropy loss and an
    accuracy metric.

    Args:
        optimizer: Optimizer passed to ``compile``.
        init: Kernel initializer for the SimpleRNN layer and the hidden
            Dense layer.
        dropout_rate: Rate used by both Dropout layers.

    Returns:
        The compiled ``Sequential`` model.

    Note:
        The input width is read from the module-level ``X_train`` (its last
        axis), so that array must exist before this function is called.
    """
    n_features = X_train.shape[2]
    layers = [
        SimpleRNN(
            128,
            input_shape=(1, n_features),
            activation='relu',
            kernel_initializer=init,
        ),
        Dropout(dropout_rate),
        Dense(64, activation='relu', kernel_initializer=init),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layers)
    network.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network

# Expose the Keras builder through the scikit-learn estimator interface so
# it can be driven by GridSearchCV.
model = KerasClassifier(build_fn=create_rnn_model, verbose=0)

# Search space: 3 * 3 * 2 * 2 * 3 = 108 combinations. 'batch_size' and
# 'epochs' are training settings; the other three keys match the keyword
# arguments of create_rnn_model.
parameters = dict(
    batch_size=[32, 64, 128],
    epochs=[50, 100, 200],
    optimizer=['adam', 'sgd'],
    init=['glorot_uniform', 'normal'],
    dropout_rate=[0.2, 0.3, 0.4],
)

# Exhaustive search with 3-fold cross-validation, using all available cores.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=3,
    n_jobs=-1,
)

# Run the search on the training split only.
grid_search.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated score.
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Score: {grid_search.best_score_}")


  model = KerasClassifier(build_fn=create_rnn_model, verbose=0)


Best Parameters: {'batch_size': 32, 'dropout_rate': 0.4, 'epochs': 200, 'init': 'normal', 'optimizer': 'adam'}
Best Score: 0.7622433503468832
