In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Dropout
from sklearn import preprocessing as pre
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import VarianceThreshold

# Read the dataset from the CSV file into a DataFrame
data = pd.read_csv('final-dataset.csv')

# Labels: the 'Label' column
y = data['Label']

# Features: everything except the label and the two excluded columns
X = data.drop(['Signal', 'Label', 'Hurst Component'], axis=1)

# Feature selection using VarianceThreshold
def variance_threshold_selector(data, threshold):
    """Return the columns of ``data`` kept by a fitted ``VarianceThreshold``.

    Args:
        data: pandas DataFrame of features; columns are selected by position
            through ``data.columns``.
        threshold: value passed straight to ``VarianceThreshold``.

    Returns:
        A DataFrame holding only the columns reported by the selector's
        ``get_support``; column labels and row index come from ``data``.
    """
    selector = VarianceThreshold(threshold)
    # Only the fitted support mask is needed. The array produced by
    # fit_transform was never used, so fitting alone avoids the extra work.
    selector.fit(data)
    return data[data.columns[selector.get_support(indices=True)]]

# Apply feature selection
# p*(1-p) is the variance of a Bernoulli(p) variable; it is used here as the
# variance cut-off handed to the selector.
p = 0.8
X = variance_threshold_selector(X, p*(1-p))
# Bare expression: only displayed when it is the last statement of a cell.
X.shape

# Split the data into training, validation, and test sets
# First split: 70% train / 30% hold-out (X_test, y_test hold the hold-out here).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
# Second split: the hold-out is halved into validation and final test sets.
# NOTE: y_test is rebound to the final test labels, while X_test still refers
# to the whole 30% hold-out; the final test features are in x_test.
x_val, x_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=0)

# Scale the features
# Fit the scaler on the training split only and reuse its mean/std for the
# validation and test splits. Standardising each split independently would
# give every split its own statistics, so the model would be evaluated on
# features scaled differently from the ones it was trained on.
scaler = pre.StandardScaler()
X_train = scaler.fit_transform(X_train)
x_val = scaler.transform(x_val)
x_test = scaler.transform(x_test)

# Define a function to create a basic deep learning model
def create_model(optimizer='adam', init='glorot_uniform', dropout_rate=0.2):
    """Build and compile a feed-forward binary classifier.

    The network is Dense(128, relu) -> Dropout -> Dense(64, relu) -> Dropout
    -> Dense(1, sigmoid). The input width is read from the module-level
    ``X_train`` at call time.

    Args:
        optimizer: optimizer passed to ``compile``.
        init: kernel initializer for the two hidden Dense layers.
        dropout_rate: rate used by both Dropout layers.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    n_features = X_train.shape[1]
    stack = (
        Dense(128, input_dim=n_features, activation='relu', kernel_initializer=init),
        Dropout(dropout_rate),
        Dense(64, activation='relu', kernel_initializer=init),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid'),  # single sigmoid unit for the binary output
    )

    net = Sequential()
    for layer in stack:
        net.add(layer)

    net.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Expose create_model through the scikit-learn estimator interface
model = KerasClassifier(build_fn=create_model, verbose=0)

# Hyperparameter grid: 3 * 3 * 2 * 2 * 3 = 108 combinations
parameters = {
    'batch_size': [
        32,
        64,
        128,
    ],
    'epochs': [
        50,
        100,
        200,
    ],
    'optimizer': [
        'adam',
        'sgd',
    ],
    'init': [
        'glorot_uniform',
        'normal',
    ],
    # Candidate dropout rates forwarded to create_model
    'dropout_rate': [
        0.2,
        0.3,
        0.4,
    ],
}

# Exhaustive search with 3-fold cross-validation, using all available cores
grid_search = GridSearchCV(model, parameters, cv=3, n_jobs=-1)

# Run the search on the scaled training data
grid_search.fit(X=X_train, y=y_train)

# Report the winning configuration and its mean cross-validated score
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Score: {grid_search.best_score_}")

  model = KerasClassifier(build_fn=create_model, verbose=0)


Best Parameters: {'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 200, 'init': 'normal', 'optimizer': 'adam'}
Best Score: 0.7724714477856954
