In [38]:
!pip install scikeras



In [39]:
import pandas as pd

data = pd.read_csv('cleaned_heart_data.csv')
# data_clean = pd.read_csv('../data/processed/cleaned_heart_data.csv')

In [40]:
from sklearn.model_selection import train_test_split

seed_value = 42
# Separate features (X) and target (y)
y = data['HeartDisease']
X = data.drop('HeartDisease', axis=1) # Drop target column

# Since the data is pre-processed we don't need to do any further processing but simply split it
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed_value)

In [41]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input
from keras.regularizers import l2

# MLP model creation
def create_model(neurons=64, activation='relu', optimizer='adam', input_shape=(X_train.shape[1],)):
    model = Sequential([
        Input(shape=input_shape),
        Dense(neurons, activation=activation),
        Dense(neurons // 2, activation=activation), # Half neurons in the second layer
        Dense(1, activation='sigmoid') # Output layer for binary classification
    ])

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [42]:
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier


# 2. Create a KerasClassifier
model = KerasClassifier(model=create_model, verbose=0)

# # 3. Define the hyperparameter grid
param_grid = {
    'model__neurons': [64, 128],
    'model__activation': ['relu', 'leaky_relu', 'tanh'],
    'optimizer': ['adam', 'rmsprop'],
    'batch_size': [16, 32],
    'epochs': [50, 20]
}

# 4. Create GridSearchCV object
grid = GridSearchCV(estimator=model,
                    param_grid=param_grid,
                    cv=3,
                    scoring='accuracy',
                    error_score='raise',
                    n_jobs=-1)

# 5. Fit the grid search to your training data
grid_result = grid.fit(X_train, y_train)

# 6. Print the best parameters and score
print(f"Best Parameters: {grid_result.best_params_}")
print(f"Best Accuracy: {grid_result.best_score_:.4f}")

  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters: {'batch_size': 16, 'epochs': 20, 'model__activation': 'leaky_relu', 'model__neurons': 64, 'optimizer': 'rmsprop'}
Best Accuracy: 0.8372


In [43]:

# 7. Evaluate the best model on the test set
best_model = grid_result.best_estimator_.model_

# loss, accuracy = best_model.model.evaluate(X_test, y_test)  # Access the underlying Keras model
best_model.summary()
loss, accuracy = best_model.evaluate(X_test, y_test)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step - accuracy: 0.8044 - loss: 0.3845
Test Loss: 0.3958
Test Accuracy: 0.7933


In [44]:
#baseline - 'Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBP', 'RestingECG'
#stress test - 'MaxHR', 'Oldpeak', 'ExerciseAngina', 'ST_Slope'

# Baseline features
X_train_reduced = X_train.drop(columns=['MaxHR', 'Oldpeak', 'ExerciseAngina_Y', 'ST_Slope_Flat', 'ST_Slope_Up'])
X_test_reduced = X_test.drop(columns=['MaxHR', 'Oldpeak', 'ExerciseAngina_Y', 'ST_Slope_Flat', 'ST_Slope_Up'])


In [45]:
X_train_reduced.head(5)

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,Sex_M,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_Normal,RestingECG_ST
70,57,140,265,0,True,True,False,False,False,True
164,52,140,225,0,False,True,False,False,True,False
710,56,140,294,0,False,True,False,False,False,False
265,54,160,305,0,True,True,False,False,True,False
250,44,135,491,0,True,False,False,False,True,False


In [46]:
X_test_reduced.head(5)

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,Sex_M,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_Normal,RestingECG_ST
208,28,130,132,0,True,True,False,False,False,False
259,55,122,320,0,False,True,False,False,True,False
97,39,160,147,1,True,False,True,False,True,False
148,50,120,168,0,True,True,False,False,True,False
395,71,130,221,0,True,False,False,False,False,True


In [49]:
# Here we will do two different experiments

# 1. Re-train the best model with just the reduced data.
# for this we will create a new model using all the parameters
# found by usage of grid search but using the new data shape
best_model_reduced = create_model(
    neurons=grid_result.best_params_.get('model__neurons'),
    activation=grid_result.best_params_.get('model__activation'),
    optimizer=grid_result.best_params_.get('optimizer'),
    input_shape=(X_train_reduced.shape[1],))

best_model_reduced.summary()

In [51]:
import numpy as np

X_train_reduced = X_train_reduced.astype(np.float32)
X_test_reduced = X_test_reduced.astype(np.float32)

In [52]:
best_model_reduced.fit(X_train_reduced,
                       y_train,
                       epochs=grid_result.best_params_.get('epochs'),
                       batch_size=grid_result.best_params_.get('batch_size'),
                       validation_data=(X_test_reduced, y_test),
                       verbose=1)

Epoch 1/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.4658 - loss: 4.1546 - val_accuracy: 0.5533 - val_loss: 0.9029
Epoch 2/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5106 - loss: 1.9215 - val_accuracy: 0.4800 - val_loss: 2.6565
Epoch 3/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5115 - loss: 1.8500 - val_accuracy: 0.5267 - val_loss: 1.9133
Epoch 4/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5112 - loss: 1.8298 - val_accuracy: 0.5267 - val_loss: 2.8771
Epoch 5/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5365 - loss: 1.7061 - val_accuracy: 0.4800 - val_loss: 3.4388
Epoch 6/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5339 - loss: 1.6617 - val_accuracy: 0.4867 - val_loss: 0.8792
Epoch 7/20
[1m38/38[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7c0774eb9e10>

In [55]:
# Define the new hyperparameter grid
param_grid_2 = {
    'model__neurons': [64, 128],
    'model__activation': ['relu', 'leaky_relu', 'tanh'],
    'optimizer': ['adam', 'rmsprop'],
    'batch_size': [16, 32],
    'epochs': [50, 20]
}

# Create the KerasClassifier with input_shape - ADD input_shape
model_reduced = KerasClassifier(model=create_model, # Assuming create_model is your function
                                input_shape=(X_train_reduced.shape[1],), # Define input_shape here
                                verbose=1)

grid_reduced = GridSearchCV(estimator=model_reduced,
                            param_grid=param_grid_2,
                            cv=3,
                            scoring='accuracy',
                            error_score='raise',
                            n_jobs=-1)

# Fit the grid search to your training data
grid_result_2 = grid_reduced.fit(X_train_reduced, y_train)

# Print the best parameters and score
print(f"Best Parameters: {grid_result_2.best_params_}")
print(f"Best Accuracy: {grid_result_2.best_score_:.4f}")



Epoch 1/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.5193 - loss: 6.0326
Epoch 2/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4739 - loss: 1.1238
Epoch 3/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5355 - loss: 1.0258
Epoch 4/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5272 - loss: 0.8612
Epoch 5/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5377 - loss: 0.7744
Epoch 6/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5277 - loss: 0.8565
Epoch 7/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5153 - loss: 0.8904
Epoch 8/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5918 - loss: 0.6943
Epoch 9/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m