In [None]:
# Add confidence intervals
import config_cat_embedding
import pandas as pd
import numpy as np
import random
import time
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
# from sklearn.svm import SVC
from xgboost import XGBClassifier
from scikeras.wrappers import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score)
from scipy import stats

from data_prep import bank_data_prep, adult_data_prep
from embedding_helper import create_network

# One seed drives Python's RNG, NumPy's global RNG and every seedable estimator.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Read the raw CSV from the directory configured for the project
data_path = config_cat_embedding.paths['data']
bank_data = pd.read_csv(data_path+'adult.csv', sep=',')

# Project helper; its second return value is later passed to the one-hot
# encoder as `cols`, so it is presumably the list of categorical column names.
df_bank, cat_cols = adult_data_prep(bank_data)

# Features are every column except the last one; the target is column `y`
X = df_bank.iloc[:, :-1]
y = df_bank.y

# Classifiers to benchmark, as (short name, estimator) pairs in insertion order.
# The MLP's number_of_features is only known after encoding, so it is set
# inside the cross-validation loop rather than here.
models = list({
    'LR': LogisticRegression(solver='lbfgs', random_state=seed, max_iter=1000),
    'DT': DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed),
    'RF': RandomForestClassifier(n_estimators=200, max_depth=5, random_state=seed, min_samples_leaf=3),
    'KNN': KNeighborsClassifier(n_neighbors=3),
    'XGB': XGBClassifier(eval_metric='logloss', random_state=seed),
    # 'SVM': SVC(gamma='scale', random_state=seed, probability=True),
    'MLP': KerasClassifier(
        model=create_network,
        epochs=100, batch_size=100, verbose=0, random_state=seed),
}.items())

# 20-fold stratified splitter, shuffled with the shared seed
cv = StratifiedKFold(n_splits=20, shuffle=True, random_state=seed)

# Function to calculate confidence intervals
def confidence_interval(data, confidence=0.95):
    """Return the sample mean and a two-sided Student-t confidence interval.

    Parameters
    ----------
    data : sequence of float
        Observations (here: one score per cross-validation fold).
    confidence : float, default 0.95
        Coverage level of the interval.

    Returns
    -------
    tuple
        ``(mean, lower, upper)``. With fewer than two observations the
        standard error is undefined, so the bounds are NaN; with no
        observations the mean is NaN as well.

    NOTE(review): the t-interval treats the observations as independent;
    scores from overlapping CV training sets are not strictly so, which
    tends to make the interval optimistic.
    """
    n = len(data)
    nan = float('nan')
    if n == 0:
        # Same result the unguarded computation would give, minus the warnings.
        return nan, nan, nan
    m = np.mean(data)
    if n == 1:
        # sem() needs ddof=1 and t.ppf needs df >= 1; both are undefined here.
        return m, nan, nan
    std_err = stats.sem(data)
    # Half-width: standard error times the t critical value with n-1 d.o.f.
    h = std_err * stats.t.ppf((1 + confidence) / 2, n - 1)
    return m, m - h, m + h

# Imported once up front instead of on every fold of every model.
import category_encoders as ce

# Loop over models: run the same stratified folds for each classifier and
# report the mean of every metric with its 95% confidence interval.
for name, model in models:
    print(f"Classifier: {name}")
    # Lists to store metrics for each fold
    accuracies = []
    precisions = []
    recalls = []
    f1s = []
    roc_aucs = []

    # perf_counter() is monotonic, so the elapsed time cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    for train_index, test_index in cv.split(X, y):
        # Split the data into training and testing sets for this fold
        X_train_fold, X_test_fold = X.iloc[train_index], X.iloc[test_index]
        y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]

        # Preprocess inside the fold: encoder and scaler are fitted on the
        # training part only and then applied to the held-out part.
        one_hot_encoder = ce.OneHotEncoder(cols=cat_cols)
        X_train_ohe = one_hot_encoder.fit_transform(X_train_fold)
        X_test_ohe = one_hot_encoder.transform(X_test_fold)

        stc = StandardScaler()
        X_train_scaled = stc.fit_transform(X_train_ohe)
        X_test_scaled = stc.transform(X_test_ohe)

        # The encoded width is only known now; forward it to the network
        # builder through the wrapper's `model__` parameter routing.
        number_of_features = X_train_scaled.shape[1]
        if name == 'MLP':
            model.set_params(model__number_of_features=number_of_features)

        # Fit on the training part, predict labels for the held-out part
        model.fit(X_train_scaled, y_train_fold)
        y_pred_fold = model.predict(X_test_scaled)

        # Scores for ROC AUC: positive-class probability when available
        if hasattr(model, "predict_proba"):
            y_pred_prob_fold = model.predict_proba(X_test_scaled)[:, 1]
        else:
            # ROC AUC only depends on the ranking of the scores, so the raw
            # decision values are used as-is. Min-max scaling them is not
            # needed and would divide by zero when all scores are equal.
            print("not having proba")
            y_pred_prob_fold = model.decision_function(X_test_scaled)

        # zero_division=0 on all three ratio metrics: a fold without positive
        # predictions scores 0 silently instead of emitting a warning.
        accuracies.append(accuracy_score(y_test_fold, y_pred_fold))
        precisions.append(precision_score(y_test_fold, y_pred_fold, zero_division=0))
        recalls.append(recall_score(y_test_fold, y_pred_fold, zero_division=0))
        f1s.append(f1_score(y_test_fold, y_pred_fold, zero_division=0))
        roc_aucs.append(roc_auc_score(y_test_fold, y_pred_prob_fold))

    # Total wall time for this model: preprocessing, fitting and scoring
    end_time = time.perf_counter()
    total_computation_time = end_time - start_time

    # Mean and confidence interval per metric, printed in a fixed order
    for label, scores in (
        ("Accuracy", accuracies),
        ("Precision", precisions),
        ("Recall", recalls),
        ("F1 Score", f1s),
        ("ROC AUC", roc_aucs),
    ):
        mean, ci_lower, ci_upper = confidence_interval(scores)
        print(f"{label}: {mean:.3f} (95% CI: {ci_lower:.3f} - {ci_upper:.3f})")
    print(f"Total Computation Time: {total_computation_time:.3f} seconds\n")


In [1]:
# Add confidence intervals
import config_cat_embedding
import pandas as pd
import numpy as np
import random
import time
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
# from sklearn.svm import SVC
from xgboost import XGBClassifier
from scikeras.wrappers import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score)
from scipy import stats

from data_prep import bank_data_prep, adult_data_prep
from embedding_helper import create_network

# One seed drives Python's RNG, NumPy's global RNG and every seedable estimator.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Read the raw CSV from the directory configured for the project
data_path = config_cat_embedding.paths['data']
bank_data = pd.read_csv(data_path+'adult.csv', sep=',')

# Project helper; its second return value is later passed to the one-hot
# encoder as `cols`, so it is presumably the list of categorical column names.
df_bank, cat_cols = adult_data_prep(bank_data)

# Features are every column except the last one; the target is column `y`
X = df_bank.iloc[:, :-1]
y = df_bank.y

# Classifiers to benchmark, as (short name, estimator) pairs in insertion order.
# The MLP's number_of_features is only known after encoding, so it is set
# inside the cross-validation loop rather than here.
models = list({
    'LR': LogisticRegression(solver='lbfgs', random_state=seed, max_iter=1000),
    'DT': DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed),
    'RF': RandomForestClassifier(n_estimators=200, max_depth=5, random_state=seed, min_samples_leaf=3),
    'KNN': KNeighborsClassifier(n_neighbors=3),
    'XGB': XGBClassifier(eval_metric='logloss', random_state=seed),
    # 'SVM': SVC(gamma='scale', random_state=seed, probability=True),
    'MLP': KerasClassifier(
        model=create_network,
        epochs=100, batch_size=100, verbose=0, random_state=seed),
}.items())

# 20-fold stratified splitter, shuffled with the shared seed
cv = StratifiedKFold(n_splits=20, shuffle=True, random_state=seed)

# Function to calculate confidence intervals
def confidence_interval(data, confidence=0.95):
    """Return the sample mean and a two-sided Student-t confidence interval.

    Parameters
    ----------
    data : sequence of float
        Observations (here: one score per cross-validation fold).
    confidence : float, default 0.95
        Coverage level of the interval.

    Returns
    -------
    tuple
        ``(mean, lower, upper)``. With fewer than two observations the
        standard error is undefined, so the bounds are NaN; with no
        observations the mean is NaN as well.

    NOTE(review): the t-interval treats the observations as independent;
    scores from overlapping CV training sets are not strictly so, which
    tends to make the interval optimistic.
    """
    n = len(data)
    nan = float('nan')
    if n == 0:
        # Same result the unguarded computation would give, minus the warnings.
        return nan, nan, nan
    m = np.mean(data)
    if n == 1:
        # sem() needs ddof=1 and t.ppf needs df >= 1; both are undefined here.
        return m, nan, nan
    std_err = stats.sem(data)
    # Half-width: standard error times the t critical value with n-1 d.o.f.
    h = std_err * stats.t.ppf((1 + confidence) / 2, n - 1)
    return m, m - h, m + h

# Imported once up front instead of on every fold of every model.
import category_encoders as ce

# Loop over models: run the same stratified folds for each classifier and
# report the mean of every metric with its 95% confidence interval.
for name, model in models:
    print(f"Classifier: {name}")
    # Lists to store metrics for each fold
    accuracies = []
    precisions = []
    recalls = []
    f1s = []
    roc_aucs = []

    # perf_counter() is monotonic, so the elapsed time cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    for train_index, test_index in cv.split(X, y):
        # Split the data into training and testing sets for this fold
        X_train_fold, X_test_fold = X.iloc[train_index], X.iloc[test_index]
        y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]

        # Preprocess inside the fold: encoder and scaler are fitted on the
        # training part only and then applied to the held-out part.
        one_hot_encoder = ce.OneHotEncoder(cols=cat_cols)
        X_train_ohe = one_hot_encoder.fit_transform(X_train_fold)
        X_test_ohe = one_hot_encoder.transform(X_test_fold)

        stc = StandardScaler()
        X_train_scaled = stc.fit_transform(X_train_ohe)
        X_test_scaled = stc.transform(X_test_ohe)

        # The encoded width is only known now; forward it to the network
        # builder through the wrapper's `model__` parameter routing.
        number_of_features = X_train_scaled.shape[1]
        if name == 'MLP':
            model.set_params(model__number_of_features=number_of_features)

        # Fit on the training part, predict labels for the held-out part
        model.fit(X_train_scaled, y_train_fold)
        y_pred_fold = model.predict(X_test_scaled)

        # Scores for ROC AUC: positive-class probability when available
        if hasattr(model, "predict_proba"):
            y_pred_prob_fold = model.predict_proba(X_test_scaled)[:, 1]
        else:
            # ROC AUC only depends on the ranking of the scores, so the raw
            # decision values are used as-is. Min-max scaling them is not
            # needed and would divide by zero when all scores are equal.
            print("not having proba")
            y_pred_prob_fold = model.decision_function(X_test_scaled)

        # zero_division=0 on all three ratio metrics: a fold without positive
        # predictions scores 0 silently instead of emitting a warning.
        accuracies.append(accuracy_score(y_test_fold, y_pred_fold))
        precisions.append(precision_score(y_test_fold, y_pred_fold, zero_division=0))
        recalls.append(recall_score(y_test_fold, y_pred_fold, zero_division=0))
        f1s.append(f1_score(y_test_fold, y_pred_fold, zero_division=0))
        roc_aucs.append(roc_auc_score(y_test_fold, y_pred_prob_fold))

    # Total wall time for this model: preprocessing, fitting and scoring
    end_time = time.perf_counter()
    total_computation_time = end_time - start_time

    # Mean and confidence interval per metric, printed in a fixed order
    for label, scores in (
        ("Accuracy", accuracies),
        ("Precision", precisions),
        ("Recall", recalls),
        ("F1 Score", f1s),
        ("ROC AUC", roc_aucs),
    ):
        mean, ci_lower, ci_upper = confidence_interval(scores)
        print(f"{label}: {mean:.3f} (95% CI: {ci_lower:.3f} - {ci_upper:.3f})")
    print(f"Total Computation Time: {total_computation_time:.3f} seconds\n")


Classifier: LR
Accuracy: 0.849 (95% CI: 0.846 - 0.851)
Precision: 0.736 (95% CI: 0.729 - 0.743)
Recall: 0.606 (95% CI: 0.597 - 0.616)
F1 Score: 0.665 (95% CI: 0.658 - 0.672)
ROC AUC: 0.905 (95% CI: 0.901 - 0.908)
Total Computation Time: 27.194 seconds

Classifier: DT
Accuracy: 0.815 (95% CI: 0.812 - 0.817)
Precision: 0.940 (95% CI: 0.934 - 0.946)
Recall: 0.269 (95% CI: 0.260 - 0.279)
F1 Score: 0.418 (95% CI: 0.406 - 0.430)
ROC AUC: 0.832 (95% CI: 0.829 - 0.836)
Total Computation Time: 29.393 seconds

Classifier: RF
Accuracy: 0.824 (95% CI: 0.822 - 0.827)
Precision: 0.886 (95% CI: 0.875 - 0.897)
Recall: 0.335 (95% CI: 0.324 - 0.345)
F1 Score: 0.485 (95% CI: 0.474 - 0.496)
ROC AUC: 0.894 (95% CI: 0.890 - 0.898)
Total Computation Time: 119.732 seconds

Classifier: KNN
Accuracy: 0.813 (95% CI: 0.809 - 0.816)
Precision: 0.634 (95% CI: 0.626 - 0.643)
Recall: 0.576 (95% CI: 0.565 - 0.588)
F1 Score: 0.604 (95% CI: 0.595 - 0.613)
ROC AUC: 0.811 (95% CI: 0.805 - 0.818)
Total Computation Time: 58

In [1]:
import config_cat_embedding
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
#from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from scikeras.wrappers import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from tqdm import tqdm_notebook as tqdm

from data_prep import bank_data_prep, adult_data_prep
from embedding_helper import create_network
import time
import random

# Single seed for Python's RNG, NumPy's global RNG and every seedable
# estimator below, so the whole cell is reproducible from one constant.
seed = 42
random.seed(seed)
np.random.seed(seed)


# Input/output directories come from the project configuration module
data_path = config_cat_embedding.paths['data']
data_path_out = config_cat_embedding.paths['data_output']
bank_data = pd.read_csv(data_path+'adult.csv', sep=',')

# Project helper; its second return value is passed to the one-hot encoder
# as `cols`, so it is presumably the list of categorical column names.
df_bank, cat_cols = adult_data_prep(bank_data)

# Features are every column except the last one; the target is column `y`
X = df_bank.iloc[:, :-1]
target = df_bank.y

# 80/20 hold-out split with a fixed split seed
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.2, random_state=1500)

# One-hot encoding: fitted on the training split only, then applied to test
import category_encoders as ce
one_hot_encoder = ce.OneHotEncoder(cols=cat_cols)
X_train_ohe = one_hot_encoder.fit_transform(X_train)
X_test_ohe = one_hot_encoder.transform(X_test)  # Use transform() instead of fit_transform()

# Standard scaling, again with statistics from the training split only
stc = StandardScaler()
X_train_scaled = stc.fit_transform(X_train_ohe)
X_test_scaled = stc.transform(X_test_ohe)

# Define the classifiers
number_of_features = X_train_scaled.shape[1]  # Number of features in the input data

models = [
    ('LR', LogisticRegression(solver='lbfgs', random_state=seed, max_iter=1000)),
    ('DT', DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed)),
    ('RF', RandomForestClassifier(n_estimators=200, max_depth=5, random_state=seed, min_samples_leaf=3)),
    ('KNN', KNeighborsClassifier(n_neighbors=3)),
    # Seeded like the other estimators so repeated runs match
    ('XGB', XGBClassifier(eval_metric='logloss', random_state=seed)),
    ('SVM', SVC(gamma='scale', random_state=seed, probability=True)),
    # SciKeras: `model=` replaces the deprecated `build_fn=`; `random_state`
    # makes the network's initialisation and shuffling repeatable.
    ('MLP', KerasClassifier(model=create_network, number_of_features=number_of_features,
                            epochs=100, batch_size=100, verbose=0, random_state=seed))
]

# Fit each classifier on the training split and report hold-out metrics.
for name, model in models:
    # perf_counter() is monotonic, so the elapsed time cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    y_pred_prob = model.predict_proba(X_test_scaled)

    end_time = time.perf_counter()
    computation_time = end_time - start_time  # fit plus both prediction calls

    print(f"Classifier: {name}")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred, digits=3))  # Set digits to 3 for three decimal places
    # Column 1 of predict_proba is used as the positive-class score
    print(f"ROC AUC Score: {roc_auc_score(y_test, y_pred_prob[:, 1]):.3f}")
    print(f"Computation Time: {computation_time:.3f} seconds")
    print()


Classifier: LR
[[6282  536]
 [ 887 1340]]
              precision    recall  f1-score   support

           0      0.876     0.921     0.898      6818
           1      0.714     0.602     0.653      2227

    accuracy                          0.843      9045
   macro avg      0.795     0.762     0.776      9045
weighted avg      0.836     0.843     0.838      9045

ROC AUC Score: 0.899
Computation Time: 0.157 seconds

Classifier: DT
[[6773   45]
 [1635  592]]
              precision    recall  f1-score   support

           0      0.806     0.993     0.890      6818
           1      0.929     0.266     0.413      2227

    accuracy                          0.814      9045
   macro avg      0.867     0.630     0.652      9045
weighted avg      0.836     0.814     0.772      9045

ROC AUC Score: 0.825
Computation Time: 0.119 seconds

Classifier: RF
[[6700  118]
 [1477  750]]
              precision    recall  f1-score   support

           0      0.819     0.983     0.894      6818
   

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Classifier: MLP
[[6172  646]
 [ 857 1370]]
              precision    recall  f1-score   support

           0      0.878     0.905     0.891      6818
           1      0.680     0.615     0.646      2227

    accuracy                          0.834      9045
   macro avg      0.779     0.760     0.769      9045
weighted avg      0.829     0.834     0.831      9045

ROC AUC Score: 0.884
Computation Time: 47.736 seconds



In [7]:
import config_cat_embedding
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from tqdm import tqdm_notebook as tqdm

from data_prep import bank_data_prep, adult_data_prep
from embedding_helper import create_network
import time
import random

# Reproducibility: one constant seeds Python's RNG, NumPy's global RNG and
# the estimators that accept a random_state.
seed = 42
random.seed(seed)
np.random.seed(seed)


# Input/output directories come from the project configuration module
data_path = config_cat_embedding.paths['data']
data_path_out = config_cat_embedding.paths['data_output']
bank_data = pd.read_csv(data_path+'adult.csv', sep=',')

# Project helper; its second return value is passed to the one-hot encoder
# as `cols`, so it is presumably the list of categorical column names.
df_bank, cat_cols = adult_data_prep(bank_data)

# Features are every column except the last one; the target is column `y`
X = df_bank.iloc[:, :-1]
target = df_bank.y

# 80/20 hold-out split with a fixed split seed
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.2, random_state=1500)

# One-hot encode the categorical columns; the encoder is fitted on the
# training split and only applied (transform) to the test split.
import category_encoders as ce
one_hot_encoder = ce.OneHotEncoder(cols=cat_cols)
X_train_ohe = one_hot_encoder.fit_transform(X_train)
X_test_ohe = one_hot_encoder.transform(X_test)

# Standardise using statistics learned from the training split
stc = StandardScaler()
X_train_scaled = stc.fit_transform(X_train_ohe)
X_test_scaled = stc.transform(X_test_ohe)

# Width of the encoded design matrix, handed to the network builder
number_of_features = X_train_scaled.shape[1]

# Classifiers to benchmark, as (short name, estimator) pairs in insertion order
models = list({
    'LR': LogisticRegression(solver='lbfgs', random_state=seed, max_iter=1000),
    'DT': DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed),
    'RF': RandomForestClassifier(n_estimators=200, max_depth=5, random_state=seed, min_samples_leaf=3),
    'KNN': KNeighborsClassifier(n_neighbors=3),
    'XGB': XGBClassifier(eval_metric='logloss'),
    'SVM': SVC(gamma='scale', random_state=seed, probability=True),
    'MLP': KerasClassifier(build_fn=create_network, number_of_features=number_of_features,
                           epochs=100, batch_size=100, verbose=0),
}.items())

# Fit each classifier on the training split and report hold-out metrics.
for name, model in models:
    # perf_counter() is monotonic, so the elapsed time cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    y_pred_prob = model.predict_proba(X_test_scaled)

    end_time = time.perf_counter()
    computation_time = end_time - start_time  # fit plus both prediction calls

    print(f"Classifier: {name}")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred, digits=3))  # Set digits to 3 for three decimal places
    # Column 1 of predict_proba is used as the positive-class score
    print(f"ROC AUC Score: {roc_auc_score(y_test, y_pred_prob[:, 1]):.3f}")
    print(f"Computation Time: {computation_time:.3f} seconds")
    print()



  ('MLP', KerasClassifier(build_fn=create_network, number_of_features=number_of_features, epochs=100, batch_size=100, verbose=0))


Classifier: LR
[[7126  168]
 [ 619  325]]
              precision    recall  f1-score   support

           0      0.920     0.977     0.948      7294
           1      0.659     0.344     0.452       944

    accuracy                          0.904      8238
   macro avg      0.790     0.661     0.700      8238
weighted avg      0.890     0.904     0.891      8238

ROC AUC Score: 0.917
Computation Time: 0.068 seconds

Classifier: DT
[[7106  188]
 [ 618  326]]
              precision    recall  f1-score   support

           0      0.920     0.974     0.946      7294
           1      0.634     0.345     0.447       944

    accuracy                          0.902      8238
   macro avg      0.777     0.660     0.697      8238
weighted avg      0.887     0.902     0.889      8238

ROC AUC Score: 0.839
Computation Time: 0.042 seconds

Classifier: RF
[[7246   48]
 [ 771  173]]
              precision    recall  f1-score   support

           0      0.904     0.993     0.947      7294
   

In [7]:
import config_cat_embedding
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from tqdm import tqdm_notebook as tqdm

from data_prep import bank_data_prep, adult_data_prep
from embedding_helper import create_network
import time
import random

# Reproducibility: one constant seeds Python's RNG, NumPy's global RNG and
# the estimators that accept a random_state.
seed = 42
random.seed(seed)
np.random.seed(seed)


# Input/output directories come from the project configuration module
data_path = config_cat_embedding.paths['data']
data_path_out = config_cat_embedding.paths['data_output']
bank_data = pd.read_csv(data_path+'adult.csv', sep=',')

# Project helper; its second return value is passed to the one-hot encoder
# as `cols`, so it is presumably the list of categorical column names.
df_bank, cat_cols = adult_data_prep(bank_data)

# Features are every column except the last one; the target is column `y`
X = df_bank.iloc[:, :-1]
target = df_bank.y

# 80/20 hold-out split with a fixed split seed
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.2, random_state=1500)

# One-hot encode the categorical columns; the encoder is fitted on the
# training split and only applied (transform) to the test split.
import category_encoders as ce
one_hot_encoder = ce.OneHotEncoder(cols=cat_cols)
X_train_ohe = one_hot_encoder.fit_transform(X_train)
X_test_ohe = one_hot_encoder.transform(X_test)

# Standardise using statistics learned from the training split
stc = StandardScaler()
X_train_scaled = stc.fit_transform(X_train_ohe)
X_test_scaled = stc.transform(X_test_ohe)

# Width of the encoded design matrix, handed to the network builder
number_of_features = X_train_scaled.shape[1]

# Classifiers to benchmark, as (short name, estimator) pairs in insertion order
models = list({
    'LR': LogisticRegression(solver='lbfgs', random_state=seed, max_iter=1000),
    'DT': DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed),
    'RF': RandomForestClassifier(n_estimators=200, max_depth=5, random_state=seed, min_samples_leaf=3),
    'KNN': KNeighborsClassifier(n_neighbors=3),
    'XGB': XGBClassifier(eval_metric='logloss'),
    'SVM': SVC(gamma='scale', random_state=seed, probability=True),
    'MLP': KerasClassifier(build_fn=create_network, number_of_features=number_of_features,
                           epochs=100, batch_size=100, verbose=0),
}.items())

# Fit each classifier on the training split and report hold-out metrics.
for name, model in models:
    # perf_counter() is monotonic, so the elapsed time cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    y_pred_prob = model.predict_proba(X_test_scaled)

    end_time = time.perf_counter()
    computation_time = end_time - start_time  # fit plus both prediction calls

    print(f"Classifier: {name}")
    print(confusion_matrix(y_test, y_pred))
    # digits=4 prints the report with four decimal places
    print(classification_report(y_test, y_pred, target_names=['class 0', 'class 1'], digits=4))
    # Column 1 of predict_proba is used as the positive-class score
    print(f"ROC AUC Score: {roc_auc_score(y_test, y_pred_prob[:, 1]):.3f}")
    print(f"Computation Time: {computation_time:.3f} seconds")
    print()



  ('MLP', KerasClassifier(build_fn=create_network, number_of_features=number_of_features, epochs=100, batch_size=100, verbose=0))


Classifier: LR
[[6281  537]
 [ 889 1338]]
              precision    recall  f1-score   support

     class 0     0.8760    0.9212    0.8981      6818
     class 1     0.7136    0.6008    0.6524      2227

    accuracy                         0.8423      9045
   macro avg     0.7948    0.7610    0.7752      9045
weighted avg     0.8360    0.8423    0.8376      9045

ROC AUC Score: 0.899
Computation Time: 0.314 seconds

Classifier: DT
[[6773   45]
 [1635  592]]
              precision    recall  f1-score   support

     class 0     0.8055    0.9934    0.8897      6818
     class 1     0.9294    0.2658    0.4134      2227

    accuracy                         0.8143      9045
   macro avg     0.8674    0.6296    0.6515      9045
weighted avg     0.8360    0.8143    0.7724      9045

ROC AUC Score: 0.825
Computation Time: 0.069 seconds

Classifier: RF
[[6700  118]
 [1477  750]]
              precision    recall  f1-score   support

     class 0     0.8194    0.9827    0.8936      6818
   