In [None]:
import pickle

# Bundle the per-fold metric lists produced by the evaluation loop into one dict.
# NOTE(review): the evaluation cells in this notebook re-create these lists for
# every classifier, so this snapshot contains only the scores of whichever
# model was evaluated last.
result_pkl = {
    'accuracies': accuracies,
    'precisions': precisions,
    'recalls': recalls,
    'f1s': f1s,
    'rocs': roc_aucs,
}

# Save to a pickle file
with open('results.pkl', 'wb') as f:
    pickle.dump(result_pkl, f)
 

 

In [None]:
import pickle  # imported here too so this cell also runs on a fresh kernel

# Read back the metrics dictionary written to 'results.pkl'.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickle files that you produced yourself.
with open('results.pkl', 'rb') as f:
    loaded_result_pkl = pickle.load(f)

In [1]:
import config_cat_embedding
import numpy as np
import pandas as pd
import random
import time

from gensim.models import Word2Vec
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
# from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from scikeras.wrappers import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, confusion_matrix, classification_report)
from tqdm.notebook import tqdm
from data_prep import bank_data_prep
from embedding_helper import create_network
from scipy import stats  # For confidence intervals

# One seed value drives Python's RNG, NumPy's global RNG, the estimators that
# accept `random_state`, and the cross-validation splitter.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Resolve the data locations from the project config and read the raw CSV
# (semicolon-separated).
data_path = config_cat_embedding.paths['data']
data_path_out = config_cat_embedding.paths['data_output']
bank_data = pd.read_csv(
    data_path + 'bank-additional-full.csv',
    sep=';',
)

# bank_data_prep returns the prepared frame and the list of categorical column names.
df_bank, cat_cols = bank_data_prep(bank_data)

# Features are every column except the last one; the target is the `y` column.
# NOTE(review): this presumes `y` is the last column of df_bank — confirm in bank_data_prep.
X = df_bank.iloc[:, :-1]
y = df_bank['y']

# Classifiers to compare, as (short name, estimator) pairs.
models = [
    (
        'LR',
        LogisticRegression(solver='lbfgs', max_iter=1000, random_state=seed),
    ),
    (
        'DT',
        DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed),
    ),
    (
        'RF',
        RandomForestClassifier(
            n_estimators=200,
            max_depth=5,
            min_samples_leaf=3,
            random_state=seed,
        ),
    ),
    (
        'KNN',
        KNeighborsClassifier(n_neighbors=3),
    ),
    (
        'XGB',
        XGBClassifier(eval_metric='logloss', random_state=seed),
    ),
    # ('SVM', SVC(gamma='scale', random_state=seed, probability=True)),
    (
        'MLP',
        KerasClassifier(
            model=create_network,
            epochs=100,
            batch_size=100,
            verbose=0,
            random_state=seed,
        ),
    ),
]

# Shuffled, stratified 5-fold cross-validation (change n_splits to use more or fewer folds).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

# Function to calculate confidence intervals
def confidence_interval(data, confidence=0.95):
    """Return the sample mean with a two-sided Student-t confidence interval.

    Parameters
    ----------
    data : sequence of numbers
        Observations (here: one metric value per cross-validation fold).
    confidence : float, default 0.95
        Confidence level of the interval.

    Returns
    -------
    tuple
        ``(mean, lower_bound, upper_bound)`` where the bounds are
        ``mean -/+ sem * t_crit`` and ``t_crit`` is the
        ``(1 + confidence) / 2`` quantile of the t distribution with
        ``len(data) - 1`` degrees of freedom.
    """
    sample_size = len(data)
    center = np.mean(data)
    # Standard error of the mean times the t critical value gives the half-width.
    half_width = stats.sem(data) * stats.t.ppf((1 + confidence) / 2, sample_size - 1)
    lower_bound = center - half_width
    upper_bound = center + half_width
    return center, lower_bound, upper_bound

# Embedding helper, defined once outside the cross-validation loop
def get_word2vec_embeddings(df, cat_cols, model, dimpool):
    """Turn the categorical columns of ``df`` into one flat embedding matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to embed; only the columns listed in ``cat_cols`` are read.
    cat_cols : sequence
        Names of the categorical columns, in the order their vectors are
        laid out in the output.
    model : object
        Anything exposing ``model.wv[value]`` that raises ``KeyError`` for an
        unknown value (here: a trained Word2Vec model).
    dimpool : int
        Length of each embedding vector.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), len(cat_cols) * dimpool)``; each row is the
        concatenation of that row's per-column vectors. A value that the
        model does not know contributes a zero vector.
    """

    def _vector_for(value):
        # Unknown category values fall back to an all-zero vector.
        try:
            return model.wv[value]
        except KeyError:
            return np.zeros((dimpool,))

    per_row = [
        np.array([_vector_for(record[column]) for column in cat_cols])
        for _, record in df.iterrows()
    ]
    # Flatten (n_rows, n_cat_cols, dimpool) into (n_rows, n_cat_cols * dimpool).
    return np.reshape(per_row, (len(per_row), len(cat_cols) * dimpool))

# Main loop over models
# For every (name, classifier) pair in `models`: iterate over the folds
# produced by `cv`, build Word2Vec-based features from the training fold,
# fit the classifier, score it on the held-out fold, and finally print the
# mean of each metric with the interval returned by confidence_interval
# (default confidence 0.95).
# NOTE(review): the per-fold preprocessing (Word2Vec training, embedding
# lookup, scaling) does not use `classifier`, yet it sits inside the model
# loop and is therefore repeated for every classifier.
for name, classifier in models:
    print(f"Classifier: {name}")
    # Lists to store metrics for each fold
    # NOTE(review): re-created for every classifier, so once the outer loop
    # has finished they hold only the last classifier's fold scores.
    accuracies = []
    precisions = []
    recalls = []
    f1s = []
    roc_aucs = []
    computation_times = []

    # Fold counter; incremented at the end of each fold but not read anywhere in this loop.
    fold = 1
    for train_index, test_index in cv.split(X, y):
        # Split data into training and test sets for this fold
        # (.copy() so the column added below does not touch X itself)
        X_train_fold = X.iloc[train_index].copy()
        X_test_fold = X.iloc[test_index].copy()
        # Targets get a fresh 0..n-1 index, matching the re-indexed feature frames built below.
        y_train_fold = y.iloc[train_index].reset_index(drop=True)
        y_test_fold = y.iloc[test_index].reset_index(drop=True)

        # Combine categorical columns into a single string for Word2Vec
        # NOTE(review): joining on ' ' and splitting on " " again assumes no
        # category value contains a space; otherwise the trained tokens differ
        # from the values looked up later and those lookups fall back to zeros.
        X_train_fold['stringcat'] = X_train_fold[cat_cols].apply(lambda x: ' '.join(x), axis=1)

        # Train Word2Vec model on training data
        # (only the training fold is used, so the test fold does not influence the embeddings)
        dimpool = 50  # Embedding dimension
        word2vec_model = Word2Vec(sentences=X_train_fold['stringcat'].str.split(" "), vector_size=dimpool,
                                  window=2, min_count=1, workers=1, seed=42)

        # Generate embeddings for training data
        X_train_emb = get_word2vec_embeddings(X_train_fold, cat_cols, word2vec_model, dimpool)

        # Handle numerical features
        # (every column whose dtype is not object is treated as numerical)
        numerical_cols = X_train_fold.select_dtypes(exclude='object').columns.tolist()
        X_train_num = X_train_fold[numerical_cols].reset_index(drop=True)

        # Create DataFrame for embeddings with string column names
        emb_col_names = [f'emb_{i}' for i in range(X_train_emb.shape[1])]
        X_train_emb_df = pd.DataFrame(X_train_emb, columns=emb_col_names)

        # Concatenate numerical features and embeddings
        # (both frames carry a default 0..n-1 index, so rows line up positionally)
        X_train_combined = pd.concat([X_train_num, X_train_emb_df], axis=1)

        # Ensure all column names are strings
        X_train_combined.columns = X_train_combined.columns.astype(str)

        # Generate embeddings for test data
        # (same Word2Vec model and same numerical column list as the training fold)
        X_test_emb = get_word2vec_embeddings(X_test_fold, cat_cols, word2vec_model, dimpool)
        X_test_num = X_test_fold[numerical_cols].reset_index(drop=True)
        X_test_emb_df = pd.DataFrame(X_test_emb, columns=emb_col_names)
        X_test_combined = pd.concat([X_test_num, X_test_emb_df], axis=1)
        X_test_combined.columns = X_test_combined.columns.astype(str)

        # Standard scaling
        # (scaler statistics come from the training fold only and are then applied to the test fold)
        stc = StandardScaler()
        X_train_scaled = stc.fit_transform(X_train_combined)
        X_test_scaled = stc.transform(X_test_combined)

        # Update number_of_features for MLP
        # NOTE(review): the `model__` prefix is presumably how the wrapper forwards
        # this value to create_network — confirm against the scikeras documentation.
        number_of_features = X_train_scaled.shape[1]
        if name == 'MLP':
            classifier.set_params(model__number_of_features=number_of_features)

        # Start timing
        # (only the fit call is timed; preprocessing and prediction are excluded)
        start_time = time.time()

        # Fit the model
        classifier.fit(X_train_scaled, y_train_fold)

        # End timing
        end_time = time.time()
        elapsed_time = end_time - start_time
        computation_times.append(elapsed_time)

        # Predict on test data
        y_pred_fold = classifier.predict(X_test_scaled)
        if hasattr(classifier, "predict_proba"):
            # Column 1 of predict_proba is used as the score for ROC AUC.
            y_pred_prob_fold = classifier.predict_proba(X_test_scaled)[:, 1]
        else:
            # No predict_proba: min-max rescale the decision_function scores to [0, 1].
            # NOTE(review): divides by zero if all scores in the fold are equal.
            y_pred_scores = classifier.decision_function(X_test_scaled)
            y_pred_prob_fold = (y_pred_scores - y_pred_scores.min()) / (y_pred_scores.max() - y_pred_scores.min())

        # Collect performance metrics
        # (only precision_score is given zero_division=0; recall_score and f1_score use their defaults)
        accuracies.append(accuracy_score(y_test_fold, y_pred_fold))
        precisions.append(precision_score(y_test_fold, y_pred_fold, zero_division=0))
        recalls.append(recall_score(y_test_fold, y_pred_fold))
        f1s.append(f1_score(y_test_fold, y_pred_fold))
        roc_aucs.append(roc_auc_score(y_test_fold, y_pred_prob_fold))

        fold += 1

    # Calculate mean and confidence intervals
    # (each call returns (mean, lower, upper) over the per-fold values)
    acc_mean, acc_ci_lower, acc_ci_upper = confidence_interval(accuracies)
    prec_mean, prec_ci_lower, prec_ci_upper = confidence_interval(precisions)
    rec_mean, rec_ci_lower, rec_ci_upper = confidence_interval(recalls)
    f1_mean, f1_ci_lower, f1_ci_upper = confidence_interval(f1s)
    roc_mean, roc_ci_lower, roc_ci_upper = confidence_interval(roc_aucs)
    # Fit time is reported as a plain mean, without an interval.
    time_mean = np.mean(computation_times)

    # Print results
    print(f"Accuracy: {acc_mean:.4f} (95% CI: {acc_ci_lower:.4f} - {acc_ci_upper:.4f})")
    print(f"Precision: {prec_mean:.4f} (95% CI: {prec_ci_lower:.4f} - {prec_ci_upper:.4f})")
    print(f"Recall: {rec_mean:.4f} (95% CI: {rec_ci_lower:.4f} - {rec_ci_upper:.4f})")
    print(f"F1 Score: {f1_mean:.4f} (95% CI: {f1_ci_lower:.4f} - {f1_ci_upper:.4f})")
    print(f"ROC AUC: {roc_mean:.4f} (95% CI: {roc_ci_lower:.4f} - {roc_ci_upper:.4f})")
    print(f"Average Computation Time per Fold: {time_mean:.4f} seconds\n")


  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


Classifier: LR
Accuracy: 0.9075 (95% CI: 0.9047 - 0.9103)
Precision: 0.6643 (95% CI: 0.6419 - 0.6868)
Recall: 0.3621 (95% CI: 0.3311 - 0.3931)
F1 Score: 0.4683 (95% CI: 0.4419 - 0.4947)
ROC AUC: 0.9142 (95% CI: 0.9088 - 0.9195)
Average Computation Time per Fold: 1.6623 seconds

Classifier: DT
Accuracy: 0.9061 (95% CI: 0.9024 - 0.9098)
Precision: 0.6509 (95% CI: 0.6272 - 0.6745)
Recall: 0.3591 (95% CI: 0.3263 - 0.3918)
F1 Score: 0.4625 (95% CI: 0.4317 - 0.4933)
ROC AUC: 0.8515 (95% CI: 0.8411 - 0.8619)
Average Computation Time per Fold: 0.8710 seconds

Classifier: RF
Accuracy: 0.8989 (95% CI: 0.8949 - 0.9028)
Precision: 0.6871 (95% CI: 0.6493 - 0.7248)
Recall: 0.1860 (95% CI: 0.1460 - 0.2260)
F1 Score: 0.2920 (95% CI: 0.2402 - 0.3438)
ROC AUC: 0.8813 (95% CI: 0.8718 - 0.8908)
Average Computation Time per Fold: 9.0679 seconds

Classifier: KNN
Accuracy: 0.8869 (95% CI: 0.8834 - 0.8903)
Precision: 0.4942 (95% CI: 0.4644 - 0.5241)
Recall: 0.2446 (95% CI: 0.2063 - 0.2829)
F1 Score: 0.3269 (9

In [1]:
import config_cat_embedding
import numpy as np
import pandas as pd
import random
import time

from gensim.models import Word2Vec
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
# from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from scikeras.wrappers import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, confusion_matrix, classification_report)
from tqdm.notebook import tqdm
from data_prep import bank_data_prep
from embedding_helper import create_network
from scipy import stats  # For confidence intervals

# One seed value drives Python's RNG, NumPy's global RNG, the estimators that
# accept `random_state`, and the cross-validation splitter.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Resolve the data locations from the project config and read the raw CSV
# (semicolon-separated).
data_path = config_cat_embedding.paths['data']
data_path_out = config_cat_embedding.paths['data_output']
bank_data = pd.read_csv(
    data_path + 'bank-additional-full.csv',
    sep=';',
)

# bank_data_prep returns the prepared frame and the list of categorical column names.
df_bank, cat_cols = bank_data_prep(bank_data)

# Features are every column except the last one; the target is the `y` column.
# NOTE(review): this presumes `y` is the last column of df_bank — confirm in bank_data_prep.
X = df_bank.iloc[:, :-1]
y = df_bank['y']

# Classifiers to compare, as (short name, estimator) pairs.
models = [
    (
        'LR',
        LogisticRegression(solver='lbfgs', max_iter=1000, random_state=seed),
    ),
    (
        'DT',
        DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed),
    ),
    (
        'RF',
        RandomForestClassifier(
            n_estimators=200,
            max_depth=5,
            min_samples_leaf=3,
            random_state=seed,
        ),
    ),
    (
        'KNN',
        KNeighborsClassifier(n_neighbors=3),
    ),
    (
        'XGB',
        XGBClassifier(eval_metric='logloss', random_state=seed),
    ),
    # ('SVM', SVC(gamma='scale', random_state=seed, probability=True)),
    (
        'MLP',
        KerasClassifier(
            model=create_network,
            epochs=100,
            batch_size=100,
            verbose=0,
            random_state=seed,
        ),
    ),
]

# Shuffled, stratified 5-fold cross-validation (change n_splits to use more or fewer folds).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

# Function to calculate confidence intervals
def confidence_interval(data, confidence=0.95):
    """Return the sample mean with a two-sided Student-t confidence interval.

    Parameters
    ----------
    data : sequence of numbers
        Observations (here: one metric value per cross-validation fold).
    confidence : float, default 0.95
        Confidence level of the interval.

    Returns
    -------
    tuple
        ``(mean, lower_bound, upper_bound)`` where the bounds are
        ``mean -/+ sem * t_crit`` and ``t_crit`` is the
        ``(1 + confidence) / 2`` quantile of the t distribution with
        ``len(data) - 1`` degrees of freedom.
    """
    sample_size = len(data)
    center = np.mean(data)
    # Standard error of the mean times the t critical value gives the half-width.
    half_width = stats.sem(data) * stats.t.ppf((1 + confidence) / 2, sample_size - 1)
    lower_bound = center - half_width
    upper_bound = center + half_width
    return center, lower_bound, upper_bound

# Embedding helper, defined once outside the cross-validation loop
def get_word2vec_embeddings(df, cat_cols, model, dimpool):
    """Turn the categorical columns of ``df`` into one flat embedding matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to embed; only the columns listed in ``cat_cols`` are read.
    cat_cols : sequence
        Names of the categorical columns, in the order their vectors are
        laid out in the output.
    model : object
        Anything exposing ``model.wv[value]`` that raises ``KeyError`` for an
        unknown value (here: a trained Word2Vec model).
    dimpool : int
        Length of each embedding vector.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), len(cat_cols) * dimpool)``; each row is the
        concatenation of that row's per-column vectors. A value that the
        model does not know contributes a zero vector.
    """

    def _vector_for(value):
        # Unknown category values fall back to an all-zero vector.
        try:
            return model.wv[value]
        except KeyError:
            return np.zeros((dimpool,))

    per_row = [
        np.array([_vector_for(record[column]) for column in cat_cols])
        for _, record in df.iterrows()
    ]
    # Flatten (n_rows, n_cat_cols, dimpool) into (n_rows, n_cat_cols * dimpool).
    return np.reshape(per_row, (len(per_row), len(cat_cols) * dimpool))

# Main loop over models
# For every (name, classifier) pair in `models`: iterate over the folds
# produced by `cv`, build Word2Vec-based features from the training fold,
# fit the classifier, score it on the held-out fold, and finally print the
# mean of each metric with the interval returned by confidence_interval
# (default confidence 0.95).
# NOTE(review): the per-fold preprocessing (Word2Vec training, embedding
# lookup, scaling) does not use `classifier`, yet it sits inside the model
# loop and is therefore repeated for every classifier.
for name, classifier in models:
    print(f"Classifier: {name}")
    # Lists to store metrics for each fold
    # NOTE(review): re-created for every classifier, so once the outer loop
    # has finished they hold only the last classifier's fold scores.
    accuracies = []
    precisions = []
    recalls = []
    f1s = []
    roc_aucs = []
    computation_times = []

    # Fold counter; incremented at the end of each fold but not read anywhere in this loop.
    fold = 1
    for train_index, test_index in cv.split(X, y):
        # Split data into training and test sets for this fold
        # (.copy() so the column added below does not touch X itself)
        X_train_fold = X.iloc[train_index].copy()
        X_test_fold = X.iloc[test_index].copy()
        # Targets get a fresh 0..n-1 index, matching the re-indexed feature frames built below.
        y_train_fold = y.iloc[train_index].reset_index(drop=True)
        y_test_fold = y.iloc[test_index].reset_index(drop=True)

        # Combine categorical columns into a single string for Word2Vec
        # NOTE(review): joining on ' ' and splitting on " " again assumes no
        # category value contains a space; otherwise the trained tokens differ
        # from the values looked up later and those lookups fall back to zeros.
        X_train_fold['stringcat'] = X_train_fold[cat_cols].apply(lambda x: ' '.join(x), axis=1)

        # Train Word2Vec model on training data
        # (only the training fold is used, so the test fold does not influence the embeddings)
        dimpool = 30  # Embedding dimension
        word2vec_model = Word2Vec(sentences=X_train_fold['stringcat'].str.split(" "), vector_size=dimpool,
                                  window=2, min_count=1, workers=1, seed=42)

        # Generate embeddings for training data
        X_train_emb = get_word2vec_embeddings(X_train_fold, cat_cols, word2vec_model, dimpool)

        # Handle numerical features
        # (every column whose dtype is not object is treated as numerical)
        numerical_cols = X_train_fold.select_dtypes(exclude='object').columns.tolist()
        X_train_num = X_train_fold[numerical_cols].reset_index(drop=True)

        # Create DataFrame for embeddings with string column names
        emb_col_names = [f'emb_{i}' for i in range(X_train_emb.shape[1])]
        X_train_emb_df = pd.DataFrame(X_train_emb, columns=emb_col_names)

        # Concatenate numerical features and embeddings
        # (both frames carry a default 0..n-1 index, so rows line up positionally)
        X_train_combined = pd.concat([X_train_num, X_train_emb_df], axis=1)

        # Ensure all column names are strings
        X_train_combined.columns = X_train_combined.columns.astype(str)

        # Generate embeddings for test data
        # (same Word2Vec model and same numerical column list as the training fold)
        X_test_emb = get_word2vec_embeddings(X_test_fold, cat_cols, word2vec_model, dimpool)
        X_test_num = X_test_fold[numerical_cols].reset_index(drop=True)
        X_test_emb_df = pd.DataFrame(X_test_emb, columns=emb_col_names)
        X_test_combined = pd.concat([X_test_num, X_test_emb_df], axis=1)
        X_test_combined.columns = X_test_combined.columns.astype(str)

        # Standard scaling
        # (scaler statistics come from the training fold only and are then applied to the test fold)
        stc = StandardScaler()
        X_train_scaled = stc.fit_transform(X_train_combined)
        X_test_scaled = stc.transform(X_test_combined)

        # Update number_of_features for MLP
        # NOTE(review): the `model__` prefix is presumably how the wrapper forwards
        # this value to create_network — confirm against the scikeras documentation.
        number_of_features = X_train_scaled.shape[1]
        if name == 'MLP':
            classifier.set_params(model__number_of_features=number_of_features)

        # Start timing
        # (only the fit call is timed; preprocessing and prediction are excluded)
        start_time = time.time()

        # Fit the model
        classifier.fit(X_train_scaled, y_train_fold)

        # End timing
        end_time = time.time()
        elapsed_time = end_time - start_time
        computation_times.append(elapsed_time)

        # Predict on test data
        y_pred_fold = classifier.predict(X_test_scaled)
        if hasattr(classifier, "predict_proba"):
            # Column 1 of predict_proba is used as the score for ROC AUC.
            y_pred_prob_fold = classifier.predict_proba(X_test_scaled)[:, 1]
        else:
            # No predict_proba: min-max rescale the decision_function scores to [0, 1].
            # NOTE(review): divides by zero if all scores in the fold are equal.
            y_pred_scores = classifier.decision_function(X_test_scaled)
            y_pred_prob_fold = (y_pred_scores - y_pred_scores.min()) / (y_pred_scores.max() - y_pred_scores.min())

        # Collect performance metrics
        # (only precision_score is given zero_division=0; recall_score and f1_score use their defaults)
        accuracies.append(accuracy_score(y_test_fold, y_pred_fold))
        precisions.append(precision_score(y_test_fold, y_pred_fold, zero_division=0))
        recalls.append(recall_score(y_test_fold, y_pred_fold))
        f1s.append(f1_score(y_test_fold, y_pred_fold))
        roc_aucs.append(roc_auc_score(y_test_fold, y_pred_prob_fold))

        fold += 1

    # Calculate mean and confidence intervals
    # (each call returns (mean, lower, upper) over the per-fold values)
    acc_mean, acc_ci_lower, acc_ci_upper = confidence_interval(accuracies)
    prec_mean, prec_ci_lower, prec_ci_upper = confidence_interval(precisions)
    rec_mean, rec_ci_lower, rec_ci_upper = confidence_interval(recalls)
    f1_mean, f1_ci_lower, f1_ci_upper = confidence_interval(f1s)
    roc_mean, roc_ci_lower, roc_ci_upper = confidence_interval(roc_aucs)
    # Fit time is reported as a plain mean, without an interval.
    time_mean = np.mean(computation_times)

    # Print results
    print(f"Accuracy: {acc_mean:.4f} (95% CI: {acc_ci_lower:.4f} - {acc_ci_upper:.4f})")
    print(f"Precision: {prec_mean:.4f} (95% CI: {prec_ci_lower:.4f} - {prec_ci_upper:.4f})")
    print(f"Recall: {rec_mean:.4f} (95% CI: {rec_ci_lower:.4f} - {rec_ci_upper:.4f})")
    print(f"F1 Score: {f1_mean:.4f} (95% CI: {f1_ci_lower:.4f} - {f1_ci_upper:.4f})")
    print(f"ROC AUC: {roc_mean:.4f} (95% CI: {roc_ci_lower:.4f} - {roc_ci_upper:.4f})")
    print(f"Average Computation Time per Fold: {time_mean:.4f} seconds\n")


Classifier: LR
Accuracy: 0.9075 (95% CI: 0.9047 - 0.9103)
Precision: 0.6644 (95% CI: 0.6404 - 0.6884)
Recall: 0.3621 (95% CI: 0.3319 - 0.3923)
F1 Score: 0.4683 (95% CI: 0.4424 - 0.4942)
ROC AUC: 0.9141 (95% CI: 0.9088 - 0.9194)
Average Computation Time per Fold: 1.2059 seconds

Classifier: DT
Accuracy: 0.9061 (95% CI: 0.9024 - 0.9098)
Precision: 0.6509 (95% CI: 0.6272 - 0.6745)
Recall: 0.3591 (95% CI: 0.3263 - 0.3918)
F1 Score: 0.4625 (95% CI: 0.4317 - 0.4933)
ROC AUC: 0.8515 (95% CI: 0.8411 - 0.8619)
Average Computation Time per Fold: 0.5338 seconds

Classifier: RF
Accuracy: 0.8991 (95% CI: 0.8950 - 0.9031)
Precision: 0.6913 (95% CI: 0.6518 - 0.7307)
Recall: 0.1864 (95% CI: 0.1479 - 0.2249)
F1 Score: 0.2930 (95% CI: 0.2427 - 0.3433)
ROC AUC: 0.8933 (95% CI: 0.8863 - 0.9003)
Average Computation Time per Fold: 7.4279 seconds

Classifier: KNN
Accuracy: 0.8884 (95% CI: 0.8855 - 0.8913)
Precision: 0.5088 (95% CI: 0.4841 - 0.5335)
Recall: 0.2504 (95% CI: 0.2191 - 0.2817)
F1 Score: 0.3354 (9

In [11]:
import config_cat_embedding
import numpy as np
import pandas as pd
import random

from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,classification_report,auc, roc_auc_score

from tqdm import tqdm_notebook as tqdm
from data_prep import bank_data_prep
from embedding_helper import create_network
import time  # time.time() is used for the fit timings below but was missing from this cell's imports

# Set the random seed for reproducibility
seed = 42
random.seed(seed)
np.random.seed(seed)


def _embed_categoricals(frame, columns, w2v_model, dim):
    """Look up a Word2Vec vector for every categorical value of every row.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows to embed; only the columns named in ``columns`` are read.
    columns : sequence
        Categorical column names, in the order their vectors are concatenated.
    w2v_model : object
        Trained model exposing ``w2v_model.wv[value]``.
    dim : int
        Length of each embedding vector.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(frame), len(columns) * dim)``. A value that is
        missing from the model vocabulary contributes a zero vector.
    """
    rows = []
    for _, record in tqdm(frame.iterrows(), total=len(frame)):
        vectors = []
        for col in columns:
            try:
                vectors.append(w2v_model.wv[record[col]])
            except KeyError:
                # Only an unknown category is tolerated; any other error should surface.
                vectors.append(np.zeros((dim,)))
        rows.append(np.array(vectors))
    return np.reshape(rows, (len(rows), len(columns) * dim))


#%% load the data and complete the data pre-processing
data_path = config_cat_embedding.paths['data']
data_path_out = config_cat_embedding.paths['data_output']
bank_data = pd.read_csv(data_path + 'bank-additional-full.csv', sep=';')

dat_bank, cat_cols = bank_data_prep(bank_data)

# Features are every column except the last one; the target is the `y` column.
X = dat_bank.iloc[:, :-1]
target = dat_bank.y

X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.2, random_state=1500)

# Join the categorical values of each training row into one space-separated "sentence".
X_train['stringcat'] = X_train[cat_cols].apply(lambda x: ' '.join(x), axis=1)

# Train the Word2Vec model on the training rows only and persist it.
dimpool = 30
model = Word2Vec(sentences=X_train['stringcat'].str.split(" "), vector_size=dimpool, window=2, min_count=1, workers=1, seed=seed)
model.save("word2vec.model.bank")

#%% embed the training set
reshaped_x = _embed_categoricals(X_train, cat_cols, model, dimpool)
# Positions of the non-object columns. 'stringcat' was appended last to X_train,
# so the same positions are valid for X_test as well.
numerical_cols = np.where(X_train.dtypes != "object")[0]
my_data = pd.concat([X_train.iloc[:, numerical_cols].reset_index(drop=True), pd.DataFrame(reshaped_x)], axis=1)
# my_data has a fresh 0..n-1 index, so y_train must be re-indexed to match
y_train = y_train.reset_index(drop=True)

#%% apply it on the X_test dataset
reshaped_x_test = _embed_categoricals(X_test, cat_cols, model, dimpool)
my_test_data = pd.concat([X_test.iloc[:, numerical_cols].reset_index(drop=True), pd.DataFrame(reshaped_x_test)], axis=1)
# my_test_data has a fresh 0..n-1 index, so y_test must be re-indexed to match
y_test = y_test.reset_index(drop=True)

# Second split: models are fitted on X_train2/y_train2; X_test2/y_test2 is a
# validation hold-out that is only used by the commented-out lines in the loop below.
X_train2, X_test2, y_train2, y_test2 = train_test_split(my_data, y_train, test_size=0.2, random_state=1500)
stc = StandardScaler()
X_scaled = stc.fit_transform(X_train2.values)
# The scaled test matrix is the same for every model, so compute it once.
X_test_scaled = stc.transform(my_test_data.values)

models = [
    ('LR', LogisticRegression(solver='lbfgs', random_state=seed, max_iter=1000)),
    ('DT', DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed)),
    ('RF', RandomForestClassifier(n_estimators=200, max_depth=5, random_state=seed, min_samples_leaf=3)),
    ('KNN', KNeighborsClassifier(n_neighbors=3)),
    # random_state added for consistency with the other evaluation cells in this notebook
    ('XGB', XGBClassifier(eval_metric='logloss', random_state=seed)),
    ('SVM', SVC(gamma='scale', random_state=seed, probability=True)),
    ('MLP', KerasClassifier(build_fn=create_network, number_of_features=my_data.shape[1], epochs=100, batch_size=100, verbose=0))
]

# Fit every model, time the fit only, then report test-set metrics.
for name, classifier in models:
    start_time = time.time()
    classifier.fit(X_scaled, y_train2)
    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"Classifier: {name}")
    print(f"Computation Time: {elapsed_time} seconds")
#     y_pred = classifier.predict(stc.transform(X_test2.values))
#     y_pred_prob = classifier.predict_proba(stc.transform(X_test2.values))

#     print(confusion_matrix(y_test2,y_pred))
#     print(classification_report(y_test2,y_pred, digits=4))
#     print(roc_auc_score(y_test2,y_pred_prob[:,1]))

    y_pred_test = classifier.predict(X_test_scaled)
    y_pred_prob_test = classifier.predict_proba(X_test_scaled)

    print(confusion_matrix(y_test, y_pred_test))
    print(classification_report(y_test, y_pred_test, digits=4))

    # ROC AUC uses column 1 of predict_proba as the score.
    print(roc_auc_score(y_test, y_pred_prob_test[:, 1]))
    print()
    



Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for row in tqdm(X_train.iterrows()):


0it [00:00, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for row in tqdm(X_test.iterrows()):


0it [00:00, ?it/s]

  ('MLP', KerasClassifier(build_fn=create_network, number_of_features=my_data.shape[1], epochs=100, batch_size=100, verbose=0))


Classifier: LR
Computation Time: 1.226273536682129 seconds
[[7123  171]
 [ 607  337]]
              precision    recall  f1-score   support

           0     0.9215    0.9766    0.9482      7294
           1     0.6634    0.3570    0.4642       944

    accuracy                         0.9056      8238
   macro avg     0.7924    0.6668    0.7062      8238
weighted avg     0.8919    0.9056    0.8928      8238

0.9169146744712394

Classifier: DT
Computation Time: 0.2270958423614502 seconds
[[7049  245]
 [ 570  374]]
              precision    recall  f1-score   support

           0     0.9252    0.9664    0.9453      7294
           1     0.6042    0.3962    0.4786       944

    accuracy                         0.9011      8238
   macro avg     0.7647    0.6813    0.7120      8238
weighted avg     0.8884    0.9011    0.8919      8238

0.8489391385071546

Classifier: RF
Computation Time: 3.1036245822906494 seconds
[[7212   82]
 [ 753  191]]
              precision    recall  f1-score   