In [1]:
import os
import numpy as np
import pandas as pd

from time import time

from scipy.stats import ks_2samp

from imblearn.over_sampling import ADASYN, SMOTE, BorderlineSMOTE, KMeansSMOTE, SMOTEN, SMOTENC, SVMSMOTE
from imblearn.combine import SMOTEENN, SMOTETomek

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier

from lightgbm import LGBMClassifier

from catboost import CatBoostClassifier

from imblearn.ensemble import BalancedBaggingClassifier, BalancedRandomForestClassifier, EasyEnsembleClassifier


from sklearn.metrics import roc_curve, precision_recall_curve
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, roc_auc_score, classification_report, confusion_matrix

metrics_data_columns = ['model','sampling_method','k_neighbour', 'train_accuracy', 'test_accuracy', 'roc_auc',
                         'precision_0', 'recall_0', 'f1_0',
                         'precision_1', 'recall_1', 'f1_1', 
                         'ks_stat', 'p_value', 
                         'tp', 'tn', 'fp', 'fn']


# model_list = [ 'logistic_regression','gaussian_naive_bayes','support_vector_classifier',
#               'ada_boost','extra_trees_classifier','gradient_boosting_classifier',
#               'hist_gradient_boosting_classifier','random_forest_classifier',
#               'balanced_bagging_classifier','balanced_random_forest_classifier',
#               'easy_ensemble_classifier','lgbm_classifier','catboost_classifier']

model_list = [ 'gaussian_naive_bayes', 'ada_boost','extra_trees_classifier',
              'gradient_boosting_classifier', 'hist_gradient_boosting_classifier',
              'random_forest_classifier','balanced_bagging_classifier',
              'balanced_random_forest_classifier','easy_ensemble_classifier',
              'lgbm_classifier','catboost_classifier']

import warnings
warnings.filterwarnings('ignore')

In [2]:
evaluation_files_loc = './final_results/bos_models_evaluation/'
if not os.path.exists(evaluation_files_loc):
    os.makedirs(evaluation_files_loc)

## Data preperation and evaluation

In [4]:
# Data preparation and Evaluation

def features_target_split(df, target_col='Exited'):
    """
    Split the DataFrame into features and target variables.
    
    Parameters:
        df (DataFrame): The input DataFrame.
        target_col (str): The name of the target column. Default is 'Exited'.
        
    Returns:
        x (DataFrame): The features.
        y (Series): The target variable.
    """
    # Drop the target column from the DataFrame to get the features
    x = df.drop(target_col, axis=1)
    
    # Assign the target column as the y variable
    y = df[target_col]
    
    # Return the features and target variables
    return x,y


def train_test_split(x,y,df,target_col='Exited', test_size=0.2, random_state=42):
    """
    Split the features and target variables into training and testing sets.
    
    Parameters:
        x (DataFrame): The features.
        y (Series): The target variable.
        df (DataFrame): The original DataFrame.
        target_col (str): The name of the target column. Default is 'Exited'.
        test_size (float or int): The proportion or absolute number of samples to include in the testing set. Default is 0.2.
        random_state (int): The seed used by the random number generator. Default is 42.
        
    Returns:
        x_train (DataFrame): The training set features.
        x_test (DataFrame): The testing set features.
        y_train (Series): The training set target variable.
        y_test (Series): The testing set target variable.
    """
    from sklearn.model_selection import train_test_split
    
    # Split the features and target variables into training and testing sets
    # Stratified is being used to maintain the proportion of class [0 and 1] in splits.
    x_train, x_test, y_train, y_test = train_test_split(x, y, 
                                                        test_size=test_size, 
                                                        random_state=random_state, 
                                                        stratify=df[target_col])
    
    return x_train, x_test, y_train, y_test


def prediction(model, x_train, x_test):
    """
    Generate predictions using a trained logistic regression model.
    
    Parameters:
        log_reg_model (LogisticRegression): The trained logistic regression model.
        x_train (array-like or sparse matrix): The training set features.
        x_test (array-like or sparse matrix): The testing set features.
        
    Returns:
        y_pred_train (array-like): Predicted labels for the training set.
        y_pred_test (array-like): Predicted labels for the testing set.
        y_pred_test_proba (array-like): Predicted probabilities for the testing set.
    """
    # Generate predictions for the training set
    y_pred_train = model.predict(x_train)
    
    # Generate predictions for the testing set
    y_pred_test = model.predict(x_test)
    
    # Generate predicted probabilities for the testing set
    y_pred_test_proba = model.predict_proba(x_test)
    
    return y_pred_train, y_pred_test, y_pred_test_proba


class Evaluation():
    def __init__(self,y_train, y_test, y_pred_train, y_pred_test, y_pred_test_proba):
        self.y_train = y_train
        self.y_test = y_test
        self.y_pred_train = y_pred_train
        self.y_pred_test = y_pred_test
        self.y_pred_test_proba = y_pred_test_proba
    
    def __ks_stats_value__(self):
        """
        Calculate the Kolmogorov-Smirnov (KS) statistic and p-value.
        
        Returns:
            ks_stat (float): The KS statistic.
            p_value (float): The p-value.
        """
        
        # proba_non_churn contains the predicted probabilities for instances that did not churn
        proba_non_churn = self.y_pred_test_proba[:,1][self.y_test==0]
        
        # proba_churn contains the predicted probabilities for instances that actually churned
        proba_churn = self.y_pred_test_proba[:,1][self.y_test==1]
        
        # Calculating Kolmogorov-Smirnov (KS) statistic and p-value
        ks_stat, p_value = ks_2samp(proba_non_churn, proba_churn)
        return ks_stat, p_value
    
    def __accuracy_value__(self):
        train_accuracy = accuracy_score(self.y_train, self.y_pred_train)
        test_accuracy = accuracy_score(self.y_test, self.y_pred_test)
        return train_accuracy, test_accuracy

    def __prec_rec_f1_value__(self, pos_label):
        """
        Calculate precision, recall, and F1-score for a given label.
        
        Parameters:
            pos_label: The label for which metrics are calculated.
        
        Returns:
            precision (float): Precision score.
            recall (float): Recall score.
            f1 (float): F1-score.
        """
        precision = precision_score(self.y_test, self.y_pred_test,pos_label=pos_label)
        recall = recall_score(self.y_test, self.y_pred_test,pos_label=pos_label)
        f1 = f1_score(self.y_test, self.y_pred_test, pos_label=pos_label)
        return precision, recall, f1

    def __roc_value__(self):
        roc_auc = roc_auc_score(self.y_test, self.y_pred_test)
        return roc_auc

    def __confusion_matrix_value__(self):
        tn, fp, fn, tp = confusion_matrix(self.y_test, self.y_pred_test).ravel()
        return tn, fp, fn, tp
    
    def main(self):
        train_accuracy, test_accuracy = self.__accuracy_value__()
        
        precision_0, recall_0, f1_0 = self.__prec_rec_f1_value__(pos_label=0)
        precision_1, recall_1, f1_1 = self.__prec_rec_f1_value__(pos_label=1)
        
        ks_stat, p_value = self.__ks_stats_value__()
        
        roc_auc = self.__roc_value__()
        
        tn, fp, fn, tp = self.__confusion_matrix_value__()
        
        all_metrics = [train_accuracy, test_accuracy, roc_auc, 
                       precision_0, recall_0, f1_0, 
                       precision_1, recall_1, f1_1, 
                       ks_stat, p_value, 
                       tp, tn, fp, fn]
        
        all_metrics = [round(value, ndigits=6) for value in all_metrics]
        all_metrics_dict = {'train_acc':all_metrics[0], 'test_acc':all_metrics[1], 'roc_auc':all_metrics[2],  
                            'class_0':{'precision':all_metrics[3], 'recall':all_metrics[4], 'f1':all_metrics[5]}, 
                            'class_1':{'precision':all_metrics[6], 'recall':all_metrics[7], 'f1':all_metrics[8]},
                            'ks_stats':all_metrics[9], 'p_value':all_metrics[10],
                            'tp':all_metrics[11],'tn':all_metrics[12],'fp':all_metrics[13],'fn':all_metrics[14]}
        
        return all_metrics, all_metrics_dict

# Models

In [6]:
def logistic_model_train(x_train, y_train, random_state=42, max_iter=1000):
    """
    Train a logistic regression model using the provided training data.
    
    Parameters:
        x_train (DataFrame): The training set features.
        y_train (Series): The training set target variable.
        random_state (int): The seed used by the random number generator. Default is 42.
        max_iter (int): The maximum number of iterations for the solver to converge. Default is 1000.
        
    Returns:
        log_reg_model (LogisticRegression): The trained logistic regression model.
    """
    
    # Create an instance of LogisticRegression model with specified random_state and max_iter
    log_reg_model = LogisticRegression(random_state=random_state, max_iter=max_iter)
    
    # Fit the logistic regression model to the training data
    log_reg_model.fit(x_train, y_train)
    
    return log_reg_model


def gnb_model_train(x_train, y_train):
    
    # instantiate the model
    gnb = GaussianNB()
    gnb.fit(x_train, y_train)
    return gnb

def svc_model_train(x_train, y_train, random_state=42):

    svc = SVC(probability=True,random_state=random_state)
    svc.fit(x_train, y_train)
    return svc

def adaboost_model_train(x_train, y_train, random_state=42):

    adb_model = AdaBoostClassifier(random_state=random_state)
    adb_model.fit(x_train, y_train)
    return adb_model

def etc_model_train(x_train, y_train, random_state=42):
    etc_model = ExtraTreesClassifier(random_state=random_state)
    etc_model.fit(x_train, y_train)
    return etc_model

def gbc_model_train(x_train, y_train, random_state=42):
    gbc_model = GradientBoostingClassifier(random_state=random_state)
    gbc_model.fit(x_train, y_train)
    return gbc_model

def hgbc_model_train(x_train, y_train, random_state=42):
    hgbc_model = HistGradientBoostingClassifier(random_state=random_state)
    hgbc_model.fit(x_train, y_train)
    return hgbc_model

def rfc_model_train(x_train, y_train, random_state=42):
    rfc_model = RandomForestClassifier(random_state=random_state)
    rfc_model.fit(x_train, y_train)
    return rfc_model

def bbc_model_train(x_train, y_train, random_state=42):
    bbc_model = BalancedBaggingClassifier(random_state=random_state)
    bbc_model.fit(x_train, y_train)
    return bbc_model

def brfc_model_train(x_train, y_train, random_state=42):
    brfc_model = BalancedRandomForestClassifier(random_state=random_state)
    brfc_model.fit(x_train, y_train)
    return brfc_model

def eec_model_train(x_train, y_train, random_state=42):
    eec_model = EasyEnsembleClassifier(random_state=random_state)
    eec_model.fit(x_train, y_train)
    return eec_model

def lgbm_model_train(x_train, y_train, random_state=42):
    lgbm_model = LGBMClassifier(random_state=random_state)
    lgbm_model.fit(x_train, y_train)
    return lgbm_model

def catboost_model_train(x_train, y_train, random_state=42):
    catboost_model = CatBoostClassifier(random_state=random_state)
    catboost_model.fit(x_train, y_train, verbose=False)
    return catboost_model

def train_all_models(x_train, y_train, model_name):
    
    if model_name == 'logistic_regression':
        model = logistic_model_train(x_train, y_train)
        
    elif model_name == 'gaussian_naive_bayes':
        model = gnb_model_train(x_train, y_train)
        
    elif model_name == 'support_vector_classifier':
        model = svc_model_train(x_train, y_train)
        
    elif model_name == 'ada_boost':
        model = adaboost_model_train(x_train, y_train)
        
    elif model_name == 'extra_trees_classifier':
        model = etc_model_train(x_train, y_train)

    elif model_name == 'gradient_boosting_classifier':
        model = gbc_model_train(x_train, y_train)
    
    elif model_name == 'hist_gradient_boosting_classifier':
        model = hgbc_model_train(x_train, y_train)
    
    elif model_name == 'random_forest_classifier':
        model = rfc_model_train(x_train, y_train)

    elif model_name == 'balanced_bagging_classifier':
        model = bbc_model_train(x_train, y_train)
        
    elif model_name == 'balanced_random_forest_classifier':
        model = brfc_model_train(x_train, y_train)
        
    elif model_name == 'easy_ensemble_classifier':
        model = eec_model_train(x_train, y_train)

    elif model_name == 'lgbm_classifier':
        model = lgbm_model_train(x_train, y_train)

    elif model_name == 'catboost_classifier':
        model = catboost_model_train(x_train, y_train)

    else:
        print("Check model name")
    
    return model

## Class Balancing Methods

In [10]:
def smote_method(x,y,neighbour):
    # Apply SMOTE
    sm = SMOTE(random_state=42, k_neighbors=neighbour)
    x_new, y_new = sm.fit_resample(x,y)
    
    return x_new, y_new

def adasyn_method(x,y,neighbour):
    adap_synt = ADASYN(random_state=42, n_neighbors=neighbour)
    x_new, y_new = adap_synt.fit_resample(x,y)
    
    return x_new, y_new

def borderline_smote_method(x,y,neighbour):
    border_smote = BorderlineSMOTE(random_state=42, k_neighbors=neighbour)
    x_new, y_new = border_smote.fit_resample(x,y)

    return x_new, y_new

def kmeans_smote_method(x,y,neighbour):
    kmeans_smote = KMeansSMOTE(random_state=42, k_neighbors=neighbour,cluster_balance_threshold=0.2)
    x_new, y_new = kmeans_smote.fit_resample(x,y)

    return x_new, y_new

def smoten_method(x,y,neighbour):
    # Apply SMOTEN
    sm = SMOTEN(random_state=42, k_neighbors=neighbour)
    x_new, y_new = sm.fit_resample(x,y)
    
    return x_new, y_new

def smotenc_method(x,y,neighbour, approach_type):
    
    # File location of the dataset
    data_loc = "./Churn_Modelling.csv"

    # Read the CSV file into a Pandas DataFrame, using the first column as the index
    df = pd.read_csv(data_loc, index_col=0)
    
    df.drop(['CustomerId'], axis = 1,inplace=True)
    
    if approach_type == 1:
    
        x,y = features_target_split(df)

        # Apply SMOTENC
        sm = SMOTENC(categorical_features=[0,2,3],random_state=42, k_neighbors=neighbour)
        x_new, y_new = sm.fit_resample(x,y)

        x_new.drop(['Surname', 'Geography', 'Gender'], axis = 1,inplace=True)
        
        return x_new, y_new
    
    if approach_type == 2:
        x,y = features_target_split(df)
        
        # Split the features and target variables into training and testing sets.
        x_train, x_test, y_train, y_test = train_test_split(x,y,df)

        # Apply SMOTENC
        sm = SMOTENC(categorical_features=[0,2,3],random_state=42, k_neighbors=neighbour)
        x_train_new, y_train_new = sm.fit_resample(x_train,y_train)

        x_train_new.drop(['Surname', 'Geography', 'Gender'], axis = 1,inplace=True)
        
        x_test.drop(['Surname', 'Geography', 'Gender'], axis = 1,inplace=True)
        
        return x_train_new, x_test, y_train_new, y_test 

def svm_smote_method(x,y,neighbour):
    # Apply SVMSMOTE
    sm = SVMSMOTE(random_state=42, k_neighbors=neighbour)
    x_new, y_new = sm.fit_resample(x,y)
    
    return x_new, y_new

def smote_enn_method(x,y,neighbour):
    # Apply SMOTEENN
    sm = SMOTEENN(random_state=42, smote=SMOTE(random_state=42, k_neighbors=neighbour))
    x_new, y_new = sm.fit_resample(x,y)
    
    return x_new, y_new

def smote_tomek_method(x,y,neighbour):
    # Apply SMOTETOMEK
    sm = SMOTETomek(random_state=42, smote=SMOTE(random_state=42, k_neighbors=neighbour))
    x_new, y_new = sm.fit_resample(x,y)
    
    return x_new, y_new

def sampling_method(method_name, neighbour, x_train, y_train):
    
#     print(f"\nUsing {method_name.upper()} :: APPROACH 2 :: ")

    if method_name == 'smote':
        # Apply SMOTE
        x_train_new, y_train_new = smote_method(x_train,y_train,neighbour)

    if method_name == 'adasyn':
        # Apply ADASYN
        x_train_new, y_train_new = adasyn_method(x_train,y_train,neighbour)

    if method_name == 'borderline_smote':
        # Apply Borderline SMOTE
        x_train_new, y_train_new = borderline_smote_method(x_train,y_train,neighbour)

    if method_name == 'kmeans_smote':
        # Apply KMeans SMOTE
        x_train_new, y_train_new = kmeans_smote_method(x_train,y_train,neighbour)

    if method_name == 'smoten':
        # Apply SMOTEN
        x_train_new, y_train_new = smoten_method(x_train,y_train,neighbour)

#     if method_name == 'smotenc':
#         # Apply SMOTENC
#         x_train_new, x_test, y_train_new, y_test = smotenc_method(x_train,y_train,neighbour, approach_type=2)

    if method_name == 'svmsmote':
        # Apply SVMSMOTE
        x_train_new, y_train_new = svm_smote_method(x_train,y_train,neighbour)

    if method_name == 'smoteenn':
        # Apply SMOTEENN
        x_train_new, y_train_new = smote_enn_method(x_train,y_train,neighbour)

    if method_name == 'smotetomek':
        # Apply SMOTETOMEK
        x_train_new, y_train_new = smote_tomek_method(x_train,y_train,neighbour)
    
    
    return x_train_new, y_train_new

## Reading Data

In [8]:
# File location of the dataset
data_loc = "./Churn_Modelling.csv"

# Read the CSV file into a Pandas DataFrame, using the first column as the index
df = pd.read_csv(data_loc, index_col=0)

# df.head()

# Drop all categorical columns
df.drop(['CustomerId', 'Surname','Geography', 'Gender'], axis = 1,inplace=True)
df.head()

Unnamed: 0_level_0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
RowNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,619,42,2,0.0,1,1,1,101348.88,1
2,608,41,1,83807.86,1,0,1,112542.58,0
3,502,42,8,159660.8,3,1,0,113931.57,1
4,699,39,1,0.0,2,0,0,93826.63,0
5,850,43,2,125510.82,1,1,1,79084.1,0


## Train Test Split

In [9]:
# Split the DataFrame into features and target variables.
x,y = features_target_split(df)

# Split the features and target variables into training and testing sets.
x_train, x_test, y_train, y_test = train_test_split(x,y,df)

## Training

In [11]:
methods_ = ['smote', 'adasyn', 'borderline_smote', 'kmeans_smote', 'smoten' , 'svmsmote', 'smoteenn', 'smotetomek']
# k_values = [273, 1020, 1409, 110, 282, 148, 1, 116]

In [12]:
for model_name in model_list:
    metrics_data = []
    print(model_name.upper())
    
    for mth in methods_:
        if mth == 'kmeans_smote':
            # No cluster are formed beyond this value of k
            stop_range = 339
        
        elif mth == 'smotenc':
            # RAM issue
            stop_range = 150
        
        else:
            stop_range = y_train.value_counts()[1]


        for neighbour in range(1,stop_range):
            x_train_new, y_train_new = sampling_method(mth, neighbour, x_train, y_train)

            model = train_all_models(x_train_new, y_train_new, model_name)

            # Generate predictions
            y_pred_train, y_pred_test, y_pred_test_proba = prediction(model, x_train_new, x_test)

            # Calculate evaluation metrics
            model_evaluation = Evaluation(y_train_new, y_test, y_pred_train, y_pred_test, y_pred_test_proba)

            all_metrics,_ = model_evaluation.main()
            all_metrics.insert(0, model_name)
            all_metrics.insert(1, mth)
            all_metrics.insert(2, neighbour)
    
            if neighbour%1000 == 0 or neighbour == 1:
                print("{:<6} :: Train Acc: {:<14} :: Test Acc: {}".format(neighbour, all_metrics[3], all_metrics[4]))
#                 print('.', end='')
            metrics_data.append(all_metrics)

    metrics_df = pd.DataFrame(metrics_data, columns=metrics_data_columns)
    metrics_df.to_csv(os.path.join(evaluation_files_loc, f'bos_{model_name}_eval_num_data.csv'), header=True, index=False)

GAUSSIAN_NAIVE_BAYES
1      :: Train Acc: 0.716641       :: Test Acc: 0.683
500    :: Train Acc: 0.712009       :: Test Acc: 0.6795
1000   :: Train Acc: 0.713972       :: Test Acc: 0.6805
1500   :: Train Acc: 0.719937       :: Test Acc: 0.68
1      :: Train Acc: 0.714469       :: Test Acc: 0.6605


KeyboardInterrupt: 

In [13]:
metrics_df = pd.DataFrame(metrics_data, columns=metrics_data_columns)
metrics_df

Unnamed: 0,model,sampling_method,k_neighbour,train_accuracy,test_accuracy,roc_auc,precision_0,recall_0,f1_0,precision_1,recall_1,f1_1,ks_stat,p_value,tp,tn,fp,fn
0,gaussian_naive_bayes,smote,1,0.716641,0.6830,0.699481,0.906012,0.671689,0.771449,0.361416,0.727273,0.482871,0.411698,0.0,296,1070,523,111
1,gaussian_naive_bayes,smote,2,0.710126,0.6795,0.692710,0.902027,0.670433,0.769175,0.356618,0.714988,0.475879,0.403646,0.0,291,1068,525,116
2,gaussian_naive_bayes,smote,3,0.711224,0.6770,0.691141,0.901612,0.667294,0.766955,0.354446,0.714988,0.473941,0.403322,0.0,291,1063,530,116
3,gaussian_naive_bayes,smote,4,0.711146,0.6765,0.691742,0.902211,0.666039,0.766342,0.354369,0.717445,0.474411,0.406049,0.0,292,1061,532,115
4,gaussian_naive_bayes,smote,5,0.710126,0.6745,0.690486,0.901877,0.663528,0.764557,0.352657,0.717445,0.472874,0.409457,0.0,292,1057,536,115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1962,gaussian_naive_bayes,adasyn,334,0.710628,0.6700,0.688576,0.901809,0.657250,0.760349,0.349225,0.719902,0.470305,0.391738,0.0,293,1047,546,114
1963,gaussian_naive_bayes,adasyn,335,0.712258,0.6750,0.692629,0.903336,0.662900,0.764663,0.353791,0.722359,0.474960,0.394533,0.0,294,1056,537,113
1964,gaussian_naive_bayes,adasyn,336,0.711404,0.6730,0.690459,0.902314,0.661017,0.763043,0.351741,0.719902,0.472581,0.396416,0.0,293,1053,540,114
1965,gaussian_naive_bayes,adasyn,337,0.706827,0.6725,0.690145,0.902230,0.660389,0.762595,0.351319,0.719902,0.472200,0.391057,0.0,293,1052,541,114


In [None]:
   
# for mth in methods_:
#     print('\n',mth.upper())
#     metrics_data = [] 
#     if mth == 'kmeans_smote':
#         # No cluster are formed beyond this value of k
#         stop_range = 339

#     elif mth == 'smotenc':
#         # RAM issue
#         stop_range = 150

#     else:
#         stop_range = y_train.value_counts()[1]


#     for neighbour in range(1,stop_range):
#         x_train_new, y_train_new = sampling_method(mth, neighbour)
        
#         for model_name in model_list:
# #             print(model_name.upper())

#             model = train_all_models(x_train_new, y_train_new, model_name)

#             # Generate predictions
#             y_pred_train, y_pred_test, y_pred_test_proba = prediction(model, x_train_new, x_test)

#             # Calculate evaluation metrics
#             model_evaluation = Evaluation(y_train_new, y_test, y_pred_train, y_pred_test, y_pred_test_proba)

#             all_metrics,_ = model_evaluation.main()
#             all_metrics.insert(0, model_name)
#             all_metrics.insert(1, mth)
#             all_metrics.insert(2, neighbour)

# #             if neighbour%500 == 0 or neighbour == 1:
# #                 print("{:<40} :: Train Acc: {:<14} :: Test Acc: {}".format(mth.upper(), all_metrics[3], all_metrics[4]))

#             metrics_data.append(all_metrics)
#         print('.',end='')

#     metrics_df = pd.DataFrame(metrics_data, columns=metrics_data_columns)
#     metrics_df.to_csv(f'./all_methods_models/{mth}_models_evaluation_all.csv', header=True, index=False)


 SMOTE
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................