In [21]:
import os
import time

import numpy as np
import pandas as pd
from sklearn.metrics import auc, roc_curve, roc_auc_score, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [22]:
# Load partitions path: /media/william/NVME/projects/malaria-pibiti/1_entrada/partitions
path_partitions = '/media/william/NVME/projects/malaria-pibiti/1_entrada/partitions'

# 100 partitions with columns Image,Class,Train,Test: 1.csv 2.csv 3.csv ... 100.csv
# Keep only "<number>.csv" entries so that stray directory content (for example a
# Jupyter ".ipynb_checkpoints" folder) cannot crash the numeric sort below.
partitions = [
    entry for entry in os.listdir(path_partitions)
    if os.path.splitext(entry)[1] == '.csv' and os.path.splitext(entry)[0].isdecimal()
]
# order by partition number (numeric, so 2.csv sorts before 10.csv)
partitions.sort(key=lambda entry: int(os.path.splitext(entry)[0]))
print(partitions)

# Path to Image in partition
path_image = '/media/william/NVME/projects/malaria-pibiti/1_entrada/'
dataset_names = ['Dataset01_100', 'Dataset01_95.0', 'Dataset01_90.0']
# Remaining dataset variants, currently disabled:
#   'Dataset01_85.0', 'Dataset01_80.0', 'Dataset01_75.0', 'Dataset01_70.0',
#   'Dataset01_65.0', 'Dataset01_60.0', 'Dataset01_55.0', 'Dataset01_50.0',
#   'Dataset01_45.0', 'Dataset01_40.0', 'Dataset01_35.0', 'Dataset01_30.0',
#   'Dataset01_25.0', 'Dataset01_20.0', 'Dataset01_15.0', 'Dataset01_10.0',
#   'Dataset01_5.0'

# Maps each dataset name to its image directory,
# e.g. {'Dataset01_100': path_image + 'Dataset01_100', ...}
paths_datasets = {name: path_image + name for name in dataset_names}


['1.csv', '2.csv', '3.csv', '4.csv', '5.csv', '6.csv', '7.csv', '8.csv', '9.csv', '10.csv', '11.csv', '12.csv', '13.csv', '14.csv', '15.csv', '16.csv', '17.csv', '18.csv', '19.csv', '20.csv', '21.csv', '22.csv', '23.csv', '24.csv', '25.csv', '26.csv', '27.csv', '28.csv', '29.csv', '30.csv', '31.csv', '32.csv', '33.csv', '34.csv', '35.csv', '36.csv', '37.csv', '38.csv', '39.csv', '40.csv', '41.csv', '42.csv', '43.csv', '44.csv', '45.csv', '46.csv', '47.csv', '48.csv', '49.csv', '50.csv', '51.csv', '52.csv', '53.csv', '54.csv', '55.csv', '56.csv', '57.csv', '58.csv', '59.csv', '60.csv', '61.csv', '62.csv', '63.csv', '64.csv', '65.csv', '66.csv', '67.csv', '68.csv', '69.csv', '70.csv', '71.csv', '72.csv', '73.csv', '74.csv', '75.csv', '76.csv', '77.csv', '78.csv', '79.csv', '80.csv', '81.csv', '82.csv', '83.csv', '84.csv', '85.csv', '86.csv', '87.csv', '88.csv', '89.csv', '90.csv', '91.csv', '92.csv', '93.csv', '94.csv', '95.csv', '96.csv', '97.csv', '98.csv', '99.csv', '100.csv']


In [23]:
# Save results

path_results = '/media/william/NVME/projects/malaria-pibiti/6_resultados'

def make_results_folders(path_results, dataset_name, method):
    """Create (if needed) the output folders for one dataset/method pair.

    The layout is ``<path_results>/<dataset_name>/<method>/{metrics,csvs,test}``.

    Args:
        path_results: Root directory for all results; created if missing.
        dataset_name: Name of the dataset variant (first sub-folder level).
        method: Name of the classifier (second sub-folder level).

    Returns:
        Tuple ``(path_metrics, path_csvs, path_test)`` with the three leaf
        directories, all guaranteed to exist on return.
    """
    path_method = os.path.join(path_results, dataset_name, method)
    leaf_dirs = tuple(os.path.join(path_method, leaf) for leaf in ('metrics', 'csvs', 'test'))
    for leaf_dir in leaf_dirs:
        # makedirs also creates missing parents (including path_results itself)
        # and exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(leaf_dir, exist_ok=True)
    path_metrics, path_csvs, path_test = leaf_dirs
    return path_metrics, path_csvs, path_test

In [24]:
# GPU configuration, set through environment variables:
#   TF_FORCE_GPU_ALLOW_GROWTH - ask TensorFlow to grow GPU memory on demand
#   CUDA_VISIBLE_DEVICES      - expose only the CUDA device with index 1
os.environ.update({
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
    'CUDA_VISIBLE_DEVICES': "1",
})

In [25]:
# Classifiers to evaluate; each name must match a branch of train_ml_algorithm.
# Previously tried here as well: 'NBayes', 'RandomForest'.
methodsNames = list(('KNN',))
# Target (height, width) in pixels that every image is brought to by select_image.
input_size = tuple([128, 128])

In [26]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier, StackingClassifier, VotingClassifier, HistGradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import NuSVC, SVC, LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from tune_sklearn import TuneGridSearchCV

def _ensemble_base_learners():
    """Return fresh, named base learners shared by the Stacking and Voting ensembles."""
    # scikit-learn ensembles require a list of (name, estimator) tuples.
    return [
        ('knn', KNeighborsClassifier(n_neighbors=1)),
        ('rf', RandomForestClassifier(random_state=1)),
        ('nb', GaussianNB()),
    ]


def _search_spec(methodName):
    """Describe the hyper-parameter search for one method.

    Returns:
        ``(search_class, estimator, param_grid, extra_search_kwargs)`` for a known
        method name, or ``None`` when the name is not recognised.
    """
    # KNN
    # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier
    if methodName == 'KNN':
        return GridSearchCV, KNeighborsClassifier(), {
            "n_neighbors": [1, 3, 5, 10, 15, 20],
            "weights": ['uniform', 'distance'],
            "algorithm": ['ball_tree', 'kd_tree', 'brute'],
            "leaf_size": [5, 15, 25, 35, 45, 55],
            'p': [10, 20, 40],
            "metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski'],
            # NOTE(review): estimator-level n_jobs=3 combined with the search's
            # n_jobs=-1 may oversubscribe the CPU - confirm this is intended.
            'n_jobs': [3],
        }, {'verbose': 1}

    # Decision Tree
    # https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier
    if methodName == 'DTree':
        return GridSearchCV, DecisionTreeClassifier(), {
            'criterion': ["gini", "entropy"],
            'splitter': ['best', 'random'],
            # NOTE(review): 'auto' is rejected by recent scikit-learn releases -
            # confirm against the installed version.
            'max_features': ['auto', 'sqrt', 'log2'],
            'class_weight': [None, 'balanced'],
        }, {'verbose': 1}

    # SVM Linear
    # https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC
    if methodName == "SVMLinear":
        return GridSearchCV, LinearSVC(), {
            'penalty': ['l1', 'l2'],
            'dual': [True, False],
            'C': np.arange(0.01, 100, 10),
            'multi_class': ['ovr', 'crammer_singer'],
            'fit_intercept': [True, False],
            'class_weight': [None, 'balanced'],
            'verbose': [0],
            'max_iter': [1, 10, 50, 100, 200],
        }, {}

    # SVM Nu
    # https://scikit-learn.org/stable/modules/generated/sklearn.svm.NuSVC.html#sklearn.svm.NuSVC
    if methodName == 'SVMNu':
        return GridSearchCV, NuSVC(), {
            'nu': np.arange(0.1, 1.1, 0.1),
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],  # 'precomputed' requires a square matrix
            'degree': [1, 2, 3, 4, 5],
            'gamma': ['scale', 'auto'],
            'shrinking': [True, False],
            'probability': [True],
            'class_weight': [None, 'balanced'],
            'verbose': [0],
            'max_iter': [-1],
            'decision_function_shape': ['ovo', 'ovr'],
            'break_ties': [True, False],
        }, {}

    # SVM C
    # https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC
    if methodName == 'SVMC':
        return GridSearchCV, SVC(), {
            'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1],
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'degree': [1, 2, 3, 4, 5],
            'gamma': ['scale', 'auto'],
            'shrinking': [True, False],
            # The grid used to list 'probability' twice; the later [True] entry
            # silently won, so only that effective value is kept.
            'probability': [True],
            'class_weight': [None, 'balanced'],
            'verbose': [0],
            'max_iter': [-1],
            'decision_function_shape': ['ovo', 'ovr'],
        }, {}

    # Discriminant Analysis (Linear)
    # https://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.LinearDiscriminantAnalysis.html#sklearn.discriminant_analysis.LinearDiscriminantAnalysis
    if methodName == 'DAnalysisLinear':
        return GridSearchCV, LinearDiscriminantAnalysis(), {
            'solver': ['svd', 'lsqr', 'eigen'],
            'shrinkage': ['auto', 0.2, 0.4, 0.6, 0.8, 1],
            'store_covariance': [True, False],
        }, {}

    # SGD (the only method searched with tune_sklearn's TuneGridSearchCV)
    # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html?highlight=sgd#sklearn.linear_model.SGDClassifier
    if methodName == 'SGD':
        return TuneGridSearchCV, SGDClassifier(loss='hinge'), {
            'alpha': [0.0001, 0.001, 0.01, 0.1, 1],
            'epsilon': [0.01, 0.1, 1],
        }, {}

    # Discriminant Analysis (Quadratic)
    # https://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.html#sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis
    if methodName == 'DAnalysisQuadratic':
        return GridSearchCV, QuadraticDiscriminantAnalysis(), {
            'reg_param': [0.1, 0.2, 0.3],
            'store_covariance': [True, False],
        }, {}

    # Naive Bayes
    # https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html#sklearn.naive_bayes.GaussianNB
    if methodName == 'NBayes':
        return GridSearchCV, GaussianNB(), {
            "var_smoothing": np.logspace(0, -9, num=100),
        }, {}

    # AdaBoost
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html?highlight=adaboost#sklearn.ensemble.AdaBoostClassifier
    if methodName == 'AdaBoost':
        return GridSearchCV, AdaBoostClassifier(), {
            "n_estimators": [10, 50, 100, 150],
            'learning_rate': [0.5, 1, 1.5],
            'algorithm': ['SAMME', 'SAMME.R'],
        }, {}

    # RandomForest
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html?highlight=random#sklearn.ensemble.RandomForestClassifier
    if methodName == 'RandomForest':
        return GridSearchCV, RandomForestClassifier(), {
            "n_estimators": [10, 100, 1000],
            'criterion': ["gini", "entropy"],
            'max_depth': [None],
            "max_features": ['sqrt', 'log2'],
            'verbose': [0],
            'class_weight': ['balanced', 'balanced_subsample'],
        }, {}

    # Bagging
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html#sklearn.ensemble.BaggingClassifier
    if methodName == 'Bagging':
        return GridSearchCV, BaggingClassifier(), {
            # the value 100 used to appear twice, which only repeated identical fits
            "n_estimators": [10, 50, 100, 150],
            'verbose': [0],
        }, {}

    # ExtraTreesClassifier
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html#sklearn.ensemble.ExtraTreesClassifier
    if methodName == 'ExtraTrees':
        return GridSearchCV, ExtraTreesClassifier(), {
            "n_estimators": [10, 50, 100, 150],
            'criterion': ["gini", "entropy"],
            "max_features": ['sqrt', 'log2'],
            'class_weight': ['balanced', 'balanced_subsample'],
            'verbose': [0],
        }, {}

    # GradientBoostingClassifier
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html#sklearn.ensemble.GradientBoostingClassifier
    if methodName == 'GradientBoosting':
        return GridSearchCV, GradientBoostingClassifier(), {
            # NOTE(review): 'deviance' was renamed 'log_loss' in newer
            # scikit-learn releases - confirm against the installed version.
            'loss': ['deviance', 'exponential'],
            "n_estimators": [10, 50, 100, 150],
            'criterion': ["squared_error"],
            "max_features": ['sqrt', 'log2'],
            'verbose': [0],
        }, {}

    # StackingClassifier
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingClassifier.html#sklearn.ensemble.StackingClassifier
    if methodName == 'Stacking':
        stacking = StackingClassifier(estimators=_ensemble_base_learners(),
                                      final_estimator=LogisticRegression())
        return GridSearchCV, stacking, {
            'stack_method': ['predict_proba', 'decision_function', 'predict'],
            "passthrough": [True, False],
            'verbose': [0],
        }, {}

    # VotingClassifier
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html#sklearn.ensemble.VotingClassifier
    if methodName == 'Voting':
        # VotingClassifier cannot be built without estimators; reuse the base
        # learners of the Stacking branch.
        # NOTE(review): confirm these are the intended voters.
        voting = VotingClassifier(estimators=_ensemble_base_learners())
        return GridSearchCV, voting, {
            'voting': ['hard', 'soft'],
            "flatten_transform": [True, False],
            'verbose': [0],
        }, {}

    # HistGradientBoostingClassifier
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html#sklearn.ensemble.HistGradientBoostingClassifier
    if methodName == 'HistGradient':
        return GridSearchCV, HistGradientBoostingClassifier(), {
            # NOTE(review): newer scikit-learn releases only accept 'log_loss'
            # here - confirm against the installed version.
            'loss': ['binary_crossentropy', 'categorical_crossentropy'],
        }, {}

    return None


def train_ml_algorithm(X_train, y_train, methodName):
    """Run an exhaustive hyper-parameter search for one classifier and fit it.

    Every search scores candidates by accuracy on a single shuffled hold-out
    split (1% of the training data, fixed seed) and runs with ``n_jobs=-1``.

    Args:
        X_train: Training features, passed unchanged to the search's ``fit``.
        y_train: Training targets, passed unchanged to the search's ``fit``.
        methodName: One of 'KNN', 'DTree', 'SVMLinear', 'SVMNu', 'SVMC',
            'DAnalysisLinear', 'SGD', 'DAnalysisQuadratic', 'NBayes',
            'AdaBoost', 'RandomForest', 'Bagging', 'ExtraTrees',
            'GradientBoosting', 'Stacking', 'Voting', 'HistGradient'.

    Returns:
        The fitted search object, or ``None`` if ``methodName`` is unknown.
    """
    spec = _search_spec(methodName)
    if spec is None:
        return None

    search_class, estimator, parameters, extra_kwargs = spec
    search = search_class(
        estimator=estimator,
        param_grid=parameters,
        cv=ShuffleSplit(test_size=0.01, n_splits=1, random_state=0),
        scoring='accuracy',
        n_jobs=-1,
        **extra_kwargs
    )
    return search.fit(X_train, y_train)
    

In [27]:
# configure colors
class bcolors:
    """ANSI escape sequences for styling text printed to a terminal/notebook.

    Prefix a string with one of the codes and append ``ENDC`` to reset the
    style afterwards.
    """
    HEADER = '\033[95m'     # bright magenta
    OKBLUE = '\033[94m'     # bright blue
    OKCYAN = '\033[96m'     # bright cyan
    OKGREEN = '\033[92m'    # bright green
    WARNING = '\033[93m'    # bright yellow
    FAIL = '\033[91m'       # bright red
    ENDC = '\033[0m'        # reset all attributes
    BOLD = '\033[1m'        # bold
    UNDERLINE = '\033[4m'   # underline


In [28]:
# Module-level timing values: calculateMeasures reads them when it fills the
# 'runtime' / 'val_runtime' columns, and the training loop overwrites them.
runtimeTrain = runtimeTest = 0

# Configure personal metrics
def specificity(tn, fp):
    """Return the true-negative rate ``tn / (tn + fp)``.

    Args:
        tn: Number of true negatives.
        fp: Number of false positives.

    Returns:
        The specificity, or ``0.0`` when there are no negative samples at all
        (``tn + fp == 0``), matching the way ``npv`` degrades to 0 instead of
        raising or producing NaN.
    """
    negatives = tn + fp
    if negatives == 0:
        return 0.0
    return tn / negatives

# Negative Predictive Value
def npv(tn, fn):
    """Return ``tn / (tn + fn)``, the share of negative predictions that are correct.

    A small epsilon (1e-7) is added to the denominator so that the result is
    0 rather than a division error when nothing was predicted negative.
    """
    predicted_negative = tn + fn + 1e-7
    return tn / predicted_negative

# Matthews Correlation Coefficient
def mcc(tp, tn, fp, fn):
    """Return the Matthews correlation coefficient of a 2x2 confusion matrix.

    Args:
        tp, tn, fp, fn: Confusion-matrix counts (Python or NumPy integers).

    Returns:
        ``(tp*tn - fp*fn) / sqrt((tp+fp)(tp+fn)(tn+fp)(tn+fn) + 1e-7)``; the
        epsilon makes the result 0 instead of NaN when a factor is zero.
    """
    # The counts arrive as NumPy int64 from confusion_matrix; the product of
    # four such sums silently wraps around for large datasets, so the whole
    # computation is done in floating point.
    tp, tn, fp, fn = float(tp), float(tn), float(fp), float(fn)
    num = tp * tn - fp * fn
    den = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
    return num / np.sqrt(den + 1e-7)


def calculateMeasures(Y_pred, Y_true, Yscores, y_pred, y_true, yscores, folder, methodName, thresh, save_metrics_path):
    """Compute train and test metrics for one run and append them to a CSV file.

    Args:
        Y_pred, Y_true, Yscores: Predicted labels, true labels and positive-class
            scores for the training split (columns without prefix).
        y_pred, y_true, yscores: The same three arrays for the test split
            (columns prefixed with ``val_``).
        folder: Identifier stored in the 'folder' column.
        methodName: Classifier name; stored in the 'network' column and used in
            the output file name.
        thresh: Only used to build the file name, as ``str(thresh*100)``.
        save_metrics_path: Directory of the output CSV.

    The 'runtime' and 'val_runtime' columns are taken from the module-level
    variables ``runtimeTrain`` and ``runtimeTest``. The row is appended when
    the CSV already exists; otherwise the file is created with a header.
    """
    # Train RESULTS
    tn, fp, fn, tp = confusion_matrix(Y_true, Y_pred, labels=[0,1]).ravel()
    train_auc = roc_auc_score(Y_true, Yscores)
    row = {
        'folder': [folder],
        'network': [methodName],
        'accuracy': [accuracy_score(Y_true, Y_pred)],
        'precision': [precision_score(Y_true, Y_pred)],
        'sensitivity': [recall_score(Y_true, Y_pred)],
        'specificity': [specificity(tn,fp)],
        'fmeasure': [f1_score(Y_true, Y_pred)],
        'npv': [npv(tn, fn)],
        'mcc': [mcc(tp, tn, fp, fn)],
        'auc': [train_auc],
        'tn': [tn],
        'fp': [fp],
        'fn': [fn],
        'tp': [tp],
        'runtime': [runtimeTrain],
    }

    # Test RESULTS
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0,1]).ravel()
    test_auc = roc_auc_score(y_true, yscores)
    row.update({
        'val_accuracy': [accuracy_score(y_true, y_pred)],
        'val_precision': [precision_score(y_true, y_pred)],
        'val_sensitivity': [recall_score(y_true, y_pred)],
        'val_specificity': [specificity(tn,fp)],
        'val_fmeasure': [f1_score(y_true, y_pred)],
        'val_npv': [npv(tn, fn)],
        'val_mcc': [mcc(tp, tn, fp, fn)],
        'val_auc': [test_auc],
        'val_tn': [tn],
        'val_fp': [fp],
        'val_fn': [fn],
        'val_tp': [tp],
        'val_runtime': [runtimeTest],
    })
    metrics = pd.DataFrame(row)

    # One-line summary of the test split, highlighted with the FAIL colour.
    acc_text = 'ACC: %.2f' % (100*metrics['val_accuracy'][0])
    auc_text = ' AUC: %.2f' % (100*metrics['val_auc'][0])
    print(bcolors.FAIL + acc_text + auc_text + bcolors.ENDC)

    csv_path = os.path.join(save_metrics_path, methodName + str(thresh*100) + '.csv')
    already_there = os.path.exists(csv_path)
    # Append without a header to an existing file, otherwise create it with one.
    metrics.to_csv(csv_path, sep=',', mode='a' if already_there else 'w',
                   index=False, header=not already_there)

In [29]:
import csv
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from PIL import Image

def load_dataset(n_classes: int, path_partition_name: str, data_set_name: str) -> tuple:
    """Load the train/test images and labels described by one partition CSV.

    The CSV is read positionally: column 0 is the image file name, column 1 the
    integer class, column 2 is 'True' for training rows and column 3 is 'True'
    for test rows. Rows flagged for neither split are ignored.

    Args:
        n_classes: Number of classes used for the one-hot label encoding.
        path_partition_name: Path of the partition CSV file.
        data_set_name: Key into ``paths_datasets`` selecting the image directory.

    Returns:
        ``(train_images, train_labels, test_images, test_labels)`` where the
        images are arrays scaled by 1/255 and the labels are one-hot encoded,
        or ``(None, None, None, None)`` if anything goes wrong (the error is
        printed).
    """
    train_files, test_files = [], []
    TrainLabels, TestLabels = [], []

    try:
        with open(path_partition_name, 'r') as file:
            csvreader = csv.reader(file)
            next(csvreader)  # the first row is always discarded as the header
            for row in csvreader:
                if row[2] == 'True':   # Train sample
                    train_files.append(row[0])
                    TrainLabels.append(int(row[1]))
                elif row[3] == 'True': # Test sample
                    test_files.append(row[0])
                    TestLabels.append(int(row[1]))

        TrainImages = [select_image(paths_datasets[data_set_name] + '/' + img) for img in train_files]
        TestImages = [select_image(paths_datasets[data_set_name] + '/' + img) for img in test_files]

        # select_image returns None for unreadable files. Report that explicitly
        # instead of letting it surface as an obscure NumPy error further down.
        unreadable = [
            name for name, pixels in zip(train_files + test_files, TrainImages + TestImages)
            if pixels is None
        ]
        if unreadable:
            raise ValueError(
                f"{len(unreadable)} image(s) could not be loaded (first: {unreadable[0]})"
            )

        # Number of positive (class 1) samples per split, for a quick sanity check.
        print(sum(TrainLabels))
        print(sum(TestLabels))
        TrainLabels = to_categorical(np.array(TrainLabels), num_classes=n_classes)
        TestLabels = to_categorical(np.array(TestLabels), num_classes=n_classes)

        return np.array(TrainImages)/255.0, TrainLabels, np.array(TestImages)/255.0, TestLabels

    except Exception as e:
        # Best effort: the caller skips this partition/dataset when it gets None.
        print(f"Error processing dataset: {e}")
        return None, None, None, None
      
def select_image(path_img_filename):
    """Load one image as RGB and bring it to ``input_size``.

    Args:
        path_img_filename: Path of the image file.

    Returns:
        A NumPy array for the image after centre crop/pad to
        ``input_size[0]`` x ``input_size[1]``, or ``None`` (after printing a
        message) when the file cannot be read.
    """
    try:
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied, instead of waiting for garbage collection.
        with Image.open(path_img_filename) as source:
            pixels = np.asarray(source.convert('RGB'))
        image = tf.image.resize_with_crop_or_pad(pixels, input_size[0], input_size[1])
        # NOTE(review): the image already has the target size here, so this
        # resize looks like a geometric no-op; it presumably only remains for
        # its dtype conversion - confirm before removing.
        image = tf.image.resize(image, [input_size[0], input_size[1]])
        return np.asarray(image)
    except IOError:
        print(f"Error opening {path_img_filename}.")
        return None

In [30]:
# Train and evaluate every method on every partition and dataset variant.
for network in methodsNames:
    for partition in partitions:
        for dataset_name in dataset_names:
            print(f"Training {network} on partition {partition} with dataset {dataset_name}...")
            path_metrics, path_csvs, path_test = make_results_folders(path_results, dataset_name, network)
            path_partition = os.path.join(path_partitions, partition)
            X_train, y_train, X_test, y_test = load_dataset(2, path_partition, dataset_name)
            if X_train is None:
                continue

            # scikit-learn estimators expect a 2-D feature matrix and the metric
            # functions expect 1-D class labels, so flatten every image into one
            # row and turn the one-hot labels back into class indices.
            X_train = X_train.reshape(len(X_train), -1)
            X_test = X_test.reshape(len(X_test), -1)
            if y_train.ndim > 1:
                y_train = np.argmax(y_train, axis=1)
            if y_test.ndim > 1:
                y_test = np.argmax(y_test, axis=1)

            results = train_ml_algorithm(X_train, y_train, network)
            if results is None:
                print(f"Unknown method {network}; skipping.")
                continue
            # Time spent refitting the best candidate on the whole training set.
            runtimeTrain = results.refit_time_

            Y_pred = results.predict(X_train)
            Yscores = results.predict_proba(X_train)[:,1]

            # Measure inference time on the test split. It must be known before
            # calculateMeasures runs, because that function stores the
            # module-level runtimeTest in the 'val_runtime' column.
            test_start = time.perf_counter()
            y_pred = results.predict(X_test)
            yscores = results.predict_proba(X_test)[:,1]
            runtimeTest = time.perf_counter() - test_start

            calculateMeasures(Y_pred, y_train, Yscores, y_pred, y_test, yscores, partition, network, 0.2, path_metrics)
            print(f"Runtime: {runtimeTest}")
            print(f"Finished training {network} on partition {partition} with dataset {dataset_name}.")

Training KNN on partition 1.csv with dataset Dataset01_100...


2373
628
Fitting 1 folds for each of 2592 candidates, totalling 2592 fits


  pid = os.fork()


KeyboardInterrupt: 