In [None]:
import os
from datetime import datetime
import pandas as pd

from sklearn.linear_model import LogisticRegression, RidgeClassifier, Perceptron, SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_validate
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

In [None]:
def split_vector_to_columns(data):
    """
    Expand the list-like `vector` column of `data` into one column per component.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame exposing a `vector` column (one sequence per row; the width is
        taken from the first row) and a `label` column.

    Returns
    -------
    X : pandas.DataFrame
        One row per input row, columns named 'feature0', 'feature1', ...
    y : numpy.ndarray
        The `label` column converted with `to_numpy()`.

    Raises
    ------
    ValueError
        If `data` has no rows, so the feature width cannot be determined.
    """
    if len(data) == 0:
        raise ValueError("cannot split vectors: `data` has no rows")
    # Positional access on purpose: the label-based `data.vector[0]` raises
    # KeyError whenever the index does not contain the label 0 (for example
    # after filtering or re-indexing the frame).
    n_features = len(data.vector.iloc[0])
    col_names = ['feature' + str(i) for i in range(n_features)]
    X = pd.DataFrame(data.vector.tolist(), columns=col_names)
    return X, data.label.to_numpy()

def data_preparation(data_name):
    """
    Load a pickled dataset and return standardized features plus labels.

    Parameters
    ----------
    data_name : path-like
        Location of the pickle file, read with `pd.read_pickle`.

    Returns
    -------
    tuple
        `(X, y)`: the output of `StandardScaler.transform` on the expanded
        feature columns, and the labels from `split_vector_to_columns`.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    frame = pd.read_pickle(data_name)
    features, labels = split_vector_to_columns(frame)
    # NOTE(review): the scaler is fitted on every row. If the returned X is
    # cross-validated afterwards, held-out folds have already influenced the
    # scaling statistics; confirm whether that is acceptable for the experiment.
    standardizer = preprocessing.StandardScaler()
    standardizer.fit(features)
    return standardizer.transform(features), labels

In [None]:
# Classifier classes (not instances) that take part in the benchmark.
list_models_default_params = [
    # linear models and SVM
    LogisticRegression,
    RidgeClassifier,
    svm.SVC,
    SGDClassifier,
    Perceptron,
    # naive Bayes
    GaussianNB,
    # single tree
    DecisionTreeClassifier,
    # ensembles
    BaggingClassifier,
    AdaBoostClassifier,
    RandomForestClassifier,
    GradientBoostingClassifier,
    # nearest neighbours
    KNeighborsClassifier,
    # neural network
    MLPClassifier,
]

# Maps the label used in the result table to a scikit-learn scorer name.
# NOTE(review): 'f1', 'precision' and 'recall' presumably rely on binary labels - confirm.
scoring = dict(
    accuracy='accuracy',
    f1_score='f1',
    precision='precision',
    recall='recall',
)

In [None]:
# Hyperparameter grids handed to GridSearchCV, keyed by classifier class name.
# An empty dict means "no search": compute_GS instantiates the class with defaults.
list_parameters = {
    # Single-value grid: only raises max_iter above the library default.
    LogisticRegression.__name__: {'max_iter': [600]},
    # Perceptron only applies `alpha` when a penalty is set, and its default
    # penalty is None, so a grid over `alpha` alone compares identical models.
    # The unregularized default is kept as its own candidate and `alpha` is
    # searched together with an explicit penalty.
    Perceptron.__name__: [
        {'penalty': [None]},
        {'penalty': ['l2', 'l1'], 'alpha': [0.0001, 0.001, 0.01, 0.1]},
    ],
    RidgeClassifier.__name__: {'alpha': [0.01, 0.05, 0.1, 0.5, 1, 5]},
    svm.SVC.__name__: {'kernel': ['rbf', 'linear', 'poly']},
    SGDClassifier.__name__: {'alpha': [0.0001, 0.001, 0.01]},
    GaussianNB.__name__: {},
    DecisionTreeClassifier.__name__: {'min_samples_split': [10, 20, 30], 'min_samples_leaf': [10, 20, 30]},
    BaggingClassifier.__name__: {'n_estimators': [5, 10, 20]},
    AdaBoostClassifier.__name__: {'n_estimators': [10, 25, 50, 75], 'learning_rate': [0.1, 0.5, 1, 5]},
    RandomForestClassifier.__name__: {'min_samples_split': [10, 20, 30], 'min_samples_leaf': [10, 20, 30]},
    GradientBoostingClassifier.__name__: {'learning_rate': [0.01, 0.1, 0.5], 'min_samples_split': [5, 10], 'min_samples_leaf': [5, 10]},
    KNeighborsClassifier.__name__: {'n_neighbors': [3, 5, 7]},
    MLPClassifier.__name__: {'hidden_layer_sizes': [(100,), (100, 100,), (100, 10,)], 'alpha': [0.0001, 0.001, 0.01]},
}

In [None]:
def compute_model_performance(list_models_GS, X, y, scoring, cv=5, train=True, file_name="", save_dir=None, verbose=True):
    """
    Cross-validate every model in `list_models_GS` and tabulate the mean of each metric.

    Parameters
    ----------
    list_models_GS : sequence of estimator instances
        Models to evaluate; each one is passed to `cross_validate` as-is.
    X, y : array-like
        Features and labels forwarded to `cross_validate`.
    scoring : str, list or dict
        Scorer specification forwarded to `cross_validate`.
    cv : int, default 5
        Cross-validation setting forwarded to `cross_validate`; also embedded
        in the name of the report file.
    train : bool, default True
        Forwarded as `return_train_score`.
    file_name : str, default ""
        Tag embedded in the name of the report file.
    save_dir : str or None, default None
        Directory for the report; None means the current directory. The
        directory is created when it does not exist yet.
    verbose : bool, default True
        Print one progress line per model.

    Returns
    -------
    pandas.DataFrame
        One row per model (indexed by class name) and one column per key
        returned by `cross_validate`, each cell holding the mean over folds.

    Side effects
    ------------
    Writes the table as text to
    `<save_dir>/Exp_result-<file_name>-cv=<cv>.log`, overwriting any file
    already stored under that name.
    """
    result = {}
    idx_names = []
    tot = len(list_models_GS)
    for i, model in enumerate(list_models_GS):
        model_name = type(model).__name__
        if verbose:
            print(f"Evaluating model {(i+1)}/{tot}: {model_name}...")
        idx_names.append(model_name)
        scores = cross_validate(model, X, y, scoring=scoring, cv=cv, return_train_score=train)
        for key, values in scores.items():
            # Average over the folds; one list entry per evaluated model.
            result.setdefault(key, []).append(values.mean())
    output = pd.DataFrame(result, index=idx_names)
    save_dir = "." if save_dir is None else save_dir
    # Without this, open() below fails with FileNotFoundError on a fresh checkout.
    os.makedirs(save_dir, exist_ok=True)
    output_file = os.path.join(save_dir, "Exp_result-{}-cv={}.log".format(file_name, cv))
    with open(output_file, "w") as f:
        f.write(output.to_string(index=True))
    return output

In [None]:
def compute_GS(list_models, X, y, list_parameters, scoring = "f1", n_jobs=-1, cv=5, verbose=True):
    """
    Tune each classifier class with a grid search and return fresh, unfitted instances.

    For every class in `list_models` the grid stored under the class name in
    `list_parameters` is looked up. An empty dict skips the search and yields
    a default-constructed instance; otherwise a `GridSearchCV` is fitted on
    `(X, y)` and a new instance is built from its `best_params_`.

    Parameters
    ----------
    list_models : sequence of classes
        Estimator classes, called without arguments to create the search base.
    X, y : array-like
        Data the grid searches are fitted on.
    list_parameters : dict
        Maps class name to the parameter grid for that class.
    scoring, n_jobs, cv
        Forwarded unchanged to `GridSearchCV`.
    verbose : bool, default True
        Print one progress line per model.

    Returns
    -------
    list
        One estimator instance per entry of `list_models`, in the same order.
    """
    tuned_models = []
    n_models = len(list_models)
    for position, model_cls in enumerate(list_models, start=1):
        grid_spec = list_parameters[model_cls.__name__]
        if verbose:
            print(f"Grid search model {position}/{n_models}: {model_cls.__name__}...")
        if grid_spec == {}:
            # Nothing to tune: keep the library defaults.
            tuned_models.append(model_cls())
            continue
        search = GridSearchCV(model_cls(), grid_spec, scoring=scoring, n_jobs=n_jobs, cv=cv)
        search.fit(X, y)
        # Rebuild from the winning parameters so the returned model is unfitted.
        tuned_models.append(model_cls(**search.best_params_))
    return tuned_models

In [None]:
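# NOTE: disabled multiprocessing variant of compute_GS, kept for reference only.
# Known problem if re-enabled: the result loop calls `model(**best_params)` with
# whatever `model` was left over from the submission loop, so every tuned model
# would be built from the last class in list_models.
# import multiprocessing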
# def compute_GS(list_models, X, y, list_parameters, scoring = "accuracy", cv=5):
#     def compute_GS_one_model(model, X, y, params, scoring = "accuracy", cv=5):
#         grid= GridSearchCV(model, params, scoring = scoring, cv=cv)
#         grid.fit(X, y)
#         return grid.best_params_
    
#     def compute_GS_one_model_wrapper(args):
#         return compute_GS_one_model(*args)
    
#     pool = multiprocessing.Pool()
#     results = []
#     for model in list_models:
#         param = list_parameters[model.__name__]
#         if param != {}:
#             args = (model(), X, y, param, scoring, cv)
#             results.append(pool.apply_async(compute_GS_one_model_wrapper, (args,)))
#         else:
#             model_GS = model()
#             results.append(model_GS)
#     list_models_GS = []
#     for result in results:
#         if isinstance(result, multiprocessing.pool.ApplyResult):
#             best_params = result.get()
#             model_GS = model(**best_params)
#         else:
#             model_GS = result
#         list_models_GS.append(model_GS)
#     pool.close()
#     pool.join()
#     return list_models_GS

In [29]:
# Experiment configuration.
SAVE_DIR = './experiment_results'   # destination of the per-run report files
CV = [3,5,8]                        # cross-validation settings to compare
FILENAMES = ['sbert', 'bow', 'sent2vec', 'tfidf', 'Turbo_sbert', 'Turbo_bow', 'Turbo_sent2vec', 'Turbo_tfidf']

# compute_model_performance opens its report inside SAVE_DIR, so the directory
# has to exist before the first run (it does not on a fresh checkout).
os.makedirs(SAVE_DIR, exist_ok=True)

tot = len(FILENAMES)
for i, file in enumerate(FILENAMES):
    # The small file drives the hyperparameter search, the large one the evaluation.
    SEARCH_FILENAME = f"./dataset/vector_300_{file}.pickle"
    TRAIN_FILENAME = f"./dataset/vector_3000_{file}.pickle"
    # File name without directory and extension, independent of path depth.
    RESULT_NAME = os.path.splitext(os.path.basename(TRAIN_FILENAME))[0]
    # Progress counter is 1-based so the last dataset reads tot/tot.
    print(f"({i + 1}/{tot}) PROCESSING {file}...")
    print("Step 1: read small dataset")
    X_search, y_search = data_preparation(SEARCH_FILENAME)
    print("Step 2: read large dataset and compute performance")
    X_train, y_train = data_preparation(TRAIN_FILENAME)
    for c in CV:
        print(f"cross validation = {c}")
        print("Step 3: hyperparameter search")
        list_models_GS = compute_GS(list_models_default_params, X_search, y_search, list_parameters, cv=c)
        print(list_models_GS)
        print("Step 4: compute model performance")
        r = compute_model_performance(list_models_GS, X_train, y_train, cv=c, scoring=scoring, train=True, file_name=RESULT_NAME, save_dir=SAVE_DIR)


(0/8: PROCESSING sbert...
Step 1: read small dataset
Step 2: read large dataset and compute performance
cross validation = 3
Step 3: hyperparameter search
Grid search model 1/13: LogisticRegression...
Grid search model 2/13: RidgeClassifier...
Grid search model 3/13: SVC...
Grid search model 4/13: SGDClassifier...
Grid search model 5/13: Perceptron...
Grid search model 6/13: GaussianNB...
Grid search model 7/13: DecisionTreeClassifier...
Grid search model 8/13: BaggingClassifier...
Grid search model 9/13: AdaBoostClassifier...
Grid search model 10/13: RandomForestClassifier...
Grid search model 11/13: GradientBoostingClassifier...
Grid search model 12/13: KNeighborsClassifier...
Grid search model 13/13: MLPClassifier...




[LogisticRegression(max_iter=600), RidgeClassifier(alpha=5), SVC(), SGDClassifier(alpha=0.01), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=30), BaggingClassifier(), AdaBoostClassifier(learning_rate=1, n_estimators=10), RandomForestClassifier(min_samples_leaf=20, min_samples_split=30), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=10), KNeighborsClassifier(), MLPClassifier()]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClassifier...
Eva



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=5), SVC(kernel='linear'), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=20, min_samples_split=30), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=0.5), RandomForestClassifier(min_samples_leaf=20, min_samples_split=10), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=10), KNeighborsClassifier(n_neighbors=3), MLPClassifier(hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=5), SVC(), SGDClassifier(alpha=0.01), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=20, min_samples_split=20), BaggingClassifier(n_estimators=5), AdaBoostClassifier(learning_rate=0.5, n_estimators=25), RandomForestClassifier(min_samples_leaf=20, min_samples_split=20), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=5,
                           min_samples_split=10), KNeighborsClassifier(n_neighbors=7), MLPClassifier()]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: Gradi



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=5), SVC(), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=30), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=0.5, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=10), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=5,
                           min_samples_split=5), KNeighborsClassifier(n_neighbors=3), MLPClassifier(alpha=0.01, hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifie



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(kernel='linear'), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=10), BaggingClassifier(n_estimators=5), AdaBoostClassifier(learning_rate=0.5, n_estimators=25), RandomForestClassifier(min_samples_leaf=10, min_samples_split=10), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=5), KNeighborsClassifier(n_neighbors=3), MLPClassifier(alpha=0.01, hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13:



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=5), SVC(), SGDClassifier(), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=20, min_samples_split=10), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=5, n_estimators=25), RandomForestClassifier(min_samples_leaf=10, min_samples_split=20), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=5), KNeighborsClassifier(), MLPClassifier()]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClassifier...



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=20, min_samples_split=20), BaggingClassifier(n_estimators=5), AdaBoostClassifier(learning_rate=0.1, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=30), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=5,
                           min_samples_split=10), KNeighborsClassifier(), MLPClassifier(alpha=0.01, hidden_layer_sizes=(100, 10))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClassifier...
Evaluating model 12/13: KNeighborsClassifier...
Evaluating model 13/13: MLPClassifier...
cross validation = 8
Step 3: hyperparameter search
Grid search model 1/13: LogisticRegression...
Grid search model 2/13: RidgeClassifier...
Grid search model 3/13: SVC...
Grid search model 4/13: SGDClassifier...
Grid search model 5/13: Perceptron...
Grid search model 6/13: GaussianNB...
Grid search model 7/13: DecisionTreeClassifier...
Grid search model 8/13: BaggingClassifier...
Grid search model 9/13: AdaBoostClassifier...
Grid search model 10/13: RandomForestClassifier...
Grid search model 11/13: GradientBoostingClas



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(), SGDClassifier(alpha=0.01), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=20, min_samples_split=20), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=0.1, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=10), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=5,
                           min_samples_split=10), KNeighborsClassifier(), MLPClassifier(alpha=0.01, hidden_layer_sizes=(100, 10))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClassifier...
Evaluating model 12/13: KNeighborsClassifier...
Evaluating model 13/13: MLPClassifier...
(3/8: PROCESSING tfidf...
Step 1: read small dataset
Step 2: read large dataset and compute performance
cross validation = 3
Step 3: hyperparameter search
Grid search model 1/13: LogisticRegression...
Grid search model 2/13: RidgeClassifier...
Grid search model 3/13: SVC...
Grid search model 4/13: SGDClassifier...
Grid search model 5/13: Perceptron...
Grid search model 6/13: GaussianNB...
Grid search model 7/13: DecisionTreeClassifier...
Grid search model 8/13: BaggingClassifier...
Grid search model 9/13: AdaBoostClassi



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=30, min_samples_split=10), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=1, n_estimators=25), RandomForestClassifier(min_samples_leaf=10, min_samples_split=30), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=5), KNeighborsClassifier(n_neighbors=7), MLPClassifier(alpha=0.01, hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassif



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=30, min_samples_split=10), BaggingClassifier(), AdaBoostClassifier(learning_rate=1, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=20), GradientBoostingClassifier(min_samples_leaf=5, min_samples_split=10), KNeighborsClassifier(n_neighbors=7), MLPClassifier()]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClass



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.5), SVC(), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=30), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=0.1, n_estimators=25), RandomForestClassifier(min_samples_leaf=20, min_samples_split=20), GradientBoostingClassifier(min_samples_leaf=10, min_samples_split=5), KNeighborsClassifier(n_neighbors=7), MLPClassifier(alpha=0.001, hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: Gradi



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=5), SVC(kernel='linear'), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=20), BaggingClassifier(n_estimators=5), AdaBoostClassifier(learning_rate=0.5, n_estimators=10), RandomForestClassifier(min_samples_leaf=20, min_samples_split=30), GradientBoostingClassifier(min_samples_leaf=10, min_samples_split=10), KNeighborsClassifier(n_neighbors=7), MLPClassifier(alpha=0.001)]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClassif



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(kernel='linear'), SGDClassifier(alpha=0.01), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=10), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=1, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=30), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=5), KNeighborsClassifier(n_neighbors=3), MLPClassifier()]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(kernel='linear'), SGDClassifier(alpha=0.01), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=30), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=0.5, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=10), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=5), KNeighborsClassifier(n_neighbors=3), MLPClassifier(alpha=0.01, hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13:



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(kernel='linear'), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=10), BaggingClassifier(), AdaBoostClassifier(learning_rate=1, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=20), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=10), KNeighborsClassifier(n_neighbors=3), MLPClassifier(alpha=0.01, hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassi



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=5), SVC(kernel='linear'), SGDClassifier(), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=20, min_samples_split=10), BaggingClassifier(n_estimators=5), AdaBoostClassifier(learning_rate=0.1), RandomForestClassifier(min_samples_leaf=20, min_samples_split=20), GradientBoostingClassifier(min_samples_leaf=10, min_samples_split=10), KNeighborsClassifier(n_neighbors=7), MLPClassifier(alpha=0.01)]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClassifier...
Evaluating model 12/13



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=1), SVC(kernel='linear'), SGDClassifier(), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=20, min_samples_split=30), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=0.1, n_estimators=25), RandomForestClassifier(min_samples_leaf=20, min_samples_split=20), GradientBoostingClassifier(min_samples_leaf=10, min_samples_split=5), KNeighborsClassifier(n_neighbors=7), MLPClassifier()]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClassifier...
Evaluating mode



[LogisticRegression(max_iter=600), RidgeClassifier(alpha=1), SVC(kernel='linear'), SGDClassifier(alpha=0.01), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=20, min_samples_split=10), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=0.5, n_estimators=25), RandomForestClassifier(min_samples_leaf=10, min_samples_split=20), GradientBoostingClassifier(min_samples_leaf=10, min_samples_split=10), KNeighborsClassifier(n_neighbors=7), MLPClassifier(alpha=0.001)]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomForestClassifier...
Evaluating model 11/13: GradientBoostingClassif



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(kernel='linear'), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=30), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=1), RandomForestClassifier(min_samples_leaf=10, min_samples_split=20), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=5,
                           min_samples_split=10), KNeighborsClassifier(n_neighbors=3), MLPClassifier(alpha=0.01, hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifie



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(kernel='linear'), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=10), BaggingClassifier(n_estimators=20), AdaBoostClassifier(learning_rate=1, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=10), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=5,
                           min_samples_split=10), KNeighborsClassifier(n_neighbors=3), MLPClassifier(alpha=0.001, hidden_layer_sizes=(100, 100))]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13:



Grid search model 13/13: MLPClassifier...
[LogisticRegression(max_iter=600), RidgeClassifier(alpha=0.01), SVC(kernel='linear'), SGDClassifier(alpha=0.001), Perceptron(), GaussianNB(), DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=20), BaggingClassifier(), AdaBoostClassifier(learning_rate=1, n_estimators=75), RandomForestClassifier(min_samples_leaf=10, min_samples_split=30), GradientBoostingClassifier(learning_rate=0.5, min_samples_leaf=10,
                           min_samples_split=10), KNeighborsClassifier(n_neighbors=3), MLPClassifier()]
Step 4: compute model performance
Evaluating model 1/13: LogisticRegression...
Evaluating model 2/13: RidgeClassifier...
Evaluating model 3/13: SVC...
Evaluating model 4/13: SGDClassifier...
Evaluating model 5/13: Perceptron...
Evaluating model 6/13: GaussianNB...
Evaluating model 7/13: DecisionTreeClassifier...
Evaluating model 8/13: BaggingClassifier...
Evaluating model 9/13: AdaBoostClassifier...
Evaluating model 10/13: RandomFor