In [71]:
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline

# Para mostrar el pipeline
from sklearn import set_config

set_config(display='diagram')
sns.set()

In [72]:
df = pd.read_csv("sonar.csv")
df.head()

Unnamed: 0,Freq_1,Freq_2,Freq_3,Freq_4,Freq_5,Freq_6,Freq_7,Freq_8,Freq_9,Freq_10,...,Freq_52,Freq_53,Freq_54,Freq_55,Freq_56,Freq_57,Freq_58,Freq_59,Freq_60,Label
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [73]:
df['Target'] = df['Label'].map({
    'R': 0,
    'M': 1
})
X = df.drop(['Target', 'Label'], axis=1)
y = df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [74]:
def calc_predictions(model):
    df_results = pd.DataFrame(
        columns=['model_name', 'accuracy', 'precision', 'recall_sensitivity', 'recall_specificity', 'f1', 'auc'])
    # entrenamiento y predicciones
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # classification metrics
    model_name = model.__class__.__name__
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall_sensitivity = recall_score(y_test, y_pred)
    recall_specificity = recall_score(y_test, y_pred, pos_label=0)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)

    # guardar resultados
    row = [model_name, accuracy, precision, recall_sensitivity, recall_specificity, f1, auc]
    df_results.loc[len(df_results)] = row
    return df_results

# Pipeline 2-3 pasos

In [75]:
# Modelado base
pipeline = Pipeline([
    ('lor', LogisticRegression()), # clasificación
])
pipeline.fit(X_train, y_train)
calc_predictions(pipeline)

Unnamed: 0,model_name,accuracy,precision,recall_sensitivity,recall_specificity,f1,auc
0,Pipeline,0.809524,0.810811,0.857143,0.75,0.833333,0.803571


In [76]:
from sklearn.feature_selection import SelectKBest
from sklearn.preprocessing import MinMaxScaler

pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    # ('univ_select', SelectKBest(k=20)),
    # ('scaler', StandardScaler()),
    ('lor', LogisticRegression()),
])
pipeline

In [77]:
pipeline.fit(X_train, y_train)
calc_predictions(pipeline)

Unnamed: 0,model_name,accuracy,precision,recall_sensitivity,recall_specificity,f1,auc
0,Pipeline,0.825397,0.852941,0.828571,0.821429,0.84058,0.825


# FeatureUnion

In [78]:
from sklearn.pipeline import FeatureUnion
from sklearn.decomposition import PCA

union = FeatureUnion([
    ("univ_select", SelectKBest(k=5)),
    ("pca", PCA(n_components=20))
])
union.fit(X, y)
X_selected = union.transform(X)
print("X.shape ", X.shape)
print("X_selected.shape ", X_selected.shape)

X.shape  (208, 60)
X_selected.shape  (208, 25)


In [79]:
from sklearn.decomposition import TruncatedSVD

pipeline = Pipeline([
    ("feature_union", FeatureUnion([
        ("univ_select", SelectKBest(k=5)),
        ("pca", PCA(n_components=20)),
        ("svd", TruncatedSVD(n_components=20)),
    ])),
    ('scaler', MinMaxScaler()),
    ('lor', LogisticRegression())
])
pipeline

In [80]:
pipeline.fit(X_train, y_train)
calc_predictions(pipeline)

Unnamed: 0,model_name,accuracy,precision,recall_sensitivity,recall_specificity,f1,auc
0,Pipeline,0.857143,0.842105,0.914286,0.785714,0.876712,0.85


# Pipeline + GridSearchCV

In [81]:
pipeline = Pipeline([
    ("feature_union", FeatureUnion([
        ("univ_select", SelectKBest()),
        ("pca", PCA()),
        ("svd", TruncatedSVD()),
    ])),
    ('scaler', MinMaxScaler()),
    ('lor', LogisticRegression())
])
params = {
    'lor__C': [10**-2, 10**-1, 10**0, 10**1, 10**2],
    'lor__penalty': ['l1', 'l2', 'elasticnet'],
    'lor__class_weight': [None, 'balanced', {0:1, 1:5, 2:1}],
    'feature_union__univ_select__k': [5, 7, 10],
    'feature_union__pca__n_components': [10, 15, 20, 25],
    'feature_union__svd__n_components': [10, 15, 20, 25]
}

In [82]:
%%time
grid_model = GridSearchCV(pipeline, params, scoring='f1', n_jobs=4).fit(X_train, y_train)

CPU times: total: 5.55 s
Wall time: 29.5 s


7200 fits failed out of a total of 10800.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3600 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Python310\lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Python310\lib\site-packages\sklearn\pipeline.py", line 475, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "c:\Python310\lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Python310\

In [83]:
grid_model.best_params_

{'feature_union__pca__n_components': 20,
 'feature_union__svd__n_components': 20,
 'feature_union__univ_select__k': 10,
 'lor__C': 100,
 'lor__class_weight': {0: 1, 1: 5, 2: 1},
 'lor__penalty': 'l2'}

In [84]:
grid_model.best_score_

0.7904953560371517

In [85]:
best_pipeline = grid_model.best_estimator_
best_pipeline # objeto pipeline con todos los parámetros y todo ya configurado

In [86]:
calc_predictions(best_pipeline)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Unnamed: 0,model_name,accuracy,precision,recall_sensitivity,recall_specificity,f1,auc
0,Pipeline,0.857143,0.795455,1.0,0.678571,0.886076,0.839286


# ColumnTransformer

Clase ColumnTransformer para aplicar transformaciones a columnas específicas

In [87]:
df = pd.read_csv("titanic.csv")
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Prefix,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,Braund,Mr. Owen Harris,male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,Cumings,Mrs. John Bradley (Florence Briggs Thayer),female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,Heikkinen,Miss. Laina,female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,Futrelle,Mrs. Jacques Heath (Lily May Peel),female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,Allen,Mr. William Henry,male,35.0,0,0,373450,8.05,,S


In [88]:
X = df.drop(['Survived', 'PassengerId', 'Name', 'Ticket'], axis=1, errors='ignore')
y = df['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [89]:
X.head()

Unnamed: 0,Pclass,Prefix,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,3,Braund,male,22.0,1,0,7.25,,S
1,1,Cumings,female,38.0,1,0,71.2833,C85,C
2,3,Heikkinen,female,26.0,0,0,7.925,,S
3,1,Futrelle,female,35.0,1,0,53.1,C123,S
4,3,Allen,male,35.0,0,0,8.05,,S


In [90]:
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Prefix           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [91]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import QuantileTransformer, OneHotEncoder
from sklearn.impute import SimpleImputer

# columnas numericas
pipeline_num = Pipeline([
    ("imputer_mean", SimpleImputer(strategy='mean')),
    ("scaler", MinMaxScaler()),
    ("power", QuantileTransformer(output_distribution='normal'))
    # ("power", PowerTransformer()),
    # ("power", PowerTransformer(method='box-cox')),
])

# columnas categoricas
pipeline_cat = Pipeline([
    ("imputer_constant", SimpleImputer(strategy='constant', fill_value='X')),
    ("encoder", OneHotEncoder(drop='first', handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ("numeric", pipeline_num, ["Age", "Fare"]),
    ("categorical", pipeline_cat, ["Sex", "Cabin", "Embarked"])
])
preprocessor

In [92]:
from sklearn.pipeline import make_pipeline

# Otra forma de crear un Pipeline
pipeline = make_pipeline(preprocessor, LogisticRegression())
pipeline

In [93]:
print(df.columns)

Index(['PassengerId', 'Survived', 'Pclass', 'Prefix', 'Name', 'Sex', 'Age',
       'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')


In [94]:
calc_predictions(pipeline)



Unnamed: 0,model_name,accuracy,precision,recall_sensitivity,recall_specificity,f1,auc
0,Pipeline,0.783582,0.738739,0.738739,0.815287,0.738739,0.777013


# Custom Transformer

Crear transformadores personalizados.

In [95]:
from sklearn.base import BaseEstimator, TransformerMixin


class Debugger(BaseEstimator, TransformerMixin):

    # constructor
    def __init__(self, title):
        self.title = title

    def transform(self, data):
        # print(self.title, "\n", pd.DataFrame(data).isnull().sum())
        # print(pd.DataFrame(data).shape)
        # print("====================")
        print(self.title, " ", pd.DataFrame(data).shape)
        print("====================")
        return data

    def fit(self, data, y=None, **fit_params_steps):
        return self

In [96]:
class FamilySizeFeature(BaseEstimator, TransformerMixin):
    def transform(self, X, y=None):
        X_copy = X.copy()
        # TODO - se pueden detectar si hay nulos pero lo más idóneo es en otro paso del Pipeline
        X_copy["FamilySize"] = X_copy["SibSp"] + X_copy["Parch"] + 1
        return X_copy

    def fit(self, data, y=None, **fit_params_steps):
        return self


In [97]:
class OutlierRemover(BaseEstimator, TransformerMixin):
    def __init__(self, factor=1.5):
        self.factor = factor

    def outlier_detector(self, X, y=None):
        X = pd.Series(X).copy()
        q1 = X.quantile(0.25)
        q3 = X.quantile(0.75)
        iqr = q3 - q1
        self.lower_bound.append(q1 - (self.factor * iqr))
        self.upper_bound.append(q3 + (self.factor * iqr))

    def fit(self, X, y=None):
        self.lower_bound = []
        self.upper_bound = []
        X.apply(self.outlier_detector)
        return self

    def transform(self, X, y=None):
        X = pd.DataFrame(X).copy()
        for i in range(X.shape[1]):
            x = X.iloc[:, i].copy()
            x[(x < self.lower_bound[i]) | (x > self.upper_bound[i])] = np.nan
            X.iloc[:, i] = x
        return X

In [98]:
class ClassifierModelAdapter(BaseEstimator):
    def __init__(self, model=LogisticRegression()):
        self.model = model

    def fit(self, X, y=None, **kwargs):
        print("Fitting estimator: ", self.model.__class__.__name__)
        self.model.fit(X, y)
        return self

    def predict(self, X, y=None):
        print("Predicting with estimator: ", self.model.__class__.__name__)
        return self.model.predict(X)

    def predict_proba(self, X):
        return self.model.predict_proba(X)

    def score(self, X, y):
        return self.model.score(X, y)


TODO

Probar a crear un transformador personalizado para procesar las columnas: Title, Cabin, Ticket

In [100]:
# columnas numericas
pipeline_num = Pipeline([
    # ("debugger1", Debugger("Numeric before imputer")),
    ("outlier_remover", OutlierRemover()),
    ("imputer_mean", SimpleImputer(strategy='mean')),
    # ("debugger2", Debugger("Numeric after imputer")),
    ("scaler", MinMaxScaler()),
    # ("debugger2", Debugger("Numeric after imputer")),
    ("power", QuantileTransformer(output_distribution='normal'))
    # ("power", PowerTransformer()),
    # ("power", PowerTransformer(method='box-cox')),
])

# columnas categoricas
pipeline_cat = Pipeline([
    ("imputer_constant", SimpleImputer(strategy='constant', fill_value='X')),
    ("encoder", OneHotEncoder(drop='first', handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ("numeric", pipeline_num, ["Age", "Fare"]),
    ("categorical", pipeline_cat, ["Sex", "Cabin", "Embarked"])
])

# pipeline = make_pipeline(
#     Debugger("before familysize creation"),
#     FamilySizeFeature(),
#     Debugger("after familysize creation"),
#     preprocessor,
#     LogisticRegression()
# )
pipeline = make_pipeline(
    Debugger("before familysize creation"),
    FamilySizeFeature(),
    Debugger("after familysize creation"),
    preprocessor,
    ClassifierModelAdapter(), # por defecto carga la regresión logística
    # ClassifierModelAdapter(RandomForestClassifier())
)
pipeline

In [101]:
calc_predictions(pipeline)

before familysize creation   (623, 9)
after familysize creation   (623, 10)
Fitting estimator:  LogisticRegression
before familysize creation   (268, 9)
after familysize creation   (268, 10)
Predicting with estimator:  LogisticRegression




Unnamed: 0,model_name,accuracy,precision,recall_sensitivity,recall_specificity,f1,auc
0,Pipeline,0.783582,0.738739,0.738739,0.815287,0.738739,0.777013


# Guardar el pipeline

1. Guardar el pipeline a un archivo
2. Cargar el pipeline desde un archivo
3. Realizar una predicción con el pipeline cargado

In [103]:
import joblib

# Guardar el modelo
joblib.dump(pipeline, '../pipeline.pkl')
# Guardar columnas
joblib.dump(list(X.columns), '../pipeline_columns.pkl')

['../pipeline_columns.pkl']

In [104]:
# cargar el modelo en memoria
pipeline_saved = joblib.load('../pipeline.pkl')
pipeline_saved

In [105]:
calc_predictions(pipeline_saved)

before familysize creation   (623, 9)
after familysize creation   (623, 10)
Fitting estimator:  LogisticRegression
before familysize creation   (268, 9)
after familysize creation   (268, 10)
Predicting with estimator:  LogisticRegression




Unnamed: 0,model_name,accuracy,precision,recall_sensitivity,recall_specificity,f1,auc
0,Pipeline,0.783582,0.738739,0.738739,0.815287,0.738739,0.777013


In [106]:
X.head()

Unnamed: 0,Pclass,Prefix,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,3,Braund,male,22.0,1,0,7.25,,S
1,1,Cumings,female,38.0,1,0,71.2833,C85,C
2,3,Heikkinen,female,26.0,0,0,7.925,,S
3,1,Futrelle,female,35.0,1,0,53.1,C123,S
4,3,Allen,male,35.0,0,0,8.05,,S


In [107]:
column_names = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']
new_row = [3, 'male', 23, 1, 0, 7, None, 'S']

X_new = pd.DataFrame(columns=column_names)
X_new.loc[len(X_new)] = new_row
X_new.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,3,male,23,1,0,7,,S


In [108]:
pipeline_saved.predict(X_new)

before familysize creation   (1, 8)
after familysize creation   (1, 9)
Predicting with estimator:  LogisticRegression




array([0], dtype=int64)

# Pipeline múltiples modelos

In [110]:
pipeline_num = Pipeline([
    ("outlier_remover", OutlierRemover()),
    ("imputer_mean", SimpleImputer(strategy='mean')),
    ("scaler", MinMaxScaler()),
    ("power", QuantileTransformer(output_distribution='normal'))
])
pipeline_cat = Pipeline([
    ("imputer_constant", SimpleImputer(strategy='constant', fill_value='X')),
    ("encoder", OneHotEncoder(drop='first', handle_unknown='ignore'))
])
preprocessor = ColumnTransformer([
    ("numeric", pipeline_num, ["Age", "Fare"]),
    ("categorical", pipeline_cat, ["Sex", "Cabin", "Embarked"])
])
models = [
    ('lor', LogisticRegression()),
    ('knn', KNeighborsClassifier()),
    ('rf', RandomForestClassifier()),
    ('gbm', GradientBoostingClassifier()),
]
for name, model in models:
    pipeline = make_pipeline(
        FamilySizeFeature(),
        preprocessor,
        model
    )
    df_results = calc_predictions(pipeline)
    print(name, df_results['f1'])
    print("=======")




lor 0    0.738739
Name: f1, dtype: float64
knn 0    0.689655
Name: f1, dtype: float64
rf 0    0.699029
Name: f1, dtype: float64
gbm 0    0.722222
Name: f1, dtype: float64




# Pipeline múltiples modelos + GridSearchCV

In [111]:
model_lor = LogisticRegression()
model_knn = KNeighborsClassifier()
model_rf = RandomForestClassifier()
model_gbm = GradientBoostingClassifier()

model_lor_params = {
    'classifier__C': [10 ** -2, 10 ** -1, 10 ** 0, 10 ** 1, 10 ** 2],
    'classifier__penalty': ['l1', 'l2', 'elasticnet'],
    'classifier__class_weight': [None, 'balanced', {0: 1, 1: 5, 2: 1}],
    'classifier': [model_lor]
}
model_knn_params = {
    'classifier__n_neighbors': [2, 5, 10, 25, 50],
    'classifier__metric': ['manhattan', 'chebyshev', 'minkowski'],
    'classifier__weights': ['uniform', 'distance'],
    'classifier': [model_knn]
}
model_rf_params = {
    'classifier__criterion': ['gini', 'entropy'],
    'classifier__n_estimators': [10, 50, 100, 250],
    'classifier__max_depth': [5, 10, 15, 25, 50],
    'classifier': [model_rf]
}
model_gbm_params = {
    'classifier__n_estimators': [10, 50, 100, 250],
    'classifier__max_depth': [5, 10, 15, 25, 50],
    'classifier__min_samples_split': [2, 5, 10],
    'classifier': [model_gbm]
}

pipeline = Pipeline([
    ('family_size', FamilySizeFeature()),
    ('preprocessor', preprocessor),
    ('classifier', model_lor)
])

params = [
    model_lor_params,
    model_knn_params,
    model_rf_params,
    model_gbm_params
]

In [112]:
%%time
grid_model = GridSearchCV(pipeline, params, cv=3, n_jobs=4, scoring='f1').fit(X_train, y_train)

CPU times: total: 922 ms
Wall time: 24.1 s


120 fits failed out of a total of 525.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
45 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Python310\lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Python310\lib\site-packages\sklearn\pipeline.py", line 475, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "c:\Python310\lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Python310\lib\s

In [113]:
best_model = grid_model.best_estimator_
best_model

In [114]:
calc_predictions(best_model)



Unnamed: 0,model_name,accuracy,precision,recall_sensitivity,recall_specificity,f1,auc
0,Pipeline,0.768657,0.775281,0.621622,0.872611,0.69,0.747117


In [115]:
grid_model.cv_results_

{'mean_fit_time': array([0.0172054 , 0.01603731, 0.01322691, 0.01372163, 0.01587264,
        0.01306287, 0.01322651, 0.01570582, 0.01272241, 0.01240357,
        0.01686549, 0.01339134, 0.01256649, 0.01587335, 0.01371988,
        0.01702809, 0.01868113, 0.01438411, 0.01488026, 0.01653345,
        0.0130593 , 0.01355608, 0.01967231, 0.01289622, 0.01223445,
        0.01735989, 0.01438483, 0.01487939, 0.01868343, 0.01355632,
        0.01256506, 0.02066811, 0.012568  , 0.0120701 , 0.02033623,
        0.01554108, 0.01206994, 0.02248367, 0.01306097, 0.01256696,
        0.02546207, 0.01571608, 0.01271598, 0.03042006, 0.01371161,
        0.0132285 , 0.01488002, 0.01339149, 0.01405358, 0.01421539,
        0.0132525 , 0.01355584, 0.01406447, 0.01354273, 0.01405374,
        0.01372242, 0.01322659, 0.01255465, 0.01355688, 0.01405358,
        0.01487986, 0.0165329 , 0.01554108, 0.01554251, 0.0140663 ,
        0.01305072, 0.01372258, 0.01388828, 0.01686891, 0.01420911,
        0.01336956, 0.01669931,

In [116]:
df_cv_results = pd.DataFrame(grid_model.cv_results_)
df_cv_results = df_cv_results.sort_values(by=["rank_test_score"])
df_cv_results[['rank_test_score', 'mean_test_score', 'std_test_score','params']]

Unnamed: 0,rank_test_score,mean_test_score,std_test_score,params
117,1,0.742334,0.024136,"{'classifier': GradientBoostingClassifier(), '..."
125,2,0.740813,0.016681,"{'classifier': GradientBoostingClassifier(), '..."
121,3,0.737914,0.014618,"{'classifier': GradientBoostingClassifier(), '..."
86,4,0.734808,0.035923,"{'classifier': RandomForestClassifier(), 'clas..."
31,5,0.733356,0.036255,"{'classifier': LogisticRegression(), 'classifi..."
...,...,...,...,...
32,136,,,"{'classifier': LogisticRegression(), 'classifi..."
30,136,,,"{'classifier': LogisticRegression(), 'classifi..."
29,136,,,"{'classifier': LogisticRegression(), 'classifi..."
41,136,,,"{'classifier': LogisticRegression(), 'classifi..."
