In [1]:
import pandas as pd
target = pd.read_csv('data/training_set_labels.csv')
data = pd.read_csv('data/training_set_features.csv')
data = data.merge(target, on="respondent_id").drop(columns='respondent_id')

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import plot_confusion_matrix, recall_score, accuracy_score, precision_score, f1_score, roc_auc_score
from sklearn import svm

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImPipeline

In [3]:
def grab_numeric(df):
    return df.select_dtypes(['float64', 'int64'])

GrabNumeric = FunctionTransformer(grab_numeric)

In [4]:
numeric_transformer = Pipeline(
    steps=[('grab_num', GrabNumeric), 
           ('num_impute', SimpleImputer(strategy='median', add_indicator=True)), 
           ('scaler', StandardScaler())])

categorical_transformer = Pipeline(
    steps=[('cat_impute', SimpleImputer(strategy='most_frequent')),
           ('ohe', OneHotEncoder(sparse=False, handle_unknown='ignore'))])

num_features = list(range(0, 21)) + [31, 32]
cat_features = list(range(21, 31)) + [33, 34]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, num_features),
        ("cat", categorical_transformer, cat_features)], remainder='passthrough')

In [5]:
models = {'Baseline':
              {'regressor': LogisticRegression(random_state=20220124, max_iter=1000),
               'preprocessor': preprocessor,
               'output': None,
               'fit_regressor': None,
               'time_ran' : None
              },
          'DecisionTree':
              {'regressor': DecisionTreeClassifier(random_state=20211122),
               'preprocessor': preprocessor,
               'output': None,
               'fit_regressor': None,
               'time_ran': None
              },
          'RandomForest':
              {'regressor': RandomForestClassifier(random_state=10151997),
               'preprocessor': preprocessor,
               'output': None,
               'fit_regressor': None,
               'time_ran': None
              },
          'GradientBoost':
              {'regressor': GradientBoostingClassifier(random_state=20220125),
               'preprocessor': preprocessor,
               'output': None,
               'fit_regressor': None,
               'time_ran': None
              },  
          'AdaBoost':
              {'regressor': AdaBoostClassifier(random_state=20220126),
               'preprocessor': preprocessor,
               'output': None,
               'fit_regressor': None,
               'time_ran': None
              },  
          'SVM':
              {'regressor': svm.SVC(random_state=20220127, probability=True),
               'preprocessor': preprocessor,
               'output': None,
               'fit_regressor': None,
               'time_ran': None
              },  
         }

In [6]:
def data_split(data, target):
    X = data.drop(columns=target)
    y = data[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=10151997)
    return X_train, X_test, y_train, y_test

In [7]:
def run_models(models, data, target):
    """
    Takes in dict of models to run, df of dataset, string name of target
    Prints every model in dict, its accuracy, precision, auc scores, and time it took to ran 
    """
    X_train, X_test, y_train, y_test = data_split(data, target)
    for name, model in models.items():
        print(name, 'Model:')
        X_train_processed = model['preprocessor'].fit_transform(X_train)
        X_test_processed = model['preprocessor'].transform(X_test)
        model['fit_regressor'] = model['regressor'].fit(X_train_processed, y_train)
        model['output'] = [model['fit_regressor'].score(X_test_processed, y_test), 
                           precision_score(y_test, model['regressor'].predict(X_test_processed)), 
                           roc_auc_score(y_test, model['regressor'].predict_proba(X_test_processed)[:,1])]
        model['time_ran'] = time.asctime()
        print(model, '\n')

In [8]:
run_models(models, data, 'h1n1_vaccine')

Baseline Model:
{'regressor': LogisticRegression(max_iter=1000, random_state=20220124), 'preprocessor': ColumnTransformer(remainder='passthrough',
                  transformers=[('num',
                                 Pipeline(steps=[('grab_num',
                                                  FunctionTransformer(func=<function grab_numeric at 0x7f8c28a4f1f0>)),
                                                 ('num_impute',
                                                  SimpleImputer(add_indicator=True,
                                                                strategy='median')),
                                                 ('scaler', StandardScaler())]),
                                 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                  14, 15, 16, 17, 18, 19, 20, 31, 32]),
                                ('cat',
                                 Pipeline(steps=[('cat_impute',
                                                  SimpleImput

In [9]:
def run_gridsearch(models, params, data, target):
    X_train, X_test, y_train, y_test = data_split(data, target)
    for model in models:
        if model in params.keys():
            print(model, 'Grid Search:')
            pipe = Pipeline(steps=[('col_transformer', models[model]['preprocessor']), 
                             ('regressor', models[model]['regressor'])])
            grid = params[model]
            gridsearch = GridSearchCV(estimator=pipe, param_grid=grid, scoring='precision', cv=5)
            gridsearch.fit(X_train, y_train)
            print(f'Cross validation mean: {gridsearch.cv_results_["mean_test_score"]}')
            print(f'Test precision: {gridsearch.best_score_ :.2%}')
            print(f'Optimal prameters: {gridsearch.best_params_}')

In [10]:
params = {'DecisionTree': [{'regressor__max_depth': [None, 2, 6, 10], 
         'regressor__min_samples_split': [5, 10]}]}

In [11]:
run_gridsearch(models, params, data, 'h1n1_vaccine')

DecisionTree Grid Search:
Cross validation mean: [0.55380876 0.58115068 0.68428017 0.68428017 0.70597741 0.70597741
 0.66124408 0.66017925]
Test precision: 70.60%
Optimal prameters: {'regressor__max_depth': 6, 'regressor__min_samples_split': 5}


In [12]:
X = data.drop(columns='h1n1_vaccine')
y = data['h1n1_vaccine']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=10151997)

In [13]:
sm = SMOTE(sampling_strategy='auto', random_state=42)

In [14]:
sm2 = SMOTE(sampling_strategy=0.8, random_state=42)

In [15]:
X_train_clean = X_train.select_dtypes('float64').dropna()
y_train_clean = y_train[X_train_clean.index]

In [16]:
X_clean_resmp, y_clean_resmp = sm.fit_resample(X_train_clean, y_train_clean)

y_clean_resmp.value_counts()

1    7096
0    7096
Name: h1n1_vaccine, dtype: int64

In [17]:
X_clean_resmp2, y_clean_resmp2 = sm2.fit_resample(X_train_clean, y_train_clean)

y_clean_resmp2.value_counts()

0    7096
1    5676
Name: h1n1_vaccine, dtype: int64

In [18]:
imb_pipe = ImPipeline(steps=[('ct', ColumnTransformer),
                             ('sm', SMOTE(random_state=42)),
                            ('rfc', RandomForestClassifier(random_state=42))])

In [19]:
parameters = {'rfc__criterion': ['gini', 'entropy'],
          'rfc__min_samples_leaf': [1, 5, 10],
          'sm__k_neighbors': [3, 5, 9]}


In [20]:
RF_clf=RandomForestClassifier(random_state=42)

params = {'n_estimators':[10, 30, 100],
          'criterion':['gini', 'entropy'],
          'max_depth':[2, 6, 10],
          'min_samples_split':[5, 10],
          'min_samples_leaf':[3, 6]}

In [21]:
gs = GridSearchCV(estimator=RF_clf,
                 param_grid=params,
                 cv=10, scoring='roc_auc')

In [22]:
gs.fit(X_train_clean,y_train_clean)
print(gs.best_params_) 
print(gs.best_score_)

{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 3, 'min_samples_split': 10, 'n_estimators': 100}
0.8530985864834303


In [23]:
GB_clf=GradientBoostingClassifier(random_state=42)

params = {'loss': ['deviance', 'exponential'],
          'n_estimators':[10, 30, 100],
          'criterion':['friedman_mse', 'squared_error'],
          'max_depth':[2, 6, 10],
          'min_samples_split':[5, 10],
          'min_samples_leaf':[3, 6]}

GB_GS = GridSearchCV(estimator=GB_clf,
                 param_grid=params,
                 cv=10, scoring='roc_auc')

In [24]:
GB_GS.fit(X_train_clean,y_train_clean)
print(GB_GS.best_params_) 
print(GB_GS.best_score_)

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

Traceback (most recent call last):
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 498, in fit
    n_stages = self._fit_stages(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 555, in _fit_stages
    raw_predictions = self._fit_stage(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/ensemble/_gb.py", line 211, in _fit_stage
    tree.fit(X, residual, sample_weight=sample_weight,
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 1242, in fit
    super().fit(
  File "/Users/jusohn2/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 3

{'criterion': 'friedman_mse', 'loss': 'exponential', 'max_depth': 2, 'min_samples_leaf': 3, 'min_samples_split': 5, 'n_estimators': 100}
0.8539226715741186


In [25]:
DT_clf=DecisionTreeClassifier(random_state=42)

params = {'criterion':['gini', 'entropy'],
          'splitter':['best', 'random'],
          'max_depth':[2, 6, 10],
          'min_samples_split':[5, 10],
          'min_samples_leaf':[3, 6]}

DT_GS = GridSearchCV(estimator=DT_clf,
                 param_grid=params,
                 cv=10, scoring='roc_auc')



In [26]:
DT_GS.fit(X_train_clean,y_train_clean)
print(DT_GS.best_params_) 
print(DT_GS.best_score_)

{'criterion': 'entropy', 'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 10, 'splitter': 'best'}
0.832610979919054
