# Hyper Parameter Tuning

In the last section, we focused on cleaning data in order to achieve our first decent prediction. Now we are going to focus on hyper parameter tuning in order to improve our model's score even more (hopefully).

In [104]:
from IPython.utils import io
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [123]:
with io.capture_output() as captured:
    %run log_reg_00.ipynb 

In [124]:
# Display the parameters for our logistic regression estimator:
h_params = classifier_os.get_params()
h_params

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [153]:
p_grid = {
    'C': np.logspace(-3, 3, 7),
    'fit_intercept': [1, 0],
    'max_iter': [1000]
}

In [154]:
clf = model_selection.GridSearchCV(classifier_os, p_grid, scoring='f1')

In [155]:
clf.fit(x_train, y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03]),
                         'fit_intercept': [1, 0], 'max_iter': [1000]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='f1', verbose=0)

In [156]:
clf.best_params_, clf.best_score_

({'C': 10.0, 'fit_intercept': 1, 'max_iter': 1000}, 0.8062774992610986)

In [159]:
pd.DataFrame(best_model.cv_results_).sort_values(by='mean_test_score', ascending=False).head(3)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
8,0.010631,0.00282,0.0014,0.000789,10,l1,"{'C': 10.0, 'penalty': 'l1'}",0.836364,0.787879,0.824242,0.792683,0.780488,0.804331,0.021897,1
10,0.008469,0.00542,0.001009,1.4e-05,100,l1,"{'C': 100.0, 'penalty': 'l1'}",0.836364,0.787879,0.824242,0.792683,0.780488,0.804331,0.021897,1
11,0.00321,0.000403,0.001195,0.000402,100,l2,"{'C': 100.0, 'penalty': 'l2'}",0.836364,0.787879,0.824242,0.792683,0.780488,0.804331,0.021897,1


In [160]:
y_pred = clf.predict(x_test)

In [161]:
def pprint(web, level):
    for k,v in web.items():
        if isinstance(v, dict):
            print('\t'*level, f'{k}: ')
            level += 1
            pprint(v, level)
            level -= 1
        else:
            print('\t'*level, k, ": ", v)

In [162]:
results = metrics.classification_report(y_pred, y_test, target_names=['died', 'lived'], output_dict=True)
pprint(results, 0)

 died: 
	 precision :  0.8344827586206897
	 recall :  0.8287671232876712
	 f1-score :  0.8316151202749139
	 support :  146
 lived: 
	 precision :  0.8076923076923077
	 recall :  0.813953488372093
	 f1-score :  0.8108108108108109
	 support :  129
 accuracy :  0.8218181818181818
 macro avg: 
	 precision :  0.8210875331564986
	 recall :  0.8213603058298822
	 f1-score :  0.8212129655428624
	 support :  275
 weighted avg: 
	 precision :  0.8219156016397396
	 recall :  0.8218181818181818
	 f1-score :  0.8218560078353893
	 support :  275


In [163]:
y_pred2 = classifier_os.predict(x_test)

In [164]:
pprint(metrics.classification_report(y_pred2, y_test, output_dict=True), 0)

 0: 
	 precision :  0.8344827586206897
	 recall :  0.8402777777777778
	 f1-score :  0.8373702422145328
	 support :  144
 1: 
	 precision :  0.823076923076923
	 recall :  0.816793893129771
	 f1-score :  0.8199233716475096
	 support :  131
 accuracy :  0.8290909090909091
 macro avg: 
	 precision :  0.8287798408488063
	 recall :  0.8285358354537744
	 f1-score :  0.8286468069310212
	 support :  275
 weighted avg: 
	 precision :  0.8290494333252953
	 recall :  0.8290909090909091
	 f1-score :  0.82905918750806
	 support :  275
