### Setup

In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# Load the data set
cancer = load_breast_cancer()

# Split the data into training and testing sets
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Create distributions to draw hyperparameters from
distributions = {'penalty': ['l1', 'l2'], 'C': uniform(loc=0, scale=100)}

# The logistic regression model
lr = LogisticRegression(solver = 'liblinear', max_iter = 1000)

# Create a RandomizedSearchCV model
clf = RandomizedSearchCV(lr, distributions, n_iter=8)

### 1. Fit `clf` to training data and get best hyperparameters

In [2]:
clf.fit(X_train, y_train)
best_model = clf.best_estimator_
print(best_model)
print(clf.best_params_)

LogisticRegression(C=27.05661259534914, max_iter=1000, solver='liblinear')
{'C': 27.05661259534914, 'penalty': 'l2'}


### 2. Calculate training and test scores of the best estimator

In [3]:
best_score = clf.best_score_
test_score = clf.score(X_test, y_test)

print(best_score)
print(test_score)

0.9718194254445965
0.951048951048951


### 3. Viewing random search Results

In [4]:
hyperparameter_values = pd.DataFrame(clf.cv_results_['params'])
randomsearch_scores = pd.DataFrame(clf.cv_results_['mean_test_score'], columns=['score'])

df = pd.concat([hyperparameter_values, randomsearch_scores], axis = 1)
print(df)

           C penalty     score
0  10.810261      l1  0.964761
1   7.251566      l2  0.960055
2  98.977496      l1  0.967114
3  45.565348      l2  0.969466
4  50.352482      l1  0.969466
5   8.662697      l1  0.962408
6  27.056613      l2  0.971819
7   2.168449      l2  0.957729
