In [4]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

dataset = load_breast_cancer()

scaler = StandardScaler()
dataset_scaled = scaler.fit_transform(dataset.data)

X_train, X_test, y_train, y_test = train_test_split(dataset_scaled, dataset.target, test_size=0.3, random_state=0)


In [5]:
from sklearn.metrics import accuracy_score, roc_auc_score

lr_clf = LogisticRegression()
lr_clf.fit(X_train, y_train)
lr_preds = lr_clf.predict(X_test)
lr_preds_proba = lr_clf.predict_log_proba(X_test)[:,1]

print(f'accuracy : {accuracy_score(y_test, lr_preds)}, roc_auc : {roc_auc_score(y_test, lr_preds)} ')

accuracy : 0.9766081871345029, roc_auc : 0.9715608465608465 


In [6]:
solvers = ['lbfgs', 'liblinear', 'newton-cg', 'sag', 'saga']

for solver in solvers:
    lr_clf = LogisticRegression(solver=solver, max_iter=600)
    lr_clf.fit(X_train, y_train)
    lr_preds = lr_clf.predict(X_test)
    lr_preds_proba = lr_clf.predict_log_proba(X_test)[:,1]

    print(f' solver {solver} :: accuracy : {accuracy_score(y_test, lr_preds)}, roc_auc : {roc_auc_score(y_test, lr_preds)} ')

 solver lbfgs :: accuracy : 0.9766081871345029, roc_auc : 0.9715608465608465 
 solver liblinear :: accuracy : 0.9824561403508771, roc_auc : 0.9794973544973544 
 solver newton-cg :: accuracy : 0.9766081871345029, roc_auc : 0.9715608465608465 
 solver sag :: accuracy : 0.9824561403508771, roc_auc : 0.9794973544973544 
 solver saga :: accuracy : 0.9824561403508771, roc_auc : 0.9794973544973544 


liblinear, saga : l1, l2
lbfgs, newton-cg, sag : l2

C: alpha 역수 > 작을 수록 규제가 크다

In [26]:

from sklearn.model_selection import GridSearchCV

# params = {
#     'solver': ['liblinear', 'lbfgs'],
#     'penalty': ['l2', 'l1'],
#     'C' : [0.01, 0.1, 1, 5, 10, ]
# }
params = [
    {'solver': ['liblinear'], 'penalty': ['l1', 'l2'], 'C': [0.01, 0.1, 1, 5, 10, 100]},
    {'solver': ['lbfgs', 'newton-cg'], 'penalty': ['l2'], 'C': [0.1, 1, 5, 10, 100]}
]
lr_clf = LogisticRegression(max_iter=1000)

grid_clf = GridSearchCV(lr_clf, param_grid=params, scoring='accuracy', cv=3)
grid_clf.fit(dataset_scaled, dataset.target)
print(f'Best parameter : {grid_clf.best_params_}, Best_accuracy {grid_clf.best_score_}')

Best parameter : {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}, Best_accuracy 0.9789102385593614


Best parameter : {'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'}, Best_accuracy 0.9648565859092174
Best parameter : {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}, Best_accuracy 0.9789102385593614

accuracy : 0.9532163742690059, roc_auc : 0.9497354497354497 
accuracy : 0.9707602339181286, roc_auc : 0.9636243386243386 

In [27]:
best_param = grid_clf.best_params_

best_lr_clf = LogisticRegression(**best_param)
best_lr_clf.fit(X_train, y_train)

lr_preds = best_lr_clf.predict(X_test)
lr_preds_proba = best_lr_clf.predict_log_proba(X_test)[:,1]

print(f'accuracy : {accuracy_score(y_test, lr_preds)}, roc_auc : {roc_auc_score(y_test, lr_preds)} ')

accuracy : 0.9707602339181286, roc_auc : 0.9636243386243386 
