In [2]:
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including scikit-learn's convergence and failed-fit warnings.
warnings.filterwarnings("ignore")

# set_config is exported by the top-level sklearn package; sklearn._config is a
# private module whose layout is not part of the public API.
from sklearn import set_config
# Show every estimator parameter in reprs, not only the ones changed from defaults.
set_config(print_changed_only=False)

In [3]:
from sklearn.datasets import load_breast_cancer

# Load scikit-learn's bundled breast-cancer dataset; later cells read its
# .data (feature matrix) and .target (labels) attributes.
cancer = load_breast_cancer()

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Split the raw features first so the scaler never sees the held-out rows.
# The row assignment depends only on the sample count and random_state, so this
# yields the same train/test partition as splitting the pre-scaled matrix did.
raw_train_X, raw_test_X, train_y, test_y = train_test_split(cancer.data, cancer.target, test_size=0.3, random_state=42)

# Learn the per-feature mean/variance from the training rows only, then apply
# those statistics to both splits (avoids leaking test-set statistics).
scaler = StandardScaler()
train_X = scaler.fit_transform(raw_train_X)
test_X = scaler.transform(raw_test_X)

# Full-dataset scaled matrix, kept with its original definition for later cells
# that cross-validate over all rows. It is fitted on every row, so it must not
# be used to build a held-out evaluation set.
data_scaled = StandardScaler().fit_transform(cancer.data)

train_X.shape, test_X.shape

((398, 30), (171, 30))

In [6]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier with all-default hyperparameters.
lr_clf = LogisticRegression()
# Bare expression so the notebook displays the estimator's repr.
lr_clf

In [7]:
from sklearn.metrics import accuracy_score, roc_auc_score

# Train the baseline model on the training split.
lr_clf.fit(train_X, train_y)

# Hard labels feed accuracy; ROC AUC needs a continuous score, so take the
# second column of predict_proba (probability of the class labelled 1).
predicts = lr_clf.predict(test_X)
probas = lr_clf.predict_proba(test_X)[:, 1]

test_accuracy = accuracy_score(test_y, predicts)
test_roc_auc = roc_auc_score(test_y, probas)
print('accuracy: {0:.3f}, roc_auc:{1:.3f}'.format(test_accuracy, test_roc_auc))

accuracy: 0.982, roc_auc:0.998


### solver에 따른 결과 비교

In [8]:
solvers = ['lbfgs', 'liblinear', 'newton-cg', 'sag', 'saga']

# Fit one model per solver on the same split and report held-out metrics.
for solver in solvers:
    # liblinear, sag and saga shuffle the data internally, so random_state is
    # pinned to make the comparison repeatable across runs.
    # max_iter is raised above the default, presumably so the slower solvers
    # reach convergence -- TODO confirm 600 is sufficient.
    lr_clf = LogisticRegression(solver=solver, max_iter=600, random_state=42)
    lr_clf.fit(train_X, train_y)

    predicts = lr_clf.predict(test_X)
    # Second column of predict_proba = probability of the class labelled 1.
    probas = lr_clf.predict_proba(test_X)[:, 1]

    print('solver:{0}, accuracy: {1:.3f}, roc_auc:{2:.3f}'.format(solver,
                                                                  accuracy_score(test_y, predicts),
                                                                  roc_auc_score(test_y, probas)))

solver:lbfgs, accuracy: 0.982, roc_auc:0.998
solver:liblinear, accuracy: 0.982, roc_auc:0.998
solver:newton-cg, accuracy: 0.982, roc_auc:0.998
solver:sag, accuracy: 0.982, roc_auc:0.998
solver:saga, accuracy: 0.982, roc_auc:0.998


### GridSearchCV 적용

In [9]:
from sklearn.model_selection import GridSearchCV

# lbfgs supports only the l2 penalty, so a single Cartesian grid would schedule
# invalid lbfgs + l1 fits that fail and are scored as NaN. Giving each solver
# its own sub-grid searches exactly the valid combinations.
c_values = [0.01, 0.1, 1, 5, 10]
params = [
    {'solver': ['liblinear'], 'penalty': ['l2', 'l1'], 'C': c_values},
    {'solver': ['lbfgs'], 'penalty': ['l2'], 'C': c_values},
]

lr_clf = LogisticRegression()

# 3-fold cross-validated search over the full scaled dataset, ranked by accuracy.
grid_clf = GridSearchCV(lr_clf, param_grid=params, scoring='accuracy', cv=3 )
grid_clf.fit(data_scaled, cancer.target)

# best_score_ is the mean cross-validated accuracy of the best combination.
print('최적 하이퍼 파라미터:{0}, 최적 평균 정확도:{1:.3f}'.format(grid_clf.best_params_,
                                                  grid_clf.best_score_))

최적 하이퍼 파라미터:{'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}, 최적 평균 정확도:0.979
