# 导入必要的库

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# 加载数据集

In [2]:
# Load scikit-learn's bundled breast-cancer classification dataset and
# pull the feature matrix and the label vector out of the returned Bunch.
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target

# 划分训练集和测试集

In [4]:
# Split the data 87.5% / 12.5% into training and test sets.
# The fixed random_state makes the shuffle (and so the split) repeatable.
split_kwargs = {"train_size": 0.875, "test_size": 0.125, "random_state": 188}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

# 创建逻辑斯蒂回归模型并训练

In [5]:
# Initialise an L2-regularised logistic-regression classifier.
#
# `multi_class` is intentionally NOT passed: the parameter is deprecated since
# scikit-learn 1.5 (it emits a FutureWarning and is scheduled for removal).
# The target here has only two classes, so the library default resolves to the
# same model that `multi_class='ovr'` produced.
#
# max_iter is raised well above the library default of 100 -- presumably so
# that lbfgs converges on the unscaled features (TODO confirm; scaling the
# inputs would be the usual alternative).
clf = LogisticRegression(
    penalty="l2", C=1.0, random_state=None, solver="lbfgs", max_iter=3000,
    verbose=0,
)

# Learn the coefficients from the training split.
# Left as the cell's last expression, as in the original cell.
clf.fit(X_train, y_train)



# 使用测试数据进行预测

In [6]:
# Run the fitted classifier on the held-out test features;
# the result holds one predicted class label per test sample.
y_pred = clf.predict(X=X_test)

# 评估模型性能

In [7]:
# Compute both metrics up front, then print each section under its banner.
report_text = classification_report(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)

# Text report of the main per-class classification metrics.
print('--- report ---')
print(report_text)

# Fitted model parameters: coefficient matrix followed by the intercept.
print('--- params ---')
print(clf.coef_, clf.intercept_)

# Accuracy score of the predictions against the true test labels.
print('--- accuracy ---')
print(test_accuracy)

--- report ---
              precision    recall  f1-score   support

           0       0.96      0.96      0.96        27
           1       0.98      0.98      0.98        45

    accuracy                           0.97        72
   macro avg       0.97      0.97      0.97        72
weighted avg       0.97      0.97      0.97        72

--- params ---
[[ 0.90397379  0.18532044 -0.32680134  0.02707765 -0.16564613 -0.20284255
  -0.54220675 -0.28262699 -0.24000995 -0.0279315  -0.0757065   1.17803557
   0.09023053 -0.10686037 -0.02446845  0.06465079 -0.04648059 -0.03565188
  -0.0336962   0.0157275   0.22345861 -0.46786732 -0.07055624 -0.0166465
  -0.33670547 -0.68736402 -1.51344955 -0.57629151 -0.70834876 -0.07934683]] [29.64778897]
--- accuracy ---
0.9722222222222222
