In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import numpy as np

# Importation et traitement du dataset

In [None]:
# Load the Wisconsin breast-cancer (diagnostic) dataset bundled with scikit-learn.
dataset = load_breast_cancer()

In [None]:
# Show the dataset's built-in description (the returned Bunch exposes its keys as attributes).
print(dataset.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [None]:
# Feature matrix and class labels taken from the loaded dataset.
X, y = dataset['data'], dataset['target']

# Normalisation des données

In [None]:
# 从sklearn库中导入标准化工具 StandardScaler
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise every feature column to zero mean and unit standard deviation.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split that follows, so test-set statistics leak into the preprocessing;
# consider fitting on the training split only.
scaler = StandardScaler()
scaler.fit(X)            # learn the per-column mean and standard deviation
X = scaler.transform(X)  # apply the learned scaling


# Split train test

In [None]:
# Hold out a test set (scikit-learn's default test size is used).
# A fixed seed makes the split, and every score computed from it, reproducible
# on "Restart & Run All"; stratifying on y keeps the class ratio identical in
# the training and test subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y
)

# Apprentissage avec les paramètres par défaut

In [None]:
# 从 sklearn.svm 模块导入支持向量机分类器（Support Vector Classifier）
from sklearn.svm import SVC

# 从 sklearn.ensemble 模块导入随机森林分类器（Random Forest Classifier）
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Baseline classifier: a random forest with default hyper-parameters.
# random_state is pinned so that the randomness inside the forest, and hence
# the reported accuracies, are reproducible from run to run.
# To try a support vector machine instead, swap in: clf = SVC()
clf = RandomForestClassifier(random_state=42)

In [None]:
# Train the classifier on the training split.
# X_train: feature data of the training set
# y_train: target labels (classes) of the training set
clf.fit(X_train, y_train)

In [None]:
# clf.score returns the classifier's default metric (accuracy).
# The training score shows how well the model fits the data it has seen;
# the test score estimates generalisation to unseen data (higher is better).
train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print(f'Train accuracy: {train_accuracy}')
print(f'Test accuracy: {test_accuracy}')

Train accuracy: 1.0
Test accuracy: 0.972027972027972
[CV 1/5] END criterion=gini, max_depth=60, max_features=auto, n_estimators=166;, score=nan total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=110, max_features=log2, n_estimators=166;, score=0.965 total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=50, max_features=auto, n_estimators=66;, score=nan total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=50, max_features=auto, n_estimators=66;, score=nan total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=90, max_features=auto, n_estimators=83;, score=nan total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=90, max_features=auto, n_estimators=83;, score=nan total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=90, max_features=auto, n_estimators=83;, score=nan total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=30, max_features=auto, n_estimators=66;, score=nan total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=30, max_features=auto,

[CV 2/5] END criterion=entropy, max_depth=110, max_features=log2, n_estimators=166;, score=0.941 total time=   0.2s
[CV 4/5] END criterion=entropy, max_depth=60, max_features=log2, n_estimators=133;, score=1.000 total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=10, max_features=log2, n_estimators=50;, score=0.941 total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=10, max_features=log2, n_estimators=50;, score=0.965 total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=80, max_features=sqrt, n_estimators=133;, score=1.000 total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=80, max_features=sqrt, n_estimators=133;, score=0.965 total time=   0.2s
[CV 5/5] END criterion=entropy, max_depth=70, max_features=log2, n_estimators=66;, score=0.965 total time=   0.1s
[CV 1/5] END criterion=gini, max_depth=100, max_features=sqrt, n_estimators=83;, score=0.965 total time=   0.1s
[CV 2/5] END criterion=gini, max_depth=100, max_features=sqrt, n_estimators=83;, scor

[CV 4/5] END criterion=gini, max_depth=60, max_features=auto, n_estimators=166;, score=nan total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=10, max_features=auto, n_estimators=66;, score=nan total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=10, max_features=auto, n_estimators=66;, score=nan total time=   0.0s
[CV 3/5] END criterion=entropy, max_depth=50, max_features=auto, n_estimators=150;, score=nan total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=60, max_features=log2, n_estimators=133;, score=0.953 total time=   0.2s
[CV 5/5] END criterion=gini, max_depth=90, max_features=sqrt, n_estimators=166;, score=0.965 total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=20, max_features=sqrt, n_estimators=133;, score=0.953 total time=   0.2s
[CV 4/5] END criterion=entropy, max_depth=20, max_features=sqrt, n_estimators=133;, score=0.988 total time=   0.2s
[CV 2/5] END criterion=gini, max_depth=40, max_features=sqrt, n_estimators=66;, score=0.941 tota

# Optimisation avec grid search

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# 从 sklearn 中导入用于网格搜索交叉验证的工具
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for the random forest, searched exhaustively by GridSearchCV.
param_grid_rf = {
    # Number of trees: more trees give a more stable model but train more slowly.
    'n_estimators': [10, 25, 50, 100, 200],

    # How many features are considered at each split:
    #   'sqrt' -> sqrt(n_features)
    #   'log2' -> log2(n_features)
    # 'auto' (an old alias of 'sqrt' for classifiers) is intentionally absent:
    # the installed scikit-learn rejects it during parameter validation, which
    # made every candidate using it fail with score=nan (one third of all fits).
    'max_features': ['sqrt', 'log2'],

    # Maximum tree depth; limiting it helps against overfitting, None = unlimited.
    'max_depth': [1, 3, 5, 7, None],

    # Split quality criterion: Gini impurity or information gain (entropy).
    'criterion': ['gini', 'entropy'],
}

In [None]:
# Hyper-parameter grid for a support vector machine (for use with GridSearchCV).
param_grid_svm = dict(
    # Penalty coefficient C: larger values punish misclassification harder
    # (and make overfitting more likely).
    C=[0.1, 1, 10, 100, 1000],
    # Kernel coefficient gamma: controls how far a single sample's influence
    # reaches; smaller values give a smoother decision surface.
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
    # Kernel type: 'linear' for linearly separable data,
    # 'rbf' (radial basis function) for non-linear problems.
    kernel=['linear', 'rbf'],
)

In [None]:
# Exhaustive grid search over param_grid_rf for the random forest, using
# 5-fold cross-validation scored by accuracy.
# verbose=1 prints only the summary line instead of one line per fit (hundreds
# of lines that bury any fit-failure warning); n_jobs=-1 runs the fits on all
# cores, matching the randomised search further down.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid_rf,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 150 candidates, totalling 750 fits
[CV 1/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators=10;, score=nan total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators=10;, score=nan total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators=10;, score=nan total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators=10;, score=nan total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators=10;, score=nan total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators=25;, score=nan total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators=25;, score=nan total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators=25;, score=nan total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=1, max_features=auto, n_estimators

[CV 2/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=10;, score=0.976 total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=10;, score=0.965 total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=10;, score=0.976 total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=10;, score=0.882 total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=25;, score=0.953 total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=25;, score=0.929 total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=25;, score=0.918 total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=25;, score=1.000 total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=25;, score=0.882 total time=   0.0s
[CV 1/5] E

[CV 3/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=25;, score=0.941 total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=25;, score=0.953 total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=25;, score=0.894 total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=50;, score=0.953 total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=50;, score=0.941 total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=50;, score=0.953 total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=50;, score=1.000 total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=50;, score=0.906 total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=5, max_features=sqrt, n_estimators=100;, score=0.965 total time=   0.1s
[CV 2/5] 

[CV 3/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=25;, score=0.965 total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=25;, score=0.988 total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=25;, score=0.929 total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=50;, score=0.953 total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=50;, score=0.929 total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=50;, score=0.918 total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=50;, score=0.988 total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=50;, score=0.906 total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=100;, score=0.965 total time=   0.1s
[CV 2/5] 

[CV 2/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=50;, score=0.953 total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=50;, score=0.941 total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=50;, score=0.976 total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=50;, score=0.918 total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.953 total time=   0.1s
[CV 2/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.941 total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.1s
[CV 4/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.988 total time=   0.1s
[CV 5/5] END criterion=gini, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.9

[CV 4/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=50;, score=0.953 total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=50;, score=0.894 total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.907 total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.0s
[CV 3/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.918 total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=200;, score=0.907 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=200;, score=0

[CV 3/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=50;, score=0.953 total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=50;, score=0.965 total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=50;, score=0.906 total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.953 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.918 total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.976 total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.918 total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=3, max_features=sqrt, n_estimators=200;, score=0.

[CV 3/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=50;, score=0.941 total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=50;, score=0.988 total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=50;, score=0.918 total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=100;, score=0.965 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=100;, score=0.965 total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=100;, score=0.988 total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=200;, score=0.

[CV 2/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=50;, score=0.929 total time=   0.0s
[CV 3/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=50;, score=0.929 total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=50;, score=1.000 total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=50;, score=0.906 total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=100;, score=0.965 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=100;, score=0.941 total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=100;, score=0.941 total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=100;, score=0.976 total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=100;, score=0.9

[CV 5/5] END criterion=entropy, max_depth=None, max_features=sqrt, n_estimators=50;, score=0.906 total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.965 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.953 total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.988 total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=None, max_features=sqrt, n_estimators=100;, score=0.929 total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=None, max_features=sqrt, n_estimators=200;, score=0.965 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=None, max_features=sqrt, n_estimators=200;, score=0.929 total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=None, max_features=sqrt

250 fits failed out of a total of 750.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
250 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/cussat/.venvs/ML/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/cussat/.venvs/ML/lib/python3.10/site-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/Users/cussat/.venvs/ML/lib/python3.10/site-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/Users/cussat/.venvs/ML/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 96, in validate_param

In [None]:
# best_params_ holds the best parameter combination found by the search (as a dict).
grid_search.best_params_

{'criterion': 'entropy',
 'max_depth': None,
 'max_features': 'log2',
 'n_estimators': 10}

In [None]:
# The model configured with the best parameter combination found by the grid search.
best_clf = grid_search.best_estimator_

In [None]:
# Accuracy of the grid-search winner on the training and held-out test splits.
grid_train_accuracy = best_clf.score(X_train, y_train)
grid_test_accuracy = best_clf.score(X_test, y_test)
print(f'Train accuracy: {grid_train_accuracy}')
print(f'Test accuracy: {grid_test_accuracy}')

Train accuracy: 1.0
Test accuracy: 0.972027972027972


# Optimisation avec random search

In [None]:
# 从 sklearn 中导入随机搜索交叉验证工具
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Search space for the randomised search over random-forest hyper-parameters.
param_rnd_rf = {
    # Number of trees in the forest: 10 evenly spaced values from 50 to 200,
    # truncated to integers.
    'n_estimators': [int(x) for x in np.linspace(start=50, stop=200, num=10)],

    # Maximum tree depth (limits growth to curb overfitting): 10, 20, ..., 110,
    # plus None for unlimited depth.
    'max_depth': [int(x) for x in np.linspace(10, 110, num=11)] + [None],

    # Number of features considered at each split:
    #   'sqrt' -> square root of the number of features
    #   'log2' -> base-2 logarithm of the number of features
    # 'auto' (an old alias of 'sqrt' for classifiers) is intentionally absent:
    # the installed scikit-learn rejects it during parameter validation, so
    # every sampled candidate that used it failed with score=nan.
    'max_features': ['sqrt', 'log2'],

    # Split criterion: 'gini' (Gini impurity, fast, the usual default) or
    # 'entropy' (information gain, slightly more expensive to compute).
    'criterion': ['gini', 'entropy'],
}

In [None]:
# Randomised hyper-parameter search: samples 120 parameter combinations from
# param_rnd_rf and evaluates each with 5-fold cross-validation on accuracy.
# The constructor call must be a single expression; split over two statements
# it would bind the class itself (and the indented continuation is a syntax
# error), so the later .fit() call could not work.
# random_state makes the sampled candidates reproducible. verbose=1 keeps the
# output to a summary, since per-fit lines from the parallel workers
# (n_jobs=-1) end up scattered across other cells' outputs.
random_search = RandomizedSearchCV(
    clf,
    param_distributions=param_rnd_rf,
    n_iter=120,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
    random_state=42,
)

In [None]:
# Run the randomised search on the training data, trying the sampled parameter combinations.
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 120 candidates, totalling 600 fits


260 fits failed out of a total of 600.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
152 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/cussat/.venvs/ML/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/cussat/.venvs/ML/lib/python3.10/site-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/Users/cussat/.venvs/ML/lib/python3.10/site-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/Users/cussat/.venvs/ML/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 96, in validate_param

In [None]:
# Model with the best-performing parameter combination found by the random search.
best_model = random_search.best_estimator_
# Display the best parameter combination that was found.
random_search.best_params_

{'n_estimators': 183,
 'max_features': 'log2',
 'max_depth': 30,
 'criterion': 'entropy'}

In [None]:
# Accuracy of the random-search winner on the training and held-out test splits.
random_train_accuracy = best_model.score(X_train, y_train)
random_test_accuracy = best_model.score(X_test, y_test)
print(f'Train accuracy: {random_train_accuracy}')
print(f'Test accuracy: {random_test_accuracy}')

Train accuracy: 1.0
Test accuracy: 0.9790209790209791
