In [22]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import MinMaxScaler

In [23]:
# Load the iris dataset bundled with scikit-learn.
iris_data = load_iris()

In [24]:
# Print the dataset's description text (the DESCR attribute).
print(iris_data.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [25]:
# Min-max normalisation of the four feature columns.
# NOTE(review): the scaler is fitted on the full dataset, and the
# train/test split only happens in a later cell, so the test rows'
# min/max values influence the scaling. Consider fitting on the
# training split only.
mm = MinMaxScaler().fit(iris_data.data)
x_data = mm.transform(iris_data.data)

# Feature weighting (experiment, currently disabled).
# NOTE(review): every right-hand side below reads column 0; presumably
# columns 1-3 were meant to be derived from themselves -- confirm before
# re-enabling.
# x_data[:,[0]] = x_data[:,[0]]/(1-0.7826)
# x_data[:,[1]] = x_data[:,[0]]/(1+0.4194)
# x_data[:,[2]] = x_data[:,[0]]/(1-0.9490)
# x_data[:,[3]] = x_data[:,[0]]/(1-0.9565)

In [26]:
# Split the data: 25% of the samples are held out as the test set.
# A fixed random_state makes the shuffle -- and therefore every score
# computed from this split -- reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, iris_data.target, test_size=0.25, random_state=42)

In [37]:
# Peek at the first three rows of the scaled training features.
x_train[:3]

array([[0.66666667, 0.41666667, 0.71186441, 0.91666667],
       [0.22222222, 0.70833333, 0.08474576, 0.125     ],
       [0.30555556, 0.58333333, 0.11864407, 0.04166667]])

In [27]:
# Estimator: a k-nearest-neighbours classifier with default settings.
knn = KNeighborsClassifier()

In [28]:
# Hyper-parameter grid: candidate values for KNN's n_neighbors.
params = dict(n_neighbors=[3, 5, 10])

In [29]:
# Build the grid-search object: cross-validate `knn` with cv=3 over
# every combination in `params`.
# NOTE(review): the name `gc` would shadow the standard-library `gc`
# module if that were ever imported in this notebook.
gc = GridSearchCV(estimator=knn, param_grid=params, cv=3)

In [30]:
# Train: run the grid search on the training split.
gc.fit(x_train,y_train)

GridSearchCV(cv=3, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [3, 5, 10]})

In [31]:
# Accuracy on the held-out test set.
gc.score(x_test,y_test)

0.9736842105263158

In [32]:
# Best result obtained during cross-validation.
gc.best_score_

0.9554291133238503

In [33]:
# The best hyper-parameters found by the search.
gc.best_params_

{'n_neighbors': 5}

In [34]:
# Cross-validation results for every hyper-parameter setting and every split.
gc.cv_results_

{'mean_fit_time': array([0.00100048, 0.00133348, 0.00099675]),
 'std_fit_time': array([9.98958356e-07, 4.69348758e-04, 8.12740957e-04]),
 'mean_score_time': array([0.00566785, 0.00533255, 0.00500162]),
 'std_score_time': array([0.00094263, 0.00046963, 0.00141473]),
 'param_n_neighbors': masked_array(data=[3, 5, 10],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'n_neighbors': 3}, {'n_neighbors': 5}, {'n_neighbors': 10}],
 'split0_test_score': array([0.92105263, 0.94736842, 0.92105263]),
 'split1_test_score': array([0.97297297, 0.97297297, 0.97297297]),
 'split2_test_score': array([0.94594595, 0.94594595, 0.94594595]),
 'mean_test_score': array([0.94665718, 0.95542911, 0.94665718]),
 'std_test_score': array([0.02120236, 0.01241897, 0.02120236]),
 'rank_test_score': array([2, 1, 2])}