In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Load the iris dataset; `data.data` holds the features and `data.target` the labels.
data = load_iris()
X = data.data
y = data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Baseline: KNN with scikit-learn's default hyperparameters, scored on the held-out set.
# `fit` returns the estimator itself, so construction and fitting can be chained.
knn = KNeighborsClassifier().fit(X_train, y_train)
print("基础模型准确率:", knn.score(X_test, y_test))

In [None]:
## KNN关键参数说明

| 参数 | 说明 | 常见取值 |
| --- | --- | --- |
| n_neighbors | 考虑的最近邻数量 | 3-20 |
| weights | 投票权重 | 'uniform'(均等), 'distance'(距离加权) |
| p | 闵可夫斯基距离的幂参数 | 1(曼哈顿), 2(欧氏) |
| metric | 距离度量方法 | 'minkowski', 'euclidean', 'manhattan' |
| algorithm | 近邻搜索算法 | 'auto', 'ball_tree', 'kd_tree', 'brute' |


In [None]:
# Grid search: cross-validate every hyperparameter combination and keep the best one.
from sklearn.model_selection import GridSearchCV

# Candidate values per hyperparameter (18 * 2 * 2 * 4 = 288 combinations in total).
param_grid = {
    'n_neighbors': range(3, 21),         # k = 3 .. 20
    'weights': ['uniform', 'distance'],  # equal votes vs. distance-weighted votes
    'p': [1, 2],                         # 1 = Manhattan, 2 = Euclidean
    # NOTE(review): `algorithm` picks the neighbor-search strategy; it presumably
    # affects speed rather than accuracy, so dropping it would cut the grid to a
    # quarter of its size -- confirm before removing.
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

grid_search = GridSearchCV(
    KNeighborsClassifier(),
    param_grid,
    cv=5,  # 5-fold cross-validation
    scoring='accuracy',
    n_jobs=-1  # use all CPU cores
)

# Fixed: the original called `grid_sarch.fit(...)`, an undefined name that raised
# NameError, so the search was never fitted.
grid_search.fit(X_train, y_train)

print("最佳参数:", grid_search.best_params_)
print("最佳交叉验证准确率:", grid_search.best_score_)

In [None]:
# Randomized search: sample a fixed number of hyperparameter combinations
# instead of enumerating the full grid.

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

# Search space: `n_neighbors` is drawn from a discrete uniform distribution over
# the integers 3..20 (the upper bound of `randint` is exclusive); the remaining
# entries are plain lists of choices.
param_dist = dict(
    n_neighbors=randint(low=3, high=21),
    weights=['uniform', 'distance'],
    p=[1, 2],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
)

random_search = RandomizedSearchCV(
    estimator=KNeighborsClassifier(),
    param_distributions=param_dist,
    n_iter=50,        # evaluate 50 sampled parameter settings
    scoring='accuracy',
    cv=5,             # 5-fold cross-validation
    n_jobs=-1,        # use all CPU cores
    random_state=42,  # make the sampled settings repeatable
)

random_search.fit(X_train, y_train)

print("最佳随机搜索参数:", random_search.best_params_)
print("最佳随机搜索得分:", random_search.best_score_)

In [None]:
# Cross-validation: estimate accuracy of a fixed KNN (k=5) over 5 folds of the full data.

from sklearn.model_selection import cross_val_score

knn = KNeighborsClassifier(n_neighbors=5)
cv_scores = cross_val_score(
    knn,
    X,
    y,
    scoring='accuracy',
    cv=5,
)

print("交叉验证准确率:", cv_scores)
print("平均准确率:", cv_scores.mean())

# For imbalanced data: use an explicit stratified splitter that shuffles the
# samples with a fixed seed before splitting.
from sklearn.model_selection import StratifiedKFold

stratified_cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)
# No `scoring` is passed here, so the estimator's own default scorer is used.
cv_scores = cross_val_score(knn, X, y, cv=stratified_cv)

print("分层交叉验证结果:", cv_scores)

In [None]:
# Multi-metric grid search: score every candidate on several metrics at once.
# Fixed: the original used `pd.DataFrame` without ever importing pandas, which
# raised NameError on a fresh kernel.
import pandas as pd

# Short result name -> scikit-learn scorer name. The keys become the suffixes of
# the `mean_test_<key>` columns in `cv_results_`.
scoring = {
    'accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision': 'precision_macro',
    'recall': 'recall_macro'
}

# NOTE: `param_grid` must still be the plain-KNN grid from the grid-search cell
# (keys such as 'n_neighbors'); the pipeline cells later reassign it with
# 'knn__'-prefixed keys, which would not match a bare KNeighborsClassifier.
grid_search = GridSearchCV(
    KNeighborsClassifier(),
    param_grid,
    cv=5,
    scoring=scoring,
    refit='f1_macro',  # refit the final model using the best f1_macro candidate
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

# Inspect the per-candidate results for the selected metrics.
results = pd.DataFrame(grid_search.cv_results_)
print(results[['params', 'mean_test_accuracy', 'mean_test_f1_macro']])

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Chain standardization and KNN so the scaler is fitted together with the
# classifier each time the pipeline is fitted.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),     # step 1: standardize the features
        ('knn', KNeighborsClassifier()),  # step 2: classify with KNN
    ]
)

# Pipeline parameters are addressed as '<step name>__<parameter name>'.
param_grid = dict(
    knn__n_neighbors=range(3, 21),
    knn__weights=['uniform', 'distance'],
)

# `fit` returns the fitted search object, so construction and fitting are chained.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
).fit(X_train, y_train)

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

# Univariate feature selection (scored with f_classif) followed by KNN.
pipeline = Pipeline(
    steps=[
        ('selector', SelectKBest(f_classif)),  # keep the k best-scoring features
        ('knn', KNeighborsClassifier()),
    ]
)

# Tune the number of selected features together with the neighbor count;
# keys follow the '<step name>__<parameter name>' convention.
param_grid = dict(
    selector__k=[2, 3, 4],            # how many features to keep
    knn__n_neighbors=range(3, 11),    # k = 3 .. 10
)