# 实验二 分类算法

# In [None]:
# 1. 加载数据集并探索数据
from sklearn.datasets import load_iris
import pandas as pd

# Load the bundled Iris dataset and pull out the pieces used further down.
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Wrap the samples in a DataFrame for inspection: one column per feature,
# plus the numeric label and the species name that label stands for.
df = pd.DataFrame(X, columns=feature_names)
df['target'] = y
label_to_species = dict(enumerate(target_names))
df['species'] = df['target'].map(label_to_species)
print(df.head())

# 2. 数据预处理（分割训练集和测试集）
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
for split_label, split_features in (("训练集大小:", X_train), ("测试集大小:", X_test)):
    print(split_label, split_features.shape)

# 3. 使用K近邻算法分类
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Build a 3-nearest-neighbours classifier and fit it on the training split.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict the held-out samples, then report accuracy, the confusion matrix
# and the per-class precision/recall summary.
y_pred = knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred)
knn_confusion = confusion_matrix(y_test, y_pred)
knn_report = classification_report(y_test, y_pred, target_names=target_names)
print("准确率:", knn_accuracy)
print("混淆矩阵:\n", knn_confusion)
print("分类报告:\n", knn_report)

# 4. 使用支持向量机(SVM)分类
from sklearn.svm import SVC

# Train a support vector classifier with a linear kernel and report its
# accuracy on the held-out split.
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
svm_accuracy = accuracy_score(y_test, y_pred)
print("准确率:", svm_accuracy)

# 5. 使用逻辑回归分类
from sklearn.linear_model import LogisticRegression

# Multinomial logistic regression on the training split.
# NOTE: the `multi_class` argument is intentionally not passed. It is
# deprecated in recent scikit-learn releases (it emits a FutureWarning and is
# scheduled for removal), and with the lbfgs solver on a problem with more
# than two classes the multinomial (softmax) formulation is already what gets
# fitted, so the resulting model is unchanged.
lr = LogisticRegression(solver='lbfgs', max_iter=200)
lr.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = lr.predict(X_test)
print("准确率:", accuracy_score(y_test, y_pred))
print("分类报告:\n", classification_report(y_test, y_pred,
                                           target_names=target_names))

# 6. 进阶内容:结果可视化(可选)
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Project the feature matrix onto its first two principal components so the
# whole dataset can be drawn as a 2-D scatter plot.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# One point per sample, coloured by its class label.
first_component, second_component = X_pca[:, 0], X_pca[:, 1]
plt.scatter(first_component, second_component, c=y, cmap='viridis')
plt.title('鸢尾花数据集 PCA 降维可视化')
plt.xlabel('PCA1')
plt.ylabel('PCA2')
plt.show()

# 手动实现逻辑回归(多分类)
import numpy as np


class ManualLogisticRegression:
    """Multinomial (softmax) logistic regression trained by batch gradient descent.

    Labels may be any values ``np.unique`` can sort (for example ``0..K-1``,
    ``10/20/30`` or strings). They are encoded internally as column indices
    and mapped back to the original values by ``predict``.

    Attributes:
        lr: Gradient-descent step size.
        epochs: Number of full-batch update steps performed by ``fit``.
        weights: Array of shape (n_features, n_classes); ``None`` before ``fit``.
        bias: Array of shape (n_classes,); ``None`` before ``fit``.
        classes_: Sorted unique labels seen by ``fit``; ``None`` before ``fit``.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        self.lr = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None
        self.classes_ = None

    def softmax(self, z):
        """Return the row-wise softmax of the 2-D score matrix ``z``."""
        # Subtracting each row's maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large scores.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples class labels.
        """
        X = np.asarray(X, dtype=float)
        # Encode labels as 0..K-1 column indices; the labels themselves are
        # kept in classes_ so predict() can return them.
        self.classes_, y_index = np.unique(np.asarray(y).ravel(),
                                           return_inverse=True)
        y_index = np.asarray(y_index).ravel()

        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        y_onehot = np.eye(n_classes)[y_index]

        # Start from all-zero parameters.
        self.weights = np.zeros((n_features, n_classes))
        self.bias = np.zeros(n_classes)

        # Full-batch gradient descent.
        for _ in range(self.epochs):
            z = np.dot(X, self.weights) + self.bias
            probs = self.softmax(z)
            error = probs - y_onehot

            # Gradients averaged over the samples.
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error, axis=0)

            # Parameter update.
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return the most probable label for every row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            1-D array of labels taken from ``classes_``.
        """
        X = np.asarray(X, dtype=float)
        z = np.dot(X, self.weights) + self.bias
        probs = self.softmax(z)
        return self.classes_[np.argmax(probs, axis=1)]


# Try out the hand-written softmax regression on the same train/test split.
manual_lr = ManualLogisticRegression(epochs=1000, learning_rate=0.1)
manual_lr.fit(X_train, y_train)
y_pred_manual = manual_lr.predict(X_test)
manual_lr_accuracy = accuracy_score(y_test, y_pred_manual)
print("手动逻辑回归准确率:", manual_lr_accuracy)

from collections import Counter


class ManualKNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Attributes:
        k: Number of neighbours that vote on each prediction.
        X_train: Training samples as an array; ``None`` before ``fit``.
        y_train: Training labels as an array; ``None`` before ``fit``.
    """

    def __init__(self, k=3):
        # With k < 1 no neighbour would vote and predict() would fail on an
        # empty tally, so reject it up front with a clear message.
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k!r}")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training data; no model is built ahead of time.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples labels.
        """
        # Convert to arrays so plain Python lists work with the broadcasting
        # and index-array lookups in predict().
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)

    def predict(self, X):
        """Return the majority label among the k nearest training samples.

        Args:
            X: Array-like of shape (n_queries, n_features).

        Returns:
            1-D array with one predicted label per query row.
        """
        y_pred = []
        for x in np.asarray(X):
            # Euclidean distance from this query to every training sample.
            distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))
            # Labels of the k closest training samples, nearest first.
            k_indices = np.argsort(distances)[:self.k]
            k_labels = self.y_train[k_indices]
            # Majority vote; Counter breaks ties in favour of the label that
            # was counted first, i.e. the one belonging to the nearer sample.
            most_common = Counter(k_labels).most_common(1)
            y_pred.append(most_common[0][0])
        return np.array(y_pred)


# Try out the hand-written KNN classifier on the same train/test split.
manual_knn = ManualKNN(k=3)
manual_knn.fit(X_train, y_train)
y_pred_manual = manual_knn.predict(X_test)
manual_knn_accuracy = accuracy_score(y_test, y_pred_manual)
print("手动 KNN 准确率:", manual_knn_accuracy)
