In [5]:
# 导入库

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

In [7]:
# 读取数据

In [8]:
file_path = "letter-recognition.data"
columns = ['letter', 'x-box', 'y-box', 'width', 'height', 'onpix', 'x-bar', 'y-bar', 'x2bar', 'y2bar', 'xybar', 'x2ybr', 'xy2br', 'x-ege', 'xegvy', 'y-ege', 'yegvx']
data = pd.read_csv(file_path, header=None, names=columns)

In [9]:
data['target'] = np.where((data['letter'] == 'E') | (data['letter'] == 'F'), 1, 0)


In [10]:
data['target'] = np.where((data['letter'] == 'E') | (data['letter'] == 'F'), 1, 0)

In [11]:
X = data.drop(['letter', 'target'], axis=1)
y = data['target']

In [12]:
# 划分训练集和测试集

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234)

In [14]:
# 建立模型

In [15]:
# 定义要尝试的C值和伽马值
C_values = [1, 5, 10]
gamma_values = [0.01, 0.05, 0.1]

In [16]:
best_accuracy = 0
best_conf_matrix = None
best_C = None
best_gamma = None

In [17]:
# 循环尝试不同的C值和伽马值

In [18]:
for C in C_values:
    for gamma in gamma_values:
        # 建立SVM模型
        svm = SVC(C=C, gamma=gamma, kernel='rbf', random_state=1234)
        # 模型拟合
        svm.fit(X_train, y_train)
        # 模型在测试集上的预测
        y_pred = svm.predict(X_test)
        # 模型评估
        conf_matrix = confusion_matrix(y_test, y_pred)
        accuracy = np.sum(np.diag(conf_matrix)) / np.sum(conf_matrix)
        # 打印模型的准确率
        print('C={0}, gamma={1}, accuracy={2}'.format(C, gamma, accuracy))
        # 保存最好的模型
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_conf_matrix = conf_matrix
            best_C = C
            best_gamma = gamma

C=1, gamma=0.01, accuracy=0.9685
C=1, gamma=0.05, accuracy=0.9908333333333333
C=1, gamma=0.1, accuracy=0.9901666666666666
C=5, gamma=0.01, accuracy=0.985
C=5, gamma=0.05, accuracy=0.9936666666666667
C=5, gamma=0.1, accuracy=0.9923333333333333
C=10, gamma=0.01, accuracy=0.9873333333333333
C=10, gamma=0.05, accuracy=0.9933333333333333
C=10, gamma=0.1, accuracy=0.9921666666666666


In [19]:
# 打印最好的模型

In [20]:
print('最好的模型的准确率为：{0}'.format(best_accuracy))

最好的模型的准确率为：0.9936666666666667


In [21]:
# 打印最好的模型的混淆矩阵

In [22]:
print('最好的模型的混淆矩阵为：\n{0}'.format(best_conf_matrix))

最好的模型的混淆矩阵为：
[[5525   12]
 [  26  437]]


In [23]:
# 模型评估