In [170]:
# 支持向量机-SVC分类

In [171]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [172]:
# 读取数据
iris = datasets.load_iris()
iris.data.shape

(150, 4)

In [173]:
# 数据集划分
x_train, x_test, y_train, y_test =  train_test_split(iris.data, iris.target, test_size=0.3)

# 标准化
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

In [174]:
# SVM建模
estimator = SVC()
estimator.fit(x_train, y_train)

In [175]:
# 模型评估
# 方法1：直接对比真实值和预测值
y_predict = estimator.predict(x_test)
print("y_predict:\n", y_predict)
print("直接对比真实值和预测值：\n", y_test == y_predict)
# 方法2：计算准确率
score = estimator.score(x_test, y_test)
print("准确率为：\n", score)

y_predict:
 [1 2 0 2 0 1 0 1 0 1 1 0 0 2 1 2 2 2 2 2 1 2 0 0 0 0 0 1 0 1 1 1 2 1 2 0 0
 1 1 1 1 2 0 1 1]
直接对比真实值和预测值：
 [ True  True  True  True  True  True  True  True  True False  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True]
准确率为：
 0.9777777777777777


In [176]:
# 支持向量
print("支持向量:\n",estimator.support_vectors_)
# 支持向量的索引
print("支持向量的索引:\n",estimator.support_)
# 支持向量的个数
print("支持向量的个数:\n",estimator.n_support_)

支持向量:
 [[-0.53803469  0.82224749 -1.18459053 -1.34338333]
 [-0.15459368  3.20721947 -1.29981239 -1.08265274]
 [-0.9214757   0.58375029 -1.18459053 -0.95228745]
 [-1.04928937 -0.13174131 -1.24220146 -1.34338333]
 [-1.04928937  1.06074468 -1.24220146 -0.82192215]
 [-1.68835772 -1.8012217  -1.41503426 -1.21301804]
 [-0.15459368  1.77623628 -1.18459053 -1.21301804]
 [-1.81617139  0.34525309 -1.41503426 -1.34338333]
 [-0.53803469  0.82224749 -1.29981239 -1.08265274]
 [ 0.48447468 -2.0397189   0.42851561  0.35136551]
 [-0.41022102 -1.8012217   0.14046094  0.09063492]
 [-0.53803469 -0.13174131  0.42851561  0.35136551]
 [-0.9214757  -1.3242273  -0.43564839 -0.17009567]
 [ 1.12354303  0.10675589  0.37090468  0.22100022]
 [ 0.22884733 -0.84723291  0.77418121  0.48173081]
 [ 0.22884733  0.82224749  0.42851561  0.48173081]
 [ 0.61228835  0.58375029  0.54373748  0.48173081]
 [ 0.61228835 -1.8012217   0.37090468  0.09063492]
 [ 1.50698404  0.34525309  0.54373748  0.22100022]
 [-0.28240735 -0.1317413

### 网格搜索交叉验证

In [177]:
from sklearn.model_selection import GridSearchCV

In [178]:
# 定义组合参数
# “kernel='rbf'”:核函数（linear:线性，rbf:高斯，poly:多项式）
# "C":错误样本的惩罚参数
params = {
    "kernel":("linear", "rbg", "poly"),
    "C":[0.01, 0.1, 0.5, 1, 2, 10, 100]
}
# 网格搜索交叉验证拟合模型
estimator = GridSearchCV(estimator, param_grid=params, cv=10)
estimator.fit(x_train, y_train)

70 fits failed out of a total of 210.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "D:\Soft\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "D:\Soft\Python\Python310\lib\site-packages\sklearn\svm\_base.py", line 180, in fit
    self._validate_params()
  File "D:\Soft\Python\Python310\lib\site-packages\sklearn\base.py", line 600, in _validate_params
    validate_parameter_constraints(
  File "D:\Soft\Python\Python310\lib\site-packages\sklearn\utils\_param_validation.py", line 97, in validate_parameter_constraints
    raise InvalidParameterErro

In [179]:
# 最佳参数
print("最佳参数：\n", estimator.best_params_)
# 最佳结果
print("最佳结果：\n", estimator.best_score_)
# 最佳估计器
print("最佳估计器:\n", estimator.best_estimator_)
# 交叉验证结果
print("交叉验证结果：\n", estimator.cv_results_)

最佳参数：
 {'C': 0.5, 'kernel': 'linear'}
最佳结果：
 0.9718181818181819
最佳估计器:
 SVC(C=0.5, kernel='linear')
交叉验证结果：
 {'mean_fit_time': array([9.99927521e-05, 2.00009346e-04, 2.00080872e-04, 4.00090218e-04,
       1.00064278e-04, 2.00033188e-04, 5.00106812e-04, 4.00114059e-04,
       6.00171089e-04, 4.00114059e-04, 0.00000000e+00, 4.00090218e-04,
       8.00156593e-04, 0.00000000e+00, 6.00099564e-04, 4.00090218e-04,
       2.00033188e-04, 0.00000000e+00, 2.00057030e-04, 4.00066376e-04,
       7.00187683e-04]), 'std_fit_time': array([0.00029998, 0.00040002, 0.00040016, 0.00049001, 0.00030019,
       0.00040007, 0.00050011, 0.00049004, 0.00049004, 0.00049004,
       0.        , 0.00049001, 0.00040008, 0.        , 0.00048998,
       0.00049001, 0.00040007, 0.        , 0.00040011, 0.00048998,
       0.00045838]), 'mean_score_time': array([8.00156593e-04, 0.00000000e+00, 5.00106812e-04, 3.00049782e-04,
       0.00000000e+00, 9.99927521e-05, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 4.00

### 支持向量机-SVR回归

In [180]:
import pandas as pd
from sklearn.svm import SVR

In [181]:
# 导入数据
boston = pd.read_csv("../data/boston/boston.csv")
boston

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PIRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [182]:
# 特征值
x = boston.drop(["MEDV"], axis=1)
x

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PIRATIO,B,LSTAT
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48


In [183]:
# 目标值
y = boston["MEDV"]
y

0      24.0
1      21.6
2      34.7
3      33.4
4      36.2
       ... 
501    22.4
502    20.6
503    23.9
504    22.0
505    11.9
Name: MEDV, Length: 506, dtype: float64

In [184]:
# 数据集划分
x_train, x_test, y_train, y_test = train_test_split(x, y)
# 标准化
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

In [185]:
# SVM建模
estimator = SVR()
estimator.fit(x_train, y_train)

In [186]:
# 计算准确率
score = estimator.score(x_test, y_test)
print("准确率为：\n", score)

准确率为：
 0.6792703277167541
