In [466]:
%matplotlib inline

import numpy as np
from sklearn.svm import SVC
from matplotlib import cm
import matplotlib.pyplot as plt
plt.style.use(plt.style.available[0])  #可以通过下标进行选择适合自己的画图样式
from matplotlib.colors import ListedColormap
from sklearn.datasets import make_classification

# Draw a 2-D array of normally distributed samples: mean `mu`, standard
# deviation `sigma`, and `size` given as (rows, columns).
mu, sigma, size = -10, 100, (100, 2)
X = np.random.normal(loc=mu, scale=sigma, size=size)

# Target: sign of (second feature - first feature), i.e. +1 where the
# difference is positive, -1 where it is negative, 0 where it is exactly zero.
y = np.sign(X[:, 1] - X[:, 0])

In [467]:
# Synthetic binary classification data: two informative features, no redundant
# ones, one cluster per class. flip_y=0.5 deliberately injects heavy label
# noise (see the make_classification documentation for its exact meaning).
# random_state pins the generator so Restart & Run All reproduces the same
# data; the value matches the seed used for train_test_split in this notebook.
# NOTE(review): the outputs recorded in this notebook report 1000 rows, so they
# were produced with a different n_samples than the 100 requested here --
# re-run the notebook to refresh them.
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_redundant=0,
    n_informative=2,
    n_clusters_per_class=1,
    flip_y=0.5,
    random_state=1,
)


In [468]:
# Quick sanity check of the generated data: a few leading rows / labels,
# followed by the shape and number of dimensions of each array.
print('X= ', X[:3])
print('y= ', y[:5])
for data in (X, y):
    print(data.shape, data.ndim)

X=  [[ 0.99404188 -0.93638321]
 [ 2.7496529   0.46094669]
 [ 1.45311037  0.85888639]]
y=  [1 1 1 0 0]
(1000, 2) 2
(1000,) 1


In [469]:
# Split the data into a training part and a held-out validation part.
# The split proportions are scikit-learn's defaults; random_state fixes the shuffle.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [470]:
# Corrupt the TRAINING labels: flip exactly NOISE_RATIO of them (1 -> 0,
# anything else -> 1).
# The indices are taken from a random permutation, i.e. drawn WITHOUT
# replacement, so no label can be flipped twice and silently restored --
# the effective noise rate therefore really is NOISE_RATIO.
# NOTE: `err` is an alias of `y_train`, so the labels are modified in place;
# re-running this cell flips a new random subset of the already-noisy labels.
NOISE_RATIO = 0.5
err = y_train
flip_idx = np.random.permutation(len(err))[:int(len(err) * NOISE_RATIO)]
err[flip_idx] = np.where(err[flip_idx] == 1, 0, 1)

In [471]:
# Check whether the training labels differ from the original labels after the corruption step
# print(np.sum(y_train>0) + np.sum(y_test>0), np.sum(y > 0))

In [472]:
# 3. Feature engineering (standardisation)
from sklearn.preprocessing import StandardScaler

standardScaler = StandardScaler()

# The scaler's statistics are learned from the training split only
# (fit_transform); the test split is then scaled with those same statistics.
X_train, X_test = (
    standardScaler.fit_transform(X_train),
    standardScaler.transform(X_test),
)
# X = standardScaler.fit_transform(X)


In [473]:
# Build the SVM classifiers used in the rest of the notebook.
# Stand-alone estimators, one per kernel:
svc_rbf = SVC(C=10, kernel="rbf", gamma=1)
svc_lin = SVC(C=10, kernel="linear", gamma="auto")
svc_poly = SVC(C=10, kernel="poly", degree=3, gamma="auto", coef0=1)

# Separate (independent) instances paired with display names.
names = ["RBF SVM", "Linear SVM"]
classifiers = [
    SVC(C=10, kernel="rbf", gamma=1),
    SVC(C=10, kernel="linear", gamma="auto"),
]

In [474]:
# Fit every SVM on the (scaled, label-corrupted) training split and report
# its hyper-parameters plus its score on the training and test splits.
svcs = [svc_rbf, svc_lin]
kernel_label = ["rbf", "linear"]
for ix, svc in enumerate(svcs):
    # `performance` is kept only as an alias for any later cell that reads it;
    # the original code used it interchangeably with `svc` after fitting.
    performance = svc.fit(X_train, y_train.ravel())
    # Predictions on the held-out split.
    y_test_pred = svc.predict(X_test)
    # Show which estimator this iteration handles.
    print(f'ix={ix}, svcs={svc}')
    # Show the estimator's hyper-parameters.
    print(f'估计器的参数: {svc.get_params()}')
    # For a classifier, score() is the mean accuracy on the given data -- not
    # the coefficient of determination R^2 (that is what regressors report),
    # so the printed labels say "accuracy" (准确率) instead of "R2".
    print(f'训练集准确率: {svc.score(X_train, y_train)} ')
    print(f'测试集准确率: {svc.score(X_test, y_test)} ')

ix=0, svcs=SVC(C=10, gamma=1)
估计器的参数: {'C': 10, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
训练集R2评分: 0.58 
测试集R2评分: 0.748 
ix=1, svcs=SVC(C=10, gamma='auto', kernel='linear')
估计器的参数: {'C': 10, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'auto', 'kernel': 'linear', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
训练集R2评分: 0.5653333333333334 
测试集R2评分: 0.768 


In [475]:
# Disabled code: the hyperplane / margin plotting routine below is wrapped in
# a triple-quoted string, so none of it executes. Because the string is the
# last expression of the cell, Jupyter echoes its repr as the cell output.
# NOTE(review): the loop reads clf.coef_ for every entry of `classifiers`,
# which also contains an RBF-kernel SVC -- presumably coef_ is only defined
# for the linear kernel; confirm before re-enabling this code.
"""

print(X_train.shape,X_test.shape)
XX = np.vstack((X_train,X_test))  #垂直拼接数组
print(XX.shape)

# figure number
fignum = 1

# fit the model
# 参考 https://blog.csdn.net/qq_43043256/article/details/104259061
# for name, penalty in (("unreg", 1), ("reg", 0.05)):
# for name, penalty in (("unreg", 1), ):
for name, clf in zip(names, classifiers):

    clf.fit(X, y)
    # get the separating hyperplane #获取分离超平面
    w = clf.coef_[0]  # 获取w

    # 根据超平面 yy= -w0/w1*x1-b1/w1
    a = -w[0] / w[1]  #斜率
    xx = np.linspace(-5, 5)  #公式中的x1
    # 我们得到截距b1和w1后，就可以求出所需要的公式
    # clf.intercept_[0]  #用来获得截距b1(这里共有两个值，分别为到x和到y的)
    yy = a * xx - (clf.intercept_[0]) / w[1]  #超平面

    # 绘制通过的分离超平面的平行线
    # 支持向量(从超平面方向上的边距)
    # 垂直于超平面)。竖直方向上是sqrt(1+a^2)
    # 对于线性回归和逻辑回归，其目标函数为：
    # g(x) = w1x1 + w2x2 + w3x3 + w4x4 + w0
    # coef_和intercept_都是模型参数，即为w
    # coef_为w1到w4
    # intercept_为w0
    # 如果有激活函数sigmoid，增加非线性变化  则为分类  即逻辑回归
    # 如果没有激活函数，则为回归
    # 对于这样的线性函数，都会有coef_和intercept_函数
    margin = 1 / np.sqrt(np.sum(clf.coef_ ** 2))
    yy_down = yy - np.sqrt(1 + a ** 2) * margin  #下边界
    yy_up = yy + np.sqrt(1 + a ** 2) * margin  #上边界

    # plot the line, the points, and the nearest vectors to the plane
    # 绘制直线、点和距离平面最近的向量
    plt.figure(fignum, figsize=(4, 3))
    plt.clf()
    plt.plot(xx, yy, "k-")
    plt.plot(xx, yy_down, "k--")
    plt.plot(xx, yy_up, "k--")

    plt.scatter(
        clf.support_vectors_[:, 0],  #分类0的支持向量
        clf.support_vectors_[:, 1],  #分类1的支持向量
        s=80,
        facecolors="none",
        zorder=10,
        edgecolors="k",
        cmap=cm.get_cmap("RdBu"),
    )
    plt.scatter(
        X[:, 0], X[:, 1], c=y, zorder=10, cmap=cm.get_cmap("RdBu"), edgecolors="k"
    )

    plt.axis("tight")
    x_min = -4.8
    x_max = 4.2
    y_min = -6
    y_max = 6

    YY, XX = np.meshgrid(yy, xx)
    xy = np.vstack([XX.ravel(), YY.ravel()]).T
    Z = clf.decision_function(xy).reshape(XX.shape)

    # Put the result into a contour plot #将结果放入等高线图
    plt.contourf(XX, YY, Z, cmap=cm.get_cmap("RdBu"), alpha=0.5, linestyles=["-"])

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

    plt.xticks(())
    plt.yticks(())
    fignum = fignum + 1

plt.show()

"""

'\n\nprint(X_train.shape,X_test.shape)\nXX = np.vstack((X_train,X_test))  #垂直拼接数组\nprint(XX.shape)\n\n# figure number\nfignum = 1\n\n# fit the model\n# 参考 https://blog.csdn.net/qq_43043256/article/details/104259061\n# for name, penalty in (("unreg", 1), ("reg", 0.05)):\n# for name, penalty in (("unreg", 1), ):\nfor name, clf in zip(names, classifiers):\n\n    clf.fit(X, y)\n    # get the separating hyperplane #获取分离超平面\n    w = clf.coef_[0]  # 获取w\n\n    # 根据超平面 yy= -w0/w1*x1-b1/w1\n    a = -w[0] / w[1]  #斜率\n    xx = np.linspace(-5, 5)  #公式中的x1\n    # 我们得到截距b1和w1后，就可以求出所需要的公式\n    # clf.intercept_[0]  #用来获得截距b1(这里共有两个值，分别为到x和到y的)\n    yy = a * xx - (clf.intercept_[0]) / w[1]  #超平面\n\n    # 绘制通过的分离超平面的平行线\n    # 支持向量(从超平面方向上的边距)\n    # 垂直于超平面)。竖直方向上是sqrt(1+a^2)\n    # 对于线性回归和逻辑回归，其目标函数为：\n    # g(x) = w1x1 + w2x2 + w3x3 + w4x4 + w0\n    # coef_和intercept_都是模型参数，即为w\n    # coef_为w1到w4\n    # intercept_为w0\n    # 如果有激活函数sigmoid，增加非线性变化  则为分类  即逻辑回归\n    # 如果没有激活函数，则为回归\n    # 对于这样的线性函数，都会

In [476]:
# Report how many support vectors each fitted model kept.
# `svc` is the loop variable left over from the training cell (its last value).
support_shapes = (
    svc.support_vectors_.shape,
    svc.support_.shape,
    svc_rbf.support_.shape,
    svc_lin.support_.shape,
    svc_lin.support_vectors_.shape,
)
for shape in support_shapes:
    print(shape)

(681, 2)
(681,)
(666,)
(681,)
(681, 2)


In [477]:
# Inspect the fitted scaler and the first rows of every split.
print(X.shape, X.ndim)
print('standardScaler.mean_', standardScaler.mean_)  # mean learned by the scaler
print('standardScaler.var_', standardScaler.var_)  # variance learned by the scaler
print('x_train.var', X_train.var())  # variance of the scaled training data
print('x_train.std', X_train.std())  # standard deviation of the scaled training data
for tag, part in (
    ('X_train=', X_train),
    ('X_test=', X_test),
    ('y_train=', y_train),
    ('y_test=', y_test),
):
    print(tag, part[:3])


(1000, 2) 2
standardScaler.mean_ [0.98739369 0.01613228]
standardScaler.var_ [0.58086419 1.18980639]
x_train.var 0.9999999999999994
x_train.std 0.9999999999999997
X_train= [[ 0.87428368  0.81309755]
 [ 0.41700611  0.69510598]
 [-1.92896813  1.39822587]]
X_test= [[-0.15484053 -0.76231759]
 [ 0.30223092 -1.12665987]
 [-0.16239218 -0.79864524]]
y_train= [0 0 1]
y_test= [0 0 0]
