In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn .svm import SVC
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [2]:
# Load the breast-cancer dataset once; return_X_y=True yields the
# (features, labels) pair directly instead of building the bunch twice.
x, y = load_breast_cancer(return_X_y=True)
print(x.shape)

# Standardize the features
from sklearn.preprocessing import StandardScaler
# NOTE(review): the scaler is fit on the full dataset here, before the
# train/test split done in a later cell, so test-set statistics influence
# the scaling. Consider fitting the scaler on the training split only.
x = StandardScaler().fit_transform(x)
print(x)

In [3]:
from sklearn.metrics import accuracy_score

# Hold out 30% of the samples for evaluation, with a fixed random_state.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=420
)

# Fit one SVC per kernel on the same split and report hold-out accuracy.
Kernel = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in Kernel:
    model = SVC(kernel=kernel, gamma='auto', degree=1)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    ac = accuracy_score(y_test, pred)
    # Message: "accuracy of the model with the {} kernel function is {}".
    # The original text misspelled the word for "kernel function".
    print('选择{}核函数时模型的准确率为{}'.format(kernel, ac))

In [4]:
# Grid search and stratified shuffle-split cross-validation
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit

# Candidate values: 20 gammas log-spaced over [1e-10, 1] and
# 10 coef0 values evenly spaced over [0, 5].
gamma_range = np.logspace(-10, 0, 20)
coef0_range = np.linspace(0, 5, 10)
param_grid = {'gamma': gamma_range, 'coef0': coef0_range}

# Five stratified random splits, each holding out 30% of the samples.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=420)
grid = GridSearchCV(
    SVC(kernel='poly', degree=1),
    param_grid=param_grid,
    cv=cv,
)
grid.fit(x, y)

# Report the best parameter combination and the score it achieved.
print('最优参数值为:', grid.best_params_)
print('选取该参数时,模型的预测准确率为:', grid.best_score_)

In [5]:
import matplotlib.pyplot as plt

# Sweep the RBF kernel's gamma over 50 log-spaced values in [1e-10, 1]
# and record the hold-out accuracy obtained for each value.
score = []
gamma_range = np.logspace(-10, 0, 50)
for gamma in gamma_range:
    model = SVC(kernel='rbf', gamma=gamma)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    ac = accuracy_score(y_test, pred)
    score.append(ac)

# figsize is a figure-creation option: the original plt.plot(figsize=...)
# call drew nothing and did not size the figure, so create it explicitly.
plt.figure(figsize=(4, 2))
plt.plot(gamma_range, score)
plt.xlabel('gamma')
plt.ylabel('accuracy')
plt.show()

In [6]:
# Report the gamma with the highest recorded accuracy (first one on ties).
best_accuracy = max(score)
best_position = score.index(best_accuracy)
print('最优参数值:', gamma_range[best_position])
print('参数取最优值时,模型预测准确率为:', best_accuracy)

In [7]:
# Sweep the regularization strength C of a linear-kernel SVC over 50
# evenly spaced values in [0.01, 30] and record hold-out accuracy.
score = []
C_range = np.linspace(0.01, 30, 50)
for c in C_range:
    model = SVC(kernel='linear', C=c)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    ac = accuracy_score(y_test, pred)
    score.append(ac)

# figsize is a figure-creation option: the original plt.plot(figsize=...)
# call drew nothing and did not size the figure, so create it explicitly.
plt.figure(figsize=(4, 2))
plt.plot(C_range, score)
plt.xlabel('C')
plt.ylabel('accuracy')
plt.show()

# Report the C with the highest recorded accuracy (first one on ties).
print('最优参数值:', C_range[score.index(max(score))])
print('参数取最优值时,模型预测准确率为:', max(score))

In [8]:
# Sweep the regularization strength C of an RBF-kernel SVC over 50
# evenly spaced values in [0.01, 30] and record hold-out accuracy.
score = []
C_range = np.linspace(0.01, 30, 50)
for c in C_range:
    # NOTE(review): gamma=0.01267 is presumably the best gamma found by the
    # earlier gamma sweep -- confirm against that cell's output.
    model = SVC(kernel='rbf', C=c, gamma=0.01267)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    ac = accuracy_score(y_test, pred)
    score.append(ac)

# figsize is a figure-creation option: the original plt.plot(figsize=...)
# call drew nothing and did not size the figure, so create it explicitly.
plt.figure(figsize=(4, 2))
plt.plot(C_range, score)
plt.xlabel('C')
plt.ylabel('accuracy')
plt.show()

# Report the C with the highest recorded accuracy (first one on ties).
print('最优参数值:', C_range[score.index(max(score))])
print('参数取最优值时,模型预测准确率为:', max(score))

# 未进行标准化

In [9]:
# Reload the dataset without standardizing it. return_X_y=True yields the
# (features, labels) pair directly instead of building the bunch twice.
x, y = load_breast_cancer(return_X_y=True)
print(x.shape)
print(x)

In [10]:
from sklearn.metrics import accuracy_score

# Hold out 30% of the samples for evaluation, with a fixed random_state.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=420
)

# Fit one SVC per kernel on the same split and report hold-out accuracy.
Kernel = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in Kernel:
    model = SVC(kernel=kernel, gamma='auto', degree=1)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    ac = accuracy_score(y_test, pred)
    # Message: "accuracy of the model with the {} kernel function is {}".
    # The original text misspelled the word for "kernel function".
    print('选择{}核函数时模型的准确率为{}'.format(kernel, ac))

In [11]:
# Grid search and stratified shuffle-split cross-validation
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit

# Candidate values: 20 gammas log-spaced over [1e-10, 1] and
# 10 coef0 values evenly spaced over [0, 5].
gamma_range = np.logspace(-10, 0, 20)
coef0_range = np.linspace(0, 5, 10)
param_grid = {'gamma': gamma_range, 'coef0': coef0_range}

# Five stratified random splits, each holding out 30% of the samples.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=420)
grid = GridSearchCV(
    SVC(kernel='poly', degree=1),
    param_grid=param_grid,
    cv=cv,
)
grid.fit(x, y)

# Report the best parameter combination and the score it achieved.
print('最优参数值为:', grid.best_params_)
print('选取该参数时,模型的预测准确率为:', grid.best_score_)

In [12]:
import matplotlib.pyplot as plt

# Sweep the RBF kernel's gamma over 50 log-spaced values in [1e-10, 1]
# and record the hold-out accuracy obtained for each value.
score = []
gamma_range = np.logspace(-10, 0, 50)
for gamma in gamma_range:
    model = SVC(kernel='rbf', gamma=gamma)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    ac = accuracy_score(y_test, pred)
    score.append(ac)

# figsize is a figure-creation option: the original plt.plot(figsize=...)
# call drew nothing and did not size the figure, so create it explicitly.
plt.figure(figsize=(4, 2))
plt.plot(gamma_range, score)
plt.xlabel('gamma')
plt.ylabel('accuracy')
plt.show()

In [13]:
# Report the gamma with the highest recorded accuracy (first one on ties).
best_accuracy = max(score)
best_position = score.index(best_accuracy)
print('最优参数值:', gamma_range[best_position])
print('参数取最优值时,模型预测准确率为:', best_accuracy)

In [16]:
# Sweep the regularization strength C of a linear-kernel SVC over 50
# evenly spaced values in [0.01, 30] and record hold-out accuracy.
score = []
C_range = np.linspace(0.01, 30, 50)
for c in C_range:
    model = SVC(kernel='linear', C=c)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    ac = accuracy_score(y_test, pred)
    score.append(ac)

# figsize is a figure-creation option: the original plt.plot(figsize=...)
# call drew nothing and did not size the figure, so create it explicitly.
plt.figure(figsize=(4, 2))
plt.plot(C_range, score)
plt.xlabel('C')
plt.ylabel('accuracy')
plt.show()

# Report the C with the highest recorded accuracy (first one on ties).
print('最优参数值:', C_range[score.index(max(score))])
print('参数取最优值时,模型预测准确率为:', max(score))

In [17]:
# Sweep the regularization strength C of an RBF-kernel SVC over 50
# evenly spaced values in [0.01, 30] and record hold-out accuracy.
score = []
C_range = np.linspace(0.01, 30, 50)
for c in C_range:
    # NOTE(review): gamma=0.01267 is the same constant used in the
    # standardized-data run; it may not be the best gamma for the unscaled
    # features -- confirm against the gamma sweep output.
    model = SVC(kernel='rbf', C=c, gamma=0.01267)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    ac = accuracy_score(y_test, pred)
    score.append(ac)

# figsize is a figure-creation option: the original plt.plot(figsize=...)
# call drew nothing and did not size the figure, so create it explicitly.
plt.figure(figsize=(4, 2))
plt.plot(C_range, score)
plt.xlabel('C')
plt.ylabel('accuracy')
plt.show()

# Report the C with the highest recorded accuracy (first one on ties).
print('最优参数值:', C_range[score.index(max(score))])
print('参数取最优值时,模型预测准确率为:', max(score))