In [9]:
import numpy as np
import matplotlib.pyplot as plt
import utilities
from sklearn import svm
from sklearn import model_selection
from sklearn.metrics import classification_report

In [10]:
# Load the feature matrix and labels through the project-local `utilities` helper.
# NOTE(review): this is an absolute, machine-specific path — the cell will not
# run elsewhere unless the file exists at the same location.
input_file = 'E://tensorflow//jupyter//machine_learing_damo//3.data//data_multivar.txt'
X, Y = utilities.load_data(input_file)

# Hold out 25% of the samples for the final evaluation; the fixed random_state
# keeps the split reproducible between runs.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=5,
)

In [11]:
# Hyperparameter candidates to be selected via cross-validation.
# Each sub-grid is expanded independently, one per kernel type.
linear_grid = {
    'kernel': ['linear'],
    'C': [1, 10, 50, 600],
}
poly_grid = {
    'kernel': ['poly'],
    'degree': [2, 3],
}
rbf_grid = {
    'kernel': ['rbf'],
    'gamma': [0.01, 0.001],
    'C': [1, 10, 50, 600],
}
parameter_grid = [linear_grid, poly_grid, rbf_grid]

# Scoring metrics to optimise for; one full search is run per entry.
# NOTE(review): 'precision' carries no averaging suffix while 'recall_weighted'
# does — confirm that this asymmetry is intended.
metrics = ['precision', 'recall_weighted']

In [26]:
for metric in metrics:
    print("\n#### Searching optimal hyperparameters for", metric)

    # Cross-validated (cv=5) search over every combination in parameter_grid,
    # ranked by the metric of the current iteration.
    classifier = model_selection.GridSearchCV(
        svm.SVC(C=1),
        parameter_grid,
        cv=5,
        scoring=metric,
    )
    classifier.fit(X_train, Y_train)

    print("Best parameters set found on development set:", end="\n\n")
    print(classifier.best_params_, end="\n\n")
    print("Grid scores on development set:", end="\n\n")

    # For every candidate, report the mean cross-validation score together with
    # twice the standard deviation of the per-fold scores.
    cv_results = classifier.cv_results_
    means = cv_results['mean_test_score']
    stds = cv_results['std_test_score']
    score_line = "%r --> %0.3f (+/-%0.03f)"
    for mean, std, params in zip(means, stds, cv_results['params']):
        print(score_line % (params, mean, std * 2))
    print()

    print("Detailed classification report:", end="\n\n")
    print(
        "The model is trained on the full development set.",
        "The scores are computed on the full evaluation set.",
        "",
        sep="\n",
    )

    # Predict on the held-out test split with the fitted search object and
    # summarise the per-class results.
    y_true = Y_test
    y_pred = classifier.predict(X_test)
    print(classification_report(y_true, y_pred), end="\n\n")


#### Searching optimal hyperparameters for precision
Best parameters set found on development set:

{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}

Grid scores on development set:

{'C': 1, 'kernel': 'linear'} --> 0.676 (+/-0.263)
{'C': 10, 'kernel': 'linear'} --> 0.676 (+/-0.263)
{'C': 50, 'kernel': 'linear'} --> 0.676 (+/-0.263)
{'C': 600, 'kernel': 'linear'} --> 0.676 (+/-0.263)
{'degree': 2, 'kernel': 'poly'} --> 0.874 (+/-0.107)
{'degree': 3, 'kernel': 'poly'} --> 0.872 (+/-0.103)
{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} --> 0.980 (+/-0.050)
{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'} --> 0.533 (+/-0.000)
{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} --> 0.983 (+/-0.042)
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'} --> 0.543 (+/-0.036)
{'C': 50, 'gamma': 0.01, 'kernel': 'rbf'} --> 0.959 (+/-0.051)
{'C': 50, 'gamma': 0.001, 'kernel': 'rbf'} --> 0.806 (+/-0.327)
{'C': 600, 'gamma': 0.01, 'kernel': 'rbf'} --> 0.967 (+/-0.033)
{'C': 600, 'gamma': 0.001, 'kernel': 'rbf'} --> 0.983 (+/-0.043)

Deta