In [17]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
import sys
import csv
from pprint import pprint 

# Select matplotlib's 'ggplot' style sheet for figures drawn from here on.
plt.style.use('ggplot')

In [18]:



#=================================================
# Read the rows of the CSV file and collect them into lists.
def set_data(filename="../../../data/secondary_selection/input_data2_2"):
    """Load the feature matrix and the target vector from a CSV file.

    Every non-empty row contributes columns 1-15 (15 values) as one feature
    row and column 16 as the matching target. Column 0 is never read.

    Parameters
    ----------
    filename : str, optional
        Path of the CSV file *without* its ``.csv`` extension (the extension
        is appended here). The default is the path this notebook originally
        hard-coded, so ``set_data()`` keeps working unchanged.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The features converted to ``float32`` and the targets converted to
        ``int32``, in file order.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    IndexError
        If a non-empty row has fewer than 17 columns.
    ValueError
        If a selected cell cannot be converted to the requested dtype.
    """
    path = '%s.csv' % filename

    # The 'rU' mode is rejected by Python 3.11+, while Python 2's built-in
    # open() has no ``newline`` argument, so pick the universal-newline
    # spelling that the running interpreter understands.
    if sys.version_info[0] >= 3:
        handle = open(path, 'r', newline='')
    else:
        handle = open(path, 'rU')

    data_set = []
    target_set = []
    # ``with`` closes the file even when a malformed row raises below.
    with handle:
        for row in csv.reader(handle):
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing
                # newline); there is nothing to extract from them.
                continue
            data_set.append(row[1:16])
            target_set.append(row[16])

    np_dataSet = np.array(data_set, dtype=np.float32)
    np_targetSet = np.array(target_set, dtype=np.int32)
    return np_dataSet, np_targetSet

#=================================================

# Load the full feature matrix and target vector once for the cells below.
data, target = set_data()

# <!--- start_debug
#print data.shape
#print target.shape
#       end_debug ----> 


# Earlier manual split, kept for reference: the first N samples were used for
# training and the remaining ones for validation.
# The value of N was only provisional.
#N = 300
#x_train, x_test = np.split( data, [N])  
#y_train, y_test = np.split( target, [N]) 
#N_test = y_test.size
#print data
#print target

#pre_list = []


In [25]:
## Split the samples into a training set and a test set (25% held out).
## random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.25, random_state=43)


## Hyper-parameter grid: an RBF kernel over (gamma, C) and a linear kernel over C.
c_values = [1, 10, 100, 1000]
tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-2, 1e-3, 1e-4], 'C': list(c_values)},
    {'kernel': ['linear'], 'C': list(c_values)},
]

## One full grid search is run per scoring metric.
scores = ['accuracy', 'precision', 'recall']

banner = '=' * 50

for score in scores:
    # Section header naming the metric being optimised.
    print('\n' + banner)
    print(score)
    print(banner)

    # 4-fold cross-validated search on the training split, using all cores.
    clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=4, scoring=score, n_jobs=-1)
    clf.fit(X_train, y_train)

    print("\n+ ベストパラメータ:\n")
    print(clf.best_estimator_)

    print("\n+ トレーニングデータでCVした時の平均スコア:\n")
    for params, mean_score, all_scores in clf.grid_scores_:
        # NOTE(review): the printed "+/-" margin is std/2 of the fold scores;
        # later scikit-learn examples print 2*std -- confirm which is intended.
        spread = all_scores.std() / 2
        print("{:.3f} (+/- {:.3f}) for {}".format(mean_score, spread, params))

    # Evaluate the refitted best estimator on the held-out test split.
    print("\n+ テストデータでの識別結果:\n")
    y_true = y_test
    y_pred = clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    
    

[[ 0.281822    0.56588137  0.51339483 ..., -1.50674546  0.23282823
  -1.60471272]
 [ 0.66597009  0.01906184  0.68803471 ...,  1.75918448 -0.10070425
   1.68415391]
 [-0.10232607  0.19304988  0.07679515 ...,  1.75918448  0.56636071
   1.68415391]
 ..., 
 [-0.18769231  0.11848357  0.51339483 ...,  0.12621951 -0.76776922
  -1.60471272]
 [-0.46513259  1.1872673  -0.56937242 ...,  0.12621951  0.89989316
   0.03972061]
 [-0.74257284  0.16819444  0.12918711 ...,  0.12621951  0.23282823
   1.68415391]] [1 0 1 1 0 1 0 1 0 0 0 0 0 0 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1
 0 0 1 1 1 0 1 1 1 1 0 1 1 0 0 1 1 0 1 0 0 1 1 0 0 0 1 0 0 1 1 0 1 1 0 0 1
 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0]

accuracy

+ ベストパラメータ:

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

+ トレーニングデータでCVした時の平均スコア:

0.927 (+/- 0.007) for {'kernel': 'rbf', 'C': 1, 'gamma'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)



+ ベストパラメータ:

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

+ トレーニングデータでCVした時の平均スコア:

0.924 (+/- 0.008) for {'kernel': 'rbf', 'C': 1, 'gamma': 0.01}
0.879 (+/- 0.028) for {'kernel': 'rbf', 'C': 1, 'gamma': 0.001}
0.000 (+/- 0.000) for {'kernel': 'rbf', 'C': 1, 'gamma': 0.0001}
0.939 (+/- 0.011) for {'kernel': 'rbf', 'C': 10, 'gamma': 0.01}
0.924 (+/- 0.017) for {'kernel': 'rbf', 'C': 10, 'gamma': 0.001}
0.879 (+/- 0.028) for {'kernel': 'rbf', 'C': 10, 'gamma': 0.0001}
0.924 (+/- 0.017) for {'kernel': 'rbf', 'C': 100, 'gamma': 0.01}
0.947 (+/- 0.013) for {'kernel': 'rbf', 'C': 100, 'gamma': 0.001}
0.924 (+/- 0.017) for {'kernel': 'rbf', 'C': 100, 'gamma': 0.0001}
0.939 (+/- 0.015) for {'kernel': 'rbf', 'C': 1000, 'gamma': 0.01}
0.947 (+/- 0.013) for {'kernel': 'rbf', 'C': 1000, 'gamma': 0.001}
0.955 (+/- 0.008) for {'kernel': 'rbf', 'C': 100