# M6. Epsilon-Support Vector Regression ('rbf' and 'linear')

### Grid Search with Cross Validation

In [1]:
# Read Back 
import pickle

with open('datasetIPPD.pickle', 'rb') as handle:
    data = pickle.load(handle)

In [2]:
data.keys()

dict_keys(['X_train', 'y_train', 'X_test', 'y_test'])

In [3]:
X_train = data['X_train']
y_train = data['y_train']

X_test = data['X_test']
y_test = data['y_test']

len(y_train), len(y_test)

(256, 65)

In [4]:
from __future__ import print_function

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVR

print(__doc__)

Automatically created module for IPython interactive environment


In [15]:
from sklearn.metrics import mean_squared_error 
import numpy as np
def rmse_scorer(model, X, y): 
    y_predict = model.predict(X)
    k = np.sqrt(mean_squared_error(y, y_predict))
    #print("RMSE %f " % k)
    return k

In [6]:
# Preprocessing is a must for SVR !!!!

from sklearn import preprocessing
X_train = preprocessing.scale(X_train) 

In [53]:
# Set the parameters by cross-validation

# sklearn.svm.SVR: The free parameters in the model are C and epsilon.

tuned_parameters = []


tuned_parameters.append( {'kernel': ['linear'], 'C': [.2, .4,.6, .8, 1, 100, 1000], 
                          'epsilon':[.15, 0.1, .05, 0.2] })

tuned_parameters.append({'kernel': ['rbf'], 'gamma': [1e-5, 1e-4, 1e-3, 1e-4,1e-2,1e-1], 
                         'C': [ .01, .02, .5, 1,  ], 
                         'epsilon':[0.1, .2] })

scores = ['rmse'] #, 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(SVR(C=1, epsilon=0.001), tuned_parameters, cv=5, n_jobs=-1,
                      scoring=rmse_scorer)
    print('Starting clf.fit(X_train, y_train)')
    
    clf.fit(X_train, y_train)

    print("\nBest parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    
    means = clf.cv_results_['mean_test_score']
    stds  = clf.cv_results_['std_test_score']

# Tuning hyper-parameters for rmse

Starting clf.fit(X_train, y_train)

Best parameters set found on development set:

{'C': 0.01, 'epsilon': 0.1, 'gamma': 1e-05, 'kernel': 'rbf'}

Grid scores on development set:



In [54]:
print(clf.best_estimator_)
print()
print((clf.best_score_))

SVR(C=0.01, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=1e-05,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

1035.31655598


In [55]:
#clf.cv_results_

In [51]:
    
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("Paramters: %r \t:mean+/-std: %0.3f (+/-%0.03f)"
              % (params, mean, std * 2))

Paramters: {'C': 0.2, 'epsilon': 0.15, 'kernel': 'linear'} 	:mean+/-std: 1040.918 (+/-19.725)
Paramters: {'C': 0.2, 'epsilon': 0.1, 'kernel': 'linear'} 	:mean+/-std: 1040.916 (+/-19.721)
Paramters: {'C': 0.2, 'epsilon': 0.05, 'kernel': 'linear'} 	:mean+/-std: 1040.914 (+/-19.717)
Paramters: {'C': 0.2, 'epsilon': 0.2, 'kernel': 'linear'} 	:mean+/-std: 1040.920 (+/-19.729)
Paramters: {'C': 0.4, 'epsilon': 0.15, 'kernel': 'linear'} 	:mean+/-std: 1038.900 (+/-30.428)
Paramters: {'C': 0.4, 'epsilon': 0.1, 'kernel': 'linear'} 	:mean+/-std: 1038.898 (+/-30.401)
Paramters: {'C': 0.4, 'epsilon': 0.05, 'kernel': 'linear'} 	:mean+/-std: 1038.895 (+/-30.374)
Paramters: {'C': 0.4, 'epsilon': 0.2, 'kernel': 'linear'} 	:mean+/-std: 1038.903 (+/-30.455)
Paramters: {'C': 0.6, 'epsilon': 0.15, 'kernel': 'linear'} 	:mean+/-std: 1033.144 (+/-36.360)
Paramters: {'C': 0.6, 'epsilon': 0.1, 'kernel': 'linear'} 	:mean+/-std: 1033.147 (+/-36.387)
Paramters: {'C': 0.6, 'epsilon': 0.05, 'kernel': 'linear'} 	:mean

In [57]:

y_predict = clf.predict(X_test) #Reduce X to the selected features and then predict using the underlying estimator.
rmse_model = np.sqrt(mean_squared_error(y_test, y_predict))
print("(for reporting only) Test Error once optimal model is picked by CV : %f" % rmse_model)

(for reporting only) Test Error once optimal model is picked by CV : 882.068026
