# M6. Epsilon-Support Vector Regression ('rbf' and 'linear')

### Grid Search with Cross Validation

In [1]:
# Read Back 
import pickle

with open('datasetIPPD.pickle', 'rb') as handle:
    data = pickle.load(handle)

In [2]:
data.keys()

dict_keys(['X_train', 'y_train', 'X_test', 'y_test'])

In [3]:
X_train = data['X_train']
y_train = data['y_train']

X_test = data['X_test']
y_test = data['y_test']

len(y_train), len(y_test)

(256, 65)

In [4]:
from __future__ import print_function

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVR

print(__doc__)

Automatically created module for IPython interactive environment


In [5]:
def rmse_scorer(model, X, y):
    from sklearn.metrics import mean_squared_error    
    y_predict = model.predict(X)
    return mean_squared_error(y, y_predict)

In [None]:
# Set the parameters by cross-validation

# sklearn.svm.SVR: The free parameters in the model are C and epsilon.

tuned_parameters = []
tuned_parameters.append({'kernel': ['rbf'], 'gamma': [1e-3, 1e-4,1e-2,1e-1], 
                         'C': [1.0/4, 1.0/2, 1, 10, 100, 1000], 
                         'epsilon':[0.1] })

tuned_parameters.append( {'kernel': ['linear'], 'C': [1, 10, 100, 1000], 
                          'epsilon':[0.1] })

scores = ['rmse'] #, 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(SVR(C=1, epsilon=0.001), tuned_parameters, cv=5, n_jobs=-1, 
                      scoring=rmse_scorer)
    print('Starting clf.fit(X_train, y_train)')
    
    clf.fit(X_train, y_train)

    print("\nBest parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    
    means = clf.cv_results_['mean_test_score']
    stds  = clf.cv_results_['std_test_score']

# Tuning hyper-parameters for rmse

Starting clf.fit(X_train, y_train)


In [None]:
print(clf.best_estimator_)
print()
print(clf.best_score_)

In [17]:
    
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("Paramters: %r \t:mean+/-std: %0.3f (+/-%0.03f)"
              % (params, mean, std * 2))

Paramters: {'C': 0.25, 'epsilon': 0.1, 'gamma': 0.001, 'kernel': 'rbf'} 	:mean+/-std: -0.074 (+/-0.116)
Paramters: {'C': 0.25, 'epsilon': 0.1, 'gamma': 0.0001, 'kernel': 'rbf'} 	:mean+/-std: -0.074 (+/-0.116)
Paramters: {'C': 0.25, 'epsilon': 0.1, 'gamma': 0.01, 'kernel': 'rbf'} 	:mean+/-std: -0.074 (+/-0.117)
Paramters: {'C': 0.25, 'epsilon': 0.1, 'gamma': 0.1, 'kernel': 'rbf'} 	:mean+/-std: -0.077 (+/-0.118)
Paramters: {'C': 0.25, 'epsilon': 0.1, 'gamma': 1, 'kernel': 'rbf'} 	:mean+/-std: -0.080 (+/-0.119)
Paramters: {'C': 0.25, 'epsilon': 0.1, 'gamma': 10, 'kernel': 'rbf'} 	:mean+/-std: -0.080 (+/-0.120)
Paramters: {'C': 0.25, 'epsilon': 0.01, 'gamma': 0.001, 'kernel': 'rbf'} 	:mean+/-std: -0.074 (+/-0.116)
Paramters: {'C': 0.25, 'epsilon': 0.01, 'gamma': 0.0001, 'kernel': 'rbf'} 	:mean+/-std: -0.074 (+/-0.116)
Paramters: {'C': 0.25, 'epsilon': 0.01, 'gamma': 0.01, 'kernel': 'rbf'} 	:mean+/-std: -0.074 (+/-0.117)
Paramters: {'C': 0.25, 'epsilon': 0.01, 'gamma': 0.1, 'kernel': 'rbf'}

In [10]:
from sklearn.metrics import mean_squared_error
import numpy as np

y_predict = clf.predict(X_train)
rmse_model = np.sqrt(mean_squared_error(y_train, y_predict))
print("(used for picking best model) Training Error after CV : %f" % rmse_model)

(used for picking best model) Training Error after CV : 401.118696


In [11]:

y_predict = clf.predict(X_test) #Reduce X to the selected features and then predict using the underlying estimator.
rmse_model = np.sqrt(mean_squared_error(y_test, y_predict))
print("(for reporting only) Test Error once optimal model is picked by CV : %f" % rmse_model)

(for reporting only) Test Error once optimal model is picked by CV : 327.877737
