# M6. Epsilon-Support Vector Regression ('rbf' and 'linear')

### Grid Search with Cross Validation

In [10]:
# Read Back 
import pickle

with open('datasetIPPD.pickle', 'rb') as handle:
    data = pickle.load(handle)

In [11]:
data.keys()

dict_keys(['X_train', 'y_train', 'X_test', 'y_test'])

In [12]:
X_train = data['X_train']
y_train = data['y_train']

X_test = data['X_test']
y_test = data['y_test']

len(y_train), len(y_test)

(256, 65)

In [13]:
from __future__ import print_function

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVR

print(__doc__)

Automatically created module for IPython interactive environment


In [35]:
from sklearn.metrics import mean_squared_error 
import numpy as np
def rmse_scorer(model, X, y): 
    y_predict = model.predict(X)
    k = np.sqrt(mean_squared_error(y, y_predict))
    return k

## Preprocessing is a must for SVR

In [34]:
from sklearn import preprocessing
X_train = preprocessing.normalize(X_train, norm='l1')

In [28]:
# Set the parameters by cross-validation

# sklearn.svm.SVR: The free parameters in the model are C and epsilon.

tuned_parameters = []
tuned_parameters.append( {'kernel': ['linear'], 'C': [.2, .4,.6, .8, 1, 100, 1000], 
                          'epsilon':[.15, 0.1, .05, 0.2] })

tuned_parameters.append({'kernel': ['rbf'], 'gamma': [1e-5, 1e-4, 1e-3, 1e-4,1e-2,1e-1], 
                         'C': [1e-5, 1e-4, 1e-3, 1e-4,1e-2,1e-1], 
                         'epsilon':[1e-5, 1e-4, 1e-3, 1e-4,1e-2,1e-1] })

scores = ['rmse']  #, 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(SVR(C=1, epsilon=0.001), tuned_parameters, cv=3, n_jobs=-1, scoring=rmse_scorer)
    print('Starting clf.fit(X_train, y_train)')
    
    clf.fit(X_train, y_train)

    print("\nBest parameters set found on development set:")
    print()
    print(clf.best_params_)


# Tuning hyper-parameters for rmse

Starting clf.fit(X_train, y_train)

Best parameters set found on development set:

{'C': 1e-05, 'epsilon': 1e-05, 'gamma': 1e-05, 'kernel': 'rbf'}


################################################################################
################################################################################
############################  Select Model based on this score #####################################
################################################################################
################################################################################

In [29]:
print(clf.best_estimator_)
print()
print((clf.best_score_))

SVR(C=1e-05, cache_size=200, coef0=0.0, degree=3, epsilon=1e-05, gamma=1e-05,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

1032.95723148


In [30]:
#clf.cv_results_

In [31]:
print()
print("Grid scores on development set:")
print()
    
means = clf.cv_results_['mean_test_score']
stds  = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("Paramters: %r \t:mean+/-std: %0.3f (+/-%0.03f)"
              % (params, mean, std * 2))


Grid scores on development set:

Paramters: {'C': 0.2, 'epsilon': 0.15, 'kernel': 'linear'} 	:mean+/-std: 1032.952 (+/-115.382)
Paramters: {'C': 0.2, 'epsilon': 0.1, 'kernel': 'linear'} 	:mean+/-std: 1032.952 (+/-115.382)
Paramters: {'C': 0.2, 'epsilon': 0.05, 'kernel': 'linear'} 	:mean+/-std: 1032.952 (+/-115.382)
Paramters: {'C': 0.2, 'epsilon': 0.2, 'kernel': 'linear'} 	:mean+/-std: 1032.952 (+/-115.382)
Paramters: {'C': 0.4, 'epsilon': 0.15, 'kernel': 'linear'} 	:mean+/-std: 1032.948 (+/-115.382)
Paramters: {'C': 0.4, 'epsilon': 0.1, 'kernel': 'linear'} 	:mean+/-std: 1032.948 (+/-115.382)
Paramters: {'C': 0.4, 'epsilon': 0.05, 'kernel': 'linear'} 	:mean+/-std: 1032.948 (+/-115.382)
Paramters: {'C': 0.4, 'epsilon': 0.2, 'kernel': 'linear'} 	:mean+/-std: 1032.948 (+/-115.382)
Paramters: {'C': 0.6, 'epsilon': 0.15, 'kernel': 'linear'} 	:mean+/-std: 1032.943 (+/-115.383)
Paramters: {'C': 0.6, 'epsilon': 0.1, 'kernel': 'linear'} 	:mean+/-std: 1032.943 (+/-115.383)
Paramters: {'C': 0.6,

################################################################################
################################################################################
############################  R E P O R T I N G #####################################
################################################################################
################################################################################

In [32]:
model = clf.best_estimator_

y_predict = model.predict(X_test) #Reduce X to the selected features and then predict using the underlying estimator.
rmse_model = np.sqrt(mean_squared_error(y_test, y_predict))
print("(for reporting only) Test Error once optimal model is picked by CV : %f" % rmse_model)

(for reporting only) Test Error once optimal model is picked by CV : 882.068031
