##Grupo
227124 Jorge Henrique Monteiro dos Santos

262251 Vitor Mello de Araujo Lima

##Objetivo

Fazer a busca dos melhores hiperparametros para uma SVM para Regressão num banco de dados em particular.

X.npy são os dados de entrada, e y.npy são os valores de saída correspondentes.

Os seguintes algoritmos de otimização serão utilizados durante a execução dessa tarefa:

- Random search
- Grid search
- Bayesian optimization
- PSO
- simulated annealling
- CMA-ES


In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import  datasets
from sklearn.pipeline import  Pipeline
from sklearn.svm import SVR
from sklearn.utils.fixes import loguniform
import scipy.stats as stats
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV


## Importando conjunto de dados

In [2]:
#x_url = "https://www.ic.unicamp.br/~wainer/cursos/1s2021/431/X.npy" 
!wget https://www.ic.unicamp.br/~wainer/cursos/1s2021/431/X.npy
!wget https://www.ic.unicamp.br/~wainer/cursos/1s2021/431/y.npy

x_data = np.load("X.npy")
y_data = np.load("y.npy")

print(x_data.shape)
print(y_data.shape)

--2021-05-03 21:06:23--  https://www.ic.unicamp.br/~wainer/cursos/1s2021/431/X.npy
Resolving www.ic.unicamp.br (www.ic.unicamp.br)... 143.106.7.54, 2801:8a:40c0:cafe::54
Connecting to www.ic.unicamp.br (www.ic.unicamp.br)|143.106.7.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 52752 (52K)
Saving to: ‘X.npy’


2021-05-03 21:06:24 (216 KB/s) - ‘X.npy’ saved [52752/52752]

--2021-05-03 21:06:24--  https://www.ic.unicamp.br/~wainer/cursos/1s2021/431/y.npy
Resolving www.ic.unicamp.br (www.ic.unicamp.br)... 143.106.7.54, 2801:8a:40c0:cafe::54
Connecting to www.ic.unicamp.br (www.ic.unicamp.br)|143.106.7.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4176 (4.1K)
Saving to: ‘y.npy’


2021-05-03 21:06:25 (232 MB/s) - ‘y.npy’ saved [4176/4176]

(506, 13)
(506,)


# RamdomSearch

In [3]:
svr = SVR()

parameters = {'C': loguniform(2**-5, 2**15),
 'gamma': loguniform(2**-15, 2**3),
 'epsilon': stats.uniform(0.05,1.0),
 'kernel': ['rbf']}

rnd_search = RandomizedSearchCV(svr, parameters, n_iter =125, cv=5, scoring='neg_root_mean_squared_error')

rnd_search.fit(x_data, y_data)

RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                                 epsilon=0.1, gamma='scale', kernel='rbf',
                                 max_iter=-1, shrinking=True, tol=0.001,
                                 verbose=False),
                   iid='deprecated', n_iter=125, n_jobs=None,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f14b242a090>,
                                        'epsilon': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f14a06fb450>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f14a06fb190>,
                                        'kernel': ['rbf']},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False,
                   scoring='neg_root_mean_squared_error', verbose=0)

In [15]:

print("C: ", rnd_search.best_params_['C'])
print("gamma: ", rnd_search.best_params_['gamma'])
print("epsilon: " ,  rnd_search.best_params_['epsilon'])
print("RMSE: ", -rnd_search.best_score_) 

C:  9282.42040909595
gamma:  7.886971178182264e-05
epsilon:  0.09512013326056641
RMSE:  4.741526563155499


## Otimização bayesiana



In [10]:
 !pip install scikit-optimize
 
 from skopt import BayesSearchCV
 from skopt.space import Real, Categorical, Integer

 
 bay_opt = BayesSearchCV(
    SVR(),
     {
         'C': Real(2**-5, 2**15, prior='log-uniform'),
         'gamma': Real(2**-15, 2**3, prior='log-uniform'),
         'epsilon': Real(0.05,1.0),
         'kernel': Categorical(['rbf']),
     },
     n_iter=125,
     cv = 5,
     optimizer_kwargs = {'base_estimator': 'RF'},
     scoring = 'neg_root_mean_squared_error'
      )
 
 bay_opt.fit(x_data, y_data)



BayesSearchCV(cv=5, error_score='raise',
              estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                            epsilon=0.1, gamma='scale', kernel='rbf',
                            max_iter=-1, shrinking=True, tol=0.001,
                            verbose=False),
              fit_params=None, iid=True, n_iter=125, n_jobs=1, n_points=1,
              optimizer_kwargs={'base_estimator': 'RF'},
              pre_dispatch='2*n_jobs', random_state=None, refit=True,
              return_train_sco...alse, scoring='neg_root_mean_squared_error',
              search_spaces={'C': Real(low=0.03125, high=32768, prior='log-uniform', transform='identity'),
                             'epsilon': Real(low=0.05, high=1.0, prior='uniform', transform='identity'),
                             'gamma': Real(low=3.0517578125e-05, high=8, prior='log-uniform', transform='identity'),
                             'kernel': Categorical(categories=('rbf',), prior=None)},
         

In [11]:
print("C: ", bay_opt.best_params_['C'])
print("gamma: ", bay_opt.best_params_['gamma'])
print("epsilon: " ,  bay_opt.best_params_['epsilon'])
print("RMSE: ", -bay_opt.best_score_) 

C:  17584.02397411461
gamma:  3.204648464589454e-05
epsilon:  0.07688532594506936
RMSE:  3.7321192781631205


## CMA-ES

In [13]:
!pip install cma
from sklearn.model_selection import cross_val_score
import cma

def loss(xs, X_data, y_data):  
  C_arg = 2**(xs[0]*20 - 5)
  gamma_arg = 2**(xs[1]*18 - 15)
  epsilon_arg = xs[2] + 0.05
  
  svr = SVR(kernel="rbf", C=C_arg, epsilon=epsilon_arg , gamma=gamma_arg)
  scores = cross_val_score(svr, X_data, y_data, cv=5, scoring = 'neg_root_mean_squared_error')

  return scores.mean()

x0 = np.random.uniform(0,1,3)

es = cma.CMAEvolutionStrategy(
    x0=x0,
    sigma0=0.25,
    inopts={'bounds': [0, 1], "popsize": 30},
)

es.opts.set({'maxiter': 125})

while not es.stop():
  solutions = es.ask()
  es.tell(solutions, [loss(sol,x_data , y_data) for sol in solutions])
  es.logger.add()
  es.disp()


Collecting cma
[?25l  Downloading https://files.pythonhosted.org/packages/36/c0/0a1c41f7cad0a51e07991cf86423d0e6651d035f1fe7dcff48e8858848f2/cma-3.0.3-py2.py3-none-any.whl (230kB)
[K     |████████████████████████████████| 235kB 3.6MB/s 
Installing collected packages: cma
Successfully installed cma-3.0.3
(15_w,30)-aCMA-ES (mu_w=8.6,w_1=20%) in dimension 3 (seed=230894, Mon May  3 21:32:26 2021)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     30 -9.109104720728329e+00 1.0e+00 2.65e-01  2e-01  3e-01 2:50.0
    2     60 -9.189295796230962e+00 1.9e+00 4.66e-01  4e-01  7e-01 4:36.8
    3     90 -9.224472165228972e+00 2.3e+00 5.46e-01  4e-01  6e-01 5:21.1
    4    120 -9.224627726090921e+00 1.8e+00 6.07e-01  4e-01  7e-01 5:32.5
    6    180 -9.226217102165840e+00 2.3e+00 7.79e-01  3e-01  1e+00 5:38.7
    8    240 -9.222536705610654e+00 4.9e+00 9.01e-01  3e-01  1e+00 5:45.7
   10    300 -9.224742106676867e+00 5.8e+00 1.09e+00  2e-01  2e+00 5:51.9
   13    39

In [14]:
C = 2**(es.result.xbest[0]*20 - 5)
gamma = 2**(es.result.xbest[1]*18 - 15)
epsilon = es.result.xbest[2] + 0.05

print("C: ", C)
print("gamma: ", gamma)
print("epsilon: " , epsilon)
print("RMSE: ", -es.result.fbest) 

C:  0.03125000000011353
gamma:  7.999999999698504
epsilon:  0.9431640625000897
RMSE:  9.227736144103195
