### Aplicação de Tuning para os parâmetros de erros

#### Importar base de dados que foi processada anteriormente

In [1]:
import pandas as pd

In [2]:
# base produzida a partir da limpeza no primeiro exemplo
base = pd.read_csv('../../data/baseCarrosProcessada.csv', encoding = 'ISO-8859-1')

In [3]:
base[:10]

Unnamed: 0.1,Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
0,0,480,test,limousine,1993,manuell,0,golf,150000,0,benzin,volkswagen,nein
1,1,18300,test,coupe,2011,manuell,190,golf,125000,5,diesel,audi,ja
2,2,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,nein
3,3,1500,test,kleinwagen,2001,manuell,75,golf,150000,6,benzin,volkswagen,nein
4,4,3600,test,kleinwagen,2008,manuell,69,fabia,90000,7,diesel,skoda,nein
5,5,650,test,limousine,1995,manuell,102,3er,150000,10,benzin,bmw,ja
6,6,2200,test,cabrio,2004,manuell,109,2_reihe,150000,8,benzin,peugeot,nein
7,8,14500,control,bus,2014,manuell,125,c_max,30000,8,benzin,ford,nein
8,9,999,test,kleinwagen,1998,manuell,101,golf,150000,0,benzin,volkswagen,nein
9,10,2000,control,limousine,2004,manuell,105,3_reihe,150000,12,benzin,mazda,nein


### Remover a coluna de index gerada anteriormente

In [4]:
base = base.drop('Unnamed: 0', axis=1)

In [5]:
base

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
0,480,test,limousine,1993,manuell,0,golf,150000,0,benzin,volkswagen,nein
1,18300,test,coupe,2011,manuell,190,golf,125000,5,diesel,audi,ja
2,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,nein
3,1500,test,kleinwagen,2001,manuell,75,golf,150000,6,benzin,volkswagen,nein
4,3600,test,kleinwagen,2008,manuell,69,fabia,90000,7,diesel,skoda,nein
5,650,test,limousine,1995,manuell,102,3er,150000,10,benzin,bmw,ja
6,2200,test,cabrio,2004,manuell,109,2_reihe,150000,8,benzin,peugeot,nein
7,14500,control,bus,2014,manuell,125,c_max,30000,8,benzin,ford,nein
8,999,test,kleinwagen,1998,manuell,101,golf,150000,0,benzin,volkswagen,nein
9,2000,control,limousine,2004,manuell,105,3_reihe,150000,12,benzin,mazda,nein


### Extrair dados para treinamento predição e os dados da classe (preço real)

In [6]:
previsores = base.iloc[:, 1:13].values

In [7]:
preco_real = base.iloc[:, 0].values

### Transformação dos dados


In [8]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelEncoder_previsores = LabelEncoder()

In [9]:
previsores[:,0] = labelEncoder_previsores.fit_transform(previsores[:,0])
previsores[:,1] = labelEncoder_previsores.fit_transform(previsores[:,1])
previsores[:,3] = labelEncoder_previsores.fit_transform(previsores[:,3])
previsores[:,5] = labelEncoder_previsores.fit_transform(previsores[:,5])
previsores[:,8] = labelEncoder_previsores.fit_transform(previsores[:,8])
previsores[:,9] = labelEncoder_previsores.fit_transform(previsores[:,9])
previsores[:,10] = labelEncoder_previsores.fit_transform(previsores[:,10])

## Categorização

In [10]:
from sklearn.compose import ColumnTransformer

In [11]:
ct = ColumnTransformer([("", OneHotEncoder(),[0,1,3,5,8,9,10])], remainder="passthrough") 
previsores = ct.fit_transform(previsores)

### Criação da Rede

In [12]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

Using TensorFlow backend.


In [13]:
def criarRede(loss):
    regressor = Sequential()
    regressor.add(Dense(units = 158, activation='relu', input_dim=316))
    regressor.add(Dropout(0.2))
    regressor.add(Dense(units = 158, activation='relu'))
    regressor.add(Dropout(0.2))
    regressor.add(Dense(units = 1, activation='linear'))
    regressor.compile(loss = loss, optimizer='adam', metrics = ['mean_absolute_error'])
    return regressor

#### Parâmetros que serão testados

In [14]:
regressor = KerasRegressor(build_fn = criarRede, batch_size = 300, epochs=100, verbose=0)
parametros = {'loss': ['mean_squared_error', 'mean_absolute_error',
                       'mean_absolute_percentage_error', 'mean_squared_logarithmic_error',
                       'squared_hinge']}
grid_search = GridSearchCV(estimator = regressor,
                           param_grid = parametros,
                           cv = 10)

#### Teste dos parâmetros

In [15]:
grid_search = grid_search.fit(previsores, preco_real)

In [16]:
melhores_parametros = grid_search.best_params_

In [17]:
melhores_precisao = grid_search.best_score_

In [18]:
print(melhores_parametros, melhores_precisao)

{'loss': 'squared_hinge'} 0.0
