# OTIMIZAÇÃO

## Ajuste de Hiperparâmetros

- Grid Search Parameter Tuning

In [79]:
# from pandas import read_csv
# from sklearn.model_selection import GridSearchCV
# from sklearn.linear_model import LogisticRegression

# Grid Search Parameter Tuning
# Candidate hyperparameters: L2 penalty only, with C swept over powers of ten.
val_grid = {
    'penalty': ['l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
}

# Model whose hyperparameters are being tuned
modelo = LogisticRegression()

# Search every combination in val_grid and fit on X_escalado / y
# (both defined outside this cell; X_escalado is presumably the scaled feature matrix).
grid = GridSearchCV(modelo, val_grid)
grid.fit(X_escalado, y)

# Report the best cross-validated score as a percentage, plus the winning estimator
print("Grid Search Parameter Tuning")
print("-> Acurácia: %.3f" % (100 * grid.best_score_))
print("-> Melhores parâmetros:", grid.best_estimator_)

Grid Search Parameter Tuning
-> Acurácia: 97.778
-> Melhores parâmetros: LogisticRegression(C=10)


- Random Search Parameter Tuning

In [80]:
from sklearn.model_selection import RandomizedSearchCV

# Number of parameter settings the random search will sample.
# Note: this equals the 7 combinations available in val_grid below.
iterations = 7

# Candidate hyperparameters (same grid as the grid-search cell)
val_grid = {
    'penalty': ['l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
}

# Model whose hyperparameters are being tuned
modelo = LogisticRegression()

# Seeded random search over val_grid, fitted on X_escalado / y
rsearch = RandomizedSearchCV(modelo,
                             val_grid,
                             n_iter=iterations,
                             random_state=seed)
rsearch.fit(X_escalado, y)

# Report the best cross-validated score as a percentage, plus the winning estimator
print("Random Search Parameter Tuning")
print("-> Acurácia: %.3f" % (100 * rsearch.best_score_))
print("-> Melhores parâmetros:", rsearch.best_estimator_)

Random Search Parameter Tuning
-> Acurácia: 97.778
-> Melhores parâmetros: LogisticRegression(C=10)


## Métodos Ensemble

- Bagged Decision Trees

In [81]:
# from sklearn.model_selection import KFold
# from sklearn.model_selection import cross_val_score
# from sklearn.ensemble import BaggingClassifier
# from sklearn.tree import DecisionTreeClassifier

# Base learner replicated inside the ensemble: a decision tree with default settings
cart = DecisionTreeClassifier()

# Number of trees in the ensemble
n_trees = 100

# Build the bagging model.
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4. The first positional slot is the base learner under both
# names, so this call works on old and new scikit-learn versions alike.
modelo = BaggingClassifier(cart,
                           n_estimators = n_trees,
                           random_state = seed)

# Cross-validation using `kf`, the splitter defined outside this cell
rs = cross_val_score(estimator = modelo,
                     X = X_escalado,
                     y = y,
                     cv = kf)

# Report the mean fold score as a percentage
print("Bagged Decision Trees")
print("-> Acurácia: %.3f" % (rs.mean() * 100))

Bagged Decision Trees
-> Acurácia: 95.523


- Random Forest

In [82]:
# from sklearn.model_selection import KFold
# from sklearn.model_selection import cross_val_score
# from sklearn.ensemble import RandomForestClassifier

# Number of trees in the forest and value passed as RandomForestClassifier's
# `max_features` (features considered when looking for a split)
n_trees = 100
max_features = 3

# Build the model.
# random_state is set from the notebook-wide `seed`, matching the other
# ensemble cells; without it the forest is re-randomised on every run and the
# reported accuracy cannot be reproduced.
modelo = RandomForestClassifier(n_estimators = n_trees,
                                max_features = max_features,
                                random_state = seed)

# Cross-validation using `kf`, the splitter defined outside this cell
rs = cross_val_score(estimator = modelo,
                     X = X_escalado,
                     y = y,
                     cv = kf)

# Report the mean fold score as a percentage
print("Random Forest")
print("-> Acurácia: %.3f" % (rs.mean() * 100))

Random Forest
-> Acurácia: 98.301


- AdaBoost

In [83]:
# from pandas import read_csv
# from sklearn.model_selection import KFold
# from sklearn.model_selection import cross_val_score
# from sklearn.ensemble import AdaBoostClassifier

# Number of boosting rounds (estimators) for AdaBoost
n_trees = 30

# Seeded AdaBoost model
modelo = AdaBoostClassifier(n_estimators=n_trees, random_state=seed)

# Cross-validation using `kf`, the splitter defined outside this cell
rs = cross_val_score(modelo, X_escalado, y, cv=kf)

# Report the mean fold score as a percentage
print("AdaBoost")
print("-> Acurácia: %.3f" % (100 * rs.mean()))

AdaBoost
-> Acurácia: 90.458


- Gradient Boosting

In [84]:
# Import dos módulos
# from pandas import read_csv
# from sklearn.model_selection import KFold
# from sklearn.model_selection import cross_val_score
# from sklearn.ensemble import GradientBoostingClassifier

# Number of boosting stages (trees)
n_trees = 100

# Seeded gradient boosting model
modelo = GradientBoostingClassifier(n_estimators=n_trees, random_state=seed)

# Cross-validation using `kf`, the splitter defined outside this cell
rs = cross_val_score(modelo, X_escalado, y, cv=kf)

# Report the mean fold score as a percentage
print("Gradient Boosting")
print("-> Acurácia: %.3f" % (100 * rs.mean()))

Gradient Boosting
-> Acurácia: 92.647


- Voting Ensemble

In [85]:
# Import dos módulos
#from pandas import read_csv
#from sklearn.model_selection import KFold
#from sklearn.model_selection import cross_val_score
#from sklearn.linear_model import LogisticRegression
#from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

# Member models of the ensemble, collected as (name, estimator) pairs
estimators = []

modelo1 = LogisticRegression()
estimators.append(('logistic', modelo1))

# The tree is seeded from the notebook-wide `seed`, as the other ensemble
# cells do for their models; an unseeded tree can split differently between
# runs, which would make the reported accuracy non-reproducible.
modelo2 = DecisionTreeClassifier(random_state = seed)
estimators.append(('cart', modelo2))

modelo3 = SVC()
estimators.append(('svm', modelo3))

# Combine the members into a voting ensemble (default voting strategy)
ensemble = VotingClassifier(estimators)

# Cross-validation using `kf`, the splitter defined outside this cell
rs = cross_val_score(estimator = ensemble,
                     X = X_escalado,
                     y = y,
                     cv = kf)

# Report the mean fold score as a percentage
print("Voting Ensemble")
print("-> Acurácia: %.3f" % (rs.mean() * 100))

Voting Ensemble
-> Acurácia: 98.856


- XGBoost

In [86]:
# Extreme Gradient Boosting
# !pip install xgboost
# !conda install -c conda-forge xgboost

In [87]:
# Import dos módulos
#from pandas import read_csv
#from sklearn.model_selection import train_test_split
#from sklearn.metrics import accuracy_score
#from xgboost import XGBClassifier

# Fraction of the samples held out for testing
teste_size = 0.33

# Seeded split into train and test partitions
x_treino, x_teste, y_treino, y_teste = train_test_split(
    X_escalado,
    y,
    test_size=teste_size,
    random_state=seed,
)

# Build the classifier
modelo = XGBClassifier(eval_metric='mlogloss',
                       use_label_encoder=False)

# Train on the training partition
modelo.fit(x_treino, y_treino)

# Predict on the held-out partition
y_pred = modelo.predict(x_teste)
# NOTE(review): predict() presumably already returns class labels, which would
# make this rounding a no-op — confirm before removing it.
previsoes = list(map(round, y_pred))

# Score the predictions and report accuracy as a percentage
accuracy = accuracy_score(y_teste, previsoes)
print("Acurácia: %.2f%%" % (100.0 * accuracy))

Acurácia: 98.31%
