# Ensemble methods: Random Forest and AdaBoost on the breast cancer dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# % matplotlib inline
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import tree


In [None]:
from sklearn.datasets import load_breast_cancer
# Load scikit-learn's bundled breast cancer dataset. Later cells read its
# .data, .target, .feature_names and .target_names attributes.
cancer = load_breast_cancer()

In [None]:
# Split features and labels into training and held-out test sets.
# No test_size/train_size is passed, so train_test_split's default proportion
# applies; random_state=0 seeds the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=0)

In [None]:
# Bare expression: when run as a notebook cell, this displays the dataset's
# feature_names attribute as the cell output.
cancer.feature_names

In [None]:
# Bare expression: when run as a notebook cell, this displays the dataset's
# target_names attribute (presumably the class labels behind cancer.target).
cancer.target_names

In [None]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

# Random forest baseline: 100 trees, seeded so the run is repeatable.
# fit() returns the estimator itself, so construction and training are chained.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# score() is evaluated on the held-out split; scale the fraction to a percentage.
acc = 100 * rf_model.score(X_test, y_test)
print(f"Test Accuracy {acc:.2f}%")

In [None]:

# AdaBoost baseline: 100 boosting rounds, seeded so the run is repeatable.
# fit() returns the estimator itself, so it is built and trained in one statement.
ada_model = AdaBoostClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# score() is evaluated on the held-out split; scale the fraction to a percentage.
acc = 100 * ada_model.score(X_test, y_test)
print(f"Test Accuracy {acc:.2f}%")


In [None]:
from sklearn.model_selection import GridSearchCV
# Tune AdaBoost's number of boosting rounds with a cross-validated grid search
# run on the training split only.
# The search gets its own unfitted estimator instead of rebinding `ada_model`,
# so the AdaBoost model trained earlier stays fitted and can still be scored
# if cells are re-run out of order.
ada_param_grid = {'n_estimators': [50, 100, 200, 250, 300, 350, 400]}
ada_grid_search = GridSearchCV(
    estimator=AdaBoostClassifier(random_state=42),
    param_grid=ada_param_grid,
    return_train_score=True,  # required for the mean_train_score column plotted below
)
ada_grid_search.fit(X_train, y_train)

# One row per candidate n_estimators, with fold-averaged train/validation scores.
scores = pd.DataFrame(ada_grid_search.cv_results_)
# NOTE: depending on the scikit-learn/pandas versions, param_* columns may be
# object dtype, which can make the x axis categorical (evenly spaced ticks even
# though the grid is not evenly spaced). Casting a copy to int keeps the axis
# numeric without modifying `scores` itself.
scores.astype({'param_n_estimators': int}).plot(
    x='param_n_estimators', y=['mean_train_score', 'mean_test_score'], ax=plt.gca())
plt.xlabel("n_estimators")
plt.ylabel("mean cross-validated score")
plt.legend(loc=(1, 0))

In [None]:
# Winning hyper-parameter combination found by the grid search.
ada_best_params = ada_grid_search.best_params_
print("Adaboosting Best Parameters:", ada_best_params)

# Mean cross-validated score achieved by that combination.
ada_best_score = ada_grid_search.best_score_
print("Adaboosting Best Score:", ada_best_score)