# Ensemble Learning

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [3]:
from sklearn.datasets import make_moons, load_wine
from sklearn.model_selection import train_test_split

In [4]:
from sklearn.ensemble import VotingClassifier

In [5]:
dtree = DecisionTreeClassifier(random_state=666)
nb = GaussianNB()
knn = KNeighborsClassifier()
logistic = LogisticRegression(random_state=666)

In [6]:
algorithms = [('DTree', dtree), 
              ('NaiveBayes', nb), 
              ('KNN', knn), 
              ('Logistic', logistic)
             ]

ensemble_clf = VotingClassifier(estimators=algorithms, n_jobs=-1, voting='soft')

In [7]:
# X, y = make_moons(n_samples=150, noise=0.15, random_state=666)
wine_data = load_wine()
X = wine_data.data
y = wine_data.target

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666, shuffle=True)

In [9]:
ensemble_clf.fit(X_train, y_train)

In [10]:
y_pred = ensemble_clf.predict(X_test)

In [11]:
y_pred

array([2, 1, 0, 1, 2, 2, 1, 1, 0, 0, 1, 1, 1, 2, 0, 2, 0, 1, 1, 0, 0, 2,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1])

In [17]:
y_pred

array([2, 1, 0, 1, 2, 2, 1, 1, 0, 0, 1, 1, 1, 2, 0, 2, 0, 1, 1, 0, 0, 2,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1])

In [12]:
from mlxtend.plotting import plot_decision_regions

In [13]:
# plot_decision_regions(X_test, y_test, ensemble_clf)
# plt.show()

In [14]:
# plot_decision_regions(X_train, y_train, ensemble_clf)
# plt.show()

In [15]:
from sklearn.metrics import accuracy_score

In [16]:
accuracy_score(y_test, y_pred)

1.0

In [18]:
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

In [19]:
rand_forest = RandomForestClassifier(n_jobs=-1, n_estimators=50)
ada_boost = AdaBoostClassifier()

In [20]:
rand_forest.fit(X_train, y_train)

In [21]:
ada_boost.fit(X_train, y_train)

In [22]:
y_pred_rand = rand_forest.predict(X_test)
accuracy_score(y_test, y_pred_rand)

1.0

In [23]:
y_pred_ada_boost = ada_boost.predict(X_test)
accuracy_score(y_test, y_pred_ada_boost)

1.0

In [24]:
wine_data.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [25]:
rand_forest.feature_importances_

array([0.11610459, 0.02727661, 0.00562751, 0.02369177, 0.02301652,
       0.06474612, 0.18515161, 0.01546721, 0.02934523, 0.18761306,
       0.14582103, 0.0690276 , 0.10711113])

In [26]:
ada_boost.feature_importances_

array([0.02, 0.  , 0.  , 0.  , 0.  , 0.02, 0.48, 0.  , 0.  , 0.02, 0.4 ,
       0.04, 0.02])

In [28]:
for imp, feat in zip(rand_forest.feature_importances_, wine_data.feature_names):
    print(f'{feat}: {round(imp, 2) * 100}')

alcohol: 12.0
malic_acid: 3.0
ash: 1.0
alcalinity_of_ash: 2.0
magnesium: 2.0
total_phenols: 6.0
flavanoids: 19.0
nonflavanoid_phenols: 2.0
proanthocyanins: 3.0
color_intensity: 19.0
hue: 15.0
od280/od315_of_diluted_wines: 7.000000000000001
proline: 11.0


In [29]:
for imp, feat in zip(ada_boost.feature_importances_, wine_data.feature_names):
    print(f'{feat}: {round(imp, 2) * 100}')

alcohol: 2.0
malic_acid: 0.0
ash: 0.0
alcalinity_of_ash: 0.0
magnesium: 0.0
total_phenols: 2.0
flavanoids: 48.0
nonflavanoid_phenols: 0.0
proanthocyanins: 0.0
color_intensity: 2.0
hue: 40.0
od280/od315_of_diluted_wines: 4.0
proline: 2.0


In [30]:
knn = KNeighborsClassifier()

In [31]:
knn.fit(X_train, y_train)

In [32]:
knn.predict_proba(X_test)

array([[0.2, 0.4, 0.4],
       [0. , 0.8, 0.2],
       [1. , 0. , 0. ],
       [0. , 1. , 0. ],
       [0.2, 0.6, 0.2],
       [0. , 0.4, 0.6],
       [0. , 1. , 0. ],
       [0. , 0.4, 0.6],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [0. , 1. , 0. ],
       [0. , 1. , 0. ],
       [0. , 1. , 0. ],
       [0.4, 0.2, 0.4],
       [1. , 0. , 0. ],
       [0.2, 0.2, 0.6],
       [1. , 0. , 0. ],
       [0. , 1. , 0. ],
       [0. , 1. , 0. ],
       [1. , 0. , 0. ],
       [0.6, 0.2, 0.2],
       [0. , 0.6, 0.4],
       [0. , 0.2, 0.8],
       [0. , 1. , 0. ],
       [0. , 0.6, 0.4],
       [0. , 1. , 0. ],
       [0. , 0.8, 0.2],
       [0. , 1. , 0. ],
       [1. , 0. , 0. ],
       [0.2, 0.6, 0.2],
       [0. , 0.6, 0.4],
       [0. , 1. , 0. ],
       [0.8, 0.2, 0. ],
       [0. , 0.2, 0.8],
       [1. , 0. , 0. ],
       [0. , 0.6, 0.4]])