In [31]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

In [2]:
warnings.filterwarnings("ignore")

## Voting Classifier

In [3]:
# Download the MNIST digits dataset from OpenML.
# - version=1 pins the dataset revision so a re-run always fetches the same data
#   instead of whatever OpenML currently marks as "active".
# - as_frame=True is explicit because the next cell reads `.values` from the
#   pandas containers in `mnist.data` / `mnist.target`.
mnist = datasets.fetch_openml("mnist_784", version=1, as_frame=True)

In [9]:
# Unwrap the underlying values of the feature table and the label column.
X, y = mnist.data.values, mnist.target.values

# Fixed positional split: first 50,000 rows for training, the next 10,000 for
# validation, and the last 10,000 rows for the final test.
TRAIN_END, VALID_END, TEST_SIZE = 50_000, 60_000, 10_000

train_rows = slice(None, TRAIN_END)
valid_rows = slice(TRAIN_END, VALID_END)
test_rows = slice(-TEST_SIZE, None)

X_train, y_train = X[train_rows], y[train_rows]
X_valid, y_valid = X[valid_rows], y[valid_rows]
X_test, y_test = X[test_rows], y[test_rows]

In [32]:
# One seed shared by all base models so the whole run is repeatable.
SEED = 42

# Four heterogeneous base classifiers that will later be combined.
# The linear SVM is deliberately capped (few iterations, very loose tolerance).
svc = LinearSVC(dual=True, tol=20, max_iter=100, random_state=SEED)
rfc = RandomForestClassifier(n_estimators=100, random_state=SEED)
etc = ExtraTreesClassifier(n_estimators=100, random_state=SEED)
mlp = MLPClassifier(random_state=SEED)

In [33]:
# Collect the base models in one list; later cells iterate over it for scoring
# and for building the stacking features.
estimators = [svc, rfc, etc, mlp]

# Fit every base model independently on the training split.
for estimator in estimators:
    # The estimator's repr doubles as a progress message.
    print(f"Training the {estimator}")
    estimator.fit(X_train, y_train)

Training the LinearSVC(dual=True, max_iter=100, random_state=42, tol=20)
Training the RandomForestClassifier(random_state=42)
Training the ExtraTreesClassifier(random_state=42)
Training the MLPClassifier(random_state=42)


In [34]:
# Validation accuracy of each individually trained model, in the order of `estimators`.
[model.score(X_valid, y_valid) for model in estimators]

[0.8662, 0.9736, 0.9743, 0.9648]

In [35]:
# (name, estimator) pairs in the form VotingClassifier expects; the names are the
# keys later used with `set_params` and `named_estimators_`.
named__estimators = list(dict(svc=svc, rfc=rfc, etc=etc, mlp=mlp).items())

# Majority-vote ensemble over the four base models, using all available cores.
voting_clf = VotingClassifier(estimators=named__estimators, voting="hard", n_jobs=-1)
voting_clf.fit(X_train, y_train)

In [36]:
# Validation accuracy of the hard-voting ensemble with all four members.
voting_clf.score(X_valid, y_valid)

0.9758

In [37]:
# The ensemble's fitted sub-estimators were trained on labels encoded by the
# LabelEncoder that VotingClassifier fitted on y_train (exposed as `le_`).
# Reuse that encoder instead of fitting a new one on the validation labels:
# a fresh fit only yields the same mapping when the validation split happens to
# contain exactly the same set of classes as the training split.
encoder = voting_clf.le_
y_valid_encoded = encoder.transform(y_valid)

In [38]:
# Validation accuracy of each fitted clone held by the ensemble, scored against
# the integer-encoded labels.
[fitted.score(X_valid, y_valid_encoded) for fitted in voting_clf.estimators_]

[0.8662, 0.9736, 0.9743, 0.9648]

In [39]:
# Mark the "svc" entry as dropped in the ensemble's `estimators` parameter.
# This alone does not touch the already fitted `estimators_` list, which is why
# the trained LinearSVC is removed by hand a few cells below.
voting_clf.set_params(svc="drop")

In [40]:
# Show the configured (unfitted) and fitted (trailing underscore) views side by
# side, one per line, to see which of them the "drop" change affected.
print(
    f"voting_clf.estimators: {voting_clf.estimators}",
    f"voting_clf.estimators_: {voting_clf.estimators_}",
    f"voting_clf.named_estimators: {voting_clf.named_estimators}",
    f"voting_clf.named_estimators_: {voting_clf.named_estimators_}",
    sep="\n",
)

voting_clf.estimators: [('svc', 'drop'), ('rfc', RandomForestClassifier(random_state=42)), ('etc', ExtraTreesClassifier(random_state=42)), ('mlp', MLPClassifier(random_state=42))]
voting_clf.estimators_: [LinearSVC(dual=True, max_iter=100, random_state=42, tol=20), RandomForestClassifier(random_state=42), ExtraTreesClassifier(random_state=42), MLPClassifier(random_state=42)]
voting_clf.named_estimators: {'svc': 'drop', 'rfc': RandomForestClassifier(random_state=42), 'etc': ExtraTreesClassifier(random_state=42), 'mlp': MLPClassifier(random_state=42)}
voting_clf.named_estimators_: {'svc': LinearSVC(dual=True, max_iter=100, random_state=42, tol=20), 'rfc': RandomForestClassifier(random_state=42), 'etc': ExtraTreesClassifier(random_state=42), 'mlp': MLPClassifier(random_state=42)}


In [41]:
# Remove the trained LinearSVC from the fitted state by hand so the ensemble
# stops consulting it without being refitted.
# NOTE: this mutates the fitted ensemble in place; running the cell a second
# time raises KeyError because "svc" is already gone.
svc_trained = voting_clf.named_estimators_["svc"]
del voting_clf.named_estimators_["svc"]
voting_clf.estimators_.remove(svc_trained)

In [45]:
# Validation accuracy of the trimmed ensemble with majority (hard) voting.
voting_clf.set_params(voting="hard").score(X_valid, y_valid)

0.9765

In [46]:
# Same trimmed ensemble, now switched to soft voting, scored on the validation set.
voting_clf.set_params(voting="soft").score(X_valid, y_valid)

0.9703

In [47]:
# Switch back to hard voting and report the ensemble's accuracy on the test set.
voting_clf.set_params(voting="hard").score(X_test, y_test)

0.9721

In [48]:
# Encode the test labels with the LabelEncoder that VotingClassifier fitted on
# y_train (exposed as `le_`), i.e. the same mapping its sub-estimators were
# trained with. Fitting a new encoder on y_test would only match that mapping
# when the test split contains exactly the same set of classes.
encoder = voting_clf.le_
y_test_encoded = encoder.transform(y_test)

In [49]:
# Test accuracy of each fitted member still held by the ensemble, scored against
# the integer-encoded labels.
[fitted.score(X_test, y_test_encoded) for fitted in voting_clf.estimators_]

[0.968, 0.9703, 0.9617]

## Stacking Ensemble

In [50]:
# Stacking features for the blender: one row per validation sample, one column
# per base model, each cell holding that model's predicted label.
X_valid_predictions = np.empty((len(X_valid), len(estimators)), dtype=object)

# Each row of the transposed view is a writable view of one column of the matrix.
for prediction_column, estimator in zip(X_valid_predictions.T, estimators):
    prediction_column[:] = estimator.predict(X_valid)

In [51]:
# Peek at the stacked predictions: each row is one validation sample, each
# column one base model's predicted label.
X_valid_predictions

array([['3', '3', '3', '3'],
       ['8', '8', '8', '8'],
       ['6', '6', '6', '6'],
       ...,
       ['5', '5', '5', '5'],
       ['6', '6', '6', '6'],
       ['8', '8', '8', '8']], dtype=object)

In [53]:
# Blender (meta-learner): a random forest trained on the base models' validation
# predictions. oob_score=True makes an out-of-bag accuracy estimate available
# after fitting.
blender_params = dict(n_estimators=200, oob_score=True, random_state=42)
rnd_forest_blender = RandomForestClassifier(**blender_params)
rnd_forest_blender.fit(X_valid_predictions, y_valid)

In [55]:
# Out-of-bag accuracy estimate of the blender (available because it was built
# with oob_score=True).
rnd_forest_blender.oob_score_

0.9723

In [56]:
# Build the blender's input for the test set: one row per test sample, one
# column per base model, each cell holding that model's predicted label.
X_test_predictions = np.empty((len(X_test), len(estimators)), dtype=object)

# Each row of the transposed view is a writable view of one column of the matrix.
for prediction_column, estimator in zip(X_test_predictions.T, estimators):
    prediction_column[:] = estimator.predict(X_test)

In [57]:
# Final stacked predictions: the blender maps the base models' test-set
# predictions to one label per test sample.
y_pred = rnd_forest_blender.predict(X_test_predictions)

In [58]:
# Test-set accuracy of the stacking ensemble (true labels vs. blender predictions).
accuracy_score(y_test, y_pred)

0.9693