In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import VotingClassifier, ExtraTreesClassifier, RandomForestClassifier
from sklearn.datasets import fetch_openml
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, roc_curve
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [21]:
mnist_df = fetch_openml('mnist_784', version=1)

In [23]:
X = mnist_df['data']
y = mnist_df['target']

In [25]:
X.shape

(70000, 784)

In [29]:
X_train = X[:50000]
y_train = y[:50000]
X_val = X[50000:60000]
y_val = y[50000:60000]
X_test = X[60000:70000]
y_test = y[60000:70000]

In [30]:
X_test.shape

(10000, 784)

In [31]:
# SVC
model_svc = SVC()

In [32]:
model_svc.fit(X_train, y_train)

SVC()

In [33]:
y_pred = model_svc.predict(X_val)

In [45]:
acc = accuracy_score(y_val, y_pred)
print(f'SVM accuracy: {acc}')

SVM accuracy: 0.975


In [36]:
# Random Forest
model_rf = RandomForestClassifier()

In [37]:
model_rf.fit(X_train, y_train)

RandomForestClassifier()

In [38]:
y_pred = model_rf.predict(X_val)

In [40]:
acc = accuracy_score(y_val, y_pred)
print(f'Random Forrest accuracy: {acc}')

Random Forrest accuracy: 0.9721


In [41]:
# Extra Trees
model_et = ExtraTreesClassifier()

In [42]:
model_et.fit(X_train, y_train)

ExtraTreesClassifier()

In [43]:
y_pred = model_et.predict(X_val)

In [44]:
acc = accuracy_score(y_val, y_pred)
print(f'Extra Trees accuracy: {acc}')

Extra Trees accuracy: 0.975


In [47]:
# Hard Voting 
voting_clf = VotingClassifier(estimators=[('SVM', model_svc), ('rf', model_rf), ('et', model_et)], voting='hard')

In [48]:
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('SVM', SVC()), ('rf', RandomForestClassifier()),
                             ('et', ExtraTreesClassifier())])

In [49]:
y_pred = voting_clf.predict(X_val)

In [50]:
acc = accuracy_score(y_val, y_pred)
print(f'Hard voting validation accuracy: {acc}')

Hard voting validation accuracy: 0.9767


In [51]:
y_pred = voting_clf.predict(X_test)

In [52]:
acc = accuracy_score(y_test, y_pred)
print(f'Hard voting test accuracy: {acc}')

Hard voting test accuracy: 0.973


In [62]:
# Soft voting
voting_clf = VotingClassifier(estimators=[('SVM', SVC(probability=True)), ('rf', RandomForestClassifier()), 
                                          ('et', ExtraTreesClassifier())], voting='soft')

In [63]:
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('SVM', SVC(probability=True)),
                             ('rf', RandomForestClassifier()),
                             ('et', ExtraTreesClassifier())],
                 voting='soft')

In [67]:
y_pred = voting_clf.predict(X_val)

In [68]:
acc = accuracy_score(y_val, y_pred)
print(f'Soft voting validation accuracy: {acc}')

Soft voting validation accuracy: 0.981


In [69]:
y_pred = voting_clf.predict(X_test)

In [71]:
acc = accuracy_score(y_test, y_pred)
print(f'Soft voting test accuracy: {acc}')

Soft voting test accuracy: 0.9784
