In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import VotingClassifier, ExtraTreesClassifier, RandomForestClassifier
from sklearn.datasets import fetch_openml
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, roc_curve
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML; the returned object
# is indexed by 'data' and 'target' in the next cell.
mnist_df = fetch_openml(name='mnist_784', version=1)

In [3]:
# Split the fetched dataset into the feature matrix and the label vector.
X, y = mnist_df['data'], mnist_df['target']

In [4]:
# Fixed, order-based split: rows 0-49,999 for training, 50,000-59,999 for
# validation and 60,000-69,999 for the final test.
X_train, y_train = X[:50000], y[:50000]
X_val, y_val = X[50000:60000], y[50000:60000]
X_test, y_test = X[60000:70000], y[60000:70000]

In [8]:
# Base model 1: support-vector classifier with scikit-learn's default
# hyper-parameters.
model_svc = SVC()

In [9]:
# Train the SVC on the training split (first 50,000 rows).
model_svc.fit(X_train, y_train)

SVC()

In [27]:
# Predictions on the training split itself; these become the 'y_pred_svc'
# feature column of X_blend, which the blender is fitted on.
y_pred_svc = model_svc.predict(X_train)

In [11]:
# Score the SVC on the held-out validation split.
# `y_pred_svc` holds predictions for X_train (it is a blender feature column),
# so comparing it with y_val mixes 50,000 predictions with 10,000 labels and
# fails on a fresh Restart & Run All. Predict on X_val explicitly instead.
y_pred_svc_val = model_svc.predict(X_val)
acc = accuracy_score(y_val, y_pred_svc_val)
print(f'SVM accuracy: {acc}')

SVM accuracy: 0.9802


In [12]:
# Base model 2: random forest with default hyper-parameters.
# Tree construction is randomized, so pin the seed to make the fitted model
# (and every accuracy derived from it) reproducible across kernel restarts.
model_rf = RandomForestClassifier(random_state=42)

In [13]:
# Train the random forest on the training split (first 50,000 rows).
model_rf.fit(X_train, y_train)

RandomForestClassifier()

In [28]:
# Predictions on the training split itself; these become the 'y_pred_rf'
# feature column of X_blend, which the blender is fitted on.
y_pred_rf = model_rf.predict(X_train)

In [15]:
# Score the random forest on the held-out validation split.
# `y_pred_rf` holds predictions for X_train (it is a blender feature column),
# so comparing it with y_val mixes 50,000 predictions with 10,000 labels and
# fails on a fresh Restart & Run All. Predict on X_val explicitly instead.
y_pred_rf_val = model_rf.predict(X_val)
acc = accuracy_score(y_val, y_pred_rf_val)
print(f'Random Forrest accuracy: {acc}')

Random Forrest accuracy: 0.9718


In [16]:
# Base model 3: extremely randomized trees with default hyper-parameters.
# Tree construction is randomized, so pin the seed to make the fitted model
# (and every accuracy derived from it) reproducible across kernel restarts.
model_et = ExtraTreesClassifier(random_state=42)

In [17]:
# Train the extra-trees ensemble on the training split (first 50,000 rows).
model_et.fit(X_train, y_train)

ExtraTreesClassifier()

In [29]:
# Predictions on the training split itself; these become the 'y_pred_et'
# feature column of X_blend, which the blender is fitted on.
y_pred_et = model_et.predict(X_train)

In [19]:
# Score the extra-trees ensemble on the held-out validation split.
# `y_pred_et` holds predictions for X_train (it is a blender feature column),
# so comparing it with y_val mixes 50,000 predictions with 10,000 labels and
# fails on a fresh Restart & Run All. Predict on X_val explicitly instead.
y_pred_et_val = model_et.predict(X_val)
acc = accuracy_score(y_val, y_pred_et_val)
print(f'Extra Trees accuracy: {acc}')

Extra Trees accuracy: 0.9743


In [30]:
# Blender training features: one column per base model, each holding that
# model's predicted label for every training row.
X_blend = pd.DataFrame(
    dict(
        y_pred_svc=y_pred_svc,
        y_pred_rf=y_pred_rf,
        y_pred_et=y_pred_et,
    )
)
X_blend.head()

Unnamed: 0,y_pred_svc,y_pred_rf,y_pred_et
0,5,5,5
1,0,0,0
2,4,4,4
3,1,1,1
4,9,9,9


In [36]:
# Blender (meta-classifier): a second SVC with default hyper-parameters whose
# input features are the three base models' predicted labels.
blending_clf = SVC()

In [37]:
# Fit the blender on the base models' predictions for the training split.
# NOTE(review): X_blend is built by predicting on X_train, the same rows the
# base models were fitted on, so these features are likely over-optimistic.
# Consider fitting the blender on held-out predictions instead — TODO confirm.
blending_clf.fit(X_blend, y_train)

SVC()

In [38]:
# Run each fitted base model on the validation split (SVC, then random forest,
# then extra trees) to produce the blender's validation inputs.
y_pred_svc_val, y_pred_rf_val, y_pred_et_val = (
    base_model.predict(X_val)
    for base_model in (model_svc, model_rf, model_et)
)

In [39]:
# Blender validation features, using the same column names and order as the
# frame the blender was fitted on.
X_blend_val = pd.DataFrame(
    dict(
        y_pred_svc=y_pred_svc_val,
        y_pred_rf=y_pred_rf_val,
        y_pred_et=y_pred_et_val,
    )
)
X_blend_val.head()

Unnamed: 0,y_pred_svc,y_pred_rf,y_pred_et
0,3,3,3
1,8,8,8
2,6,6,6
3,9,9,9
4,6,6,6


In [40]:
# Final stacked prediction for the validation split: the blender maps the three
# base-model labels in each row to a single label.
y_pred_blend_val = blending_clf.predict(X_blend_val)

In [41]:
# Accuracy of the stacked ensemble on the validation split.
acc = accuracy_score(y_true=y_val, y_pred=y_pred_blend_val)
print(f'Stacking accuracy: {acc}')

Stacking accuracy: 0.9685


In [46]:
# Run each fitted base model on the test split (SVC, then random forest, then
# extra trees) to produce the blender's test inputs.
y_pred_svc_test, y_pred_rf_test, y_pred_et_test = (
    base_model.predict(X_test)
    for base_model in (model_svc, model_rf, model_et)
)


In [47]:
# Blender test features, using the same column names and order as the frame
# the blender was fitted on.
X_blend_test = pd.DataFrame(
    dict(
        y_pred_svc=y_pred_svc_test,
        y_pred_rf=y_pred_rf_test,
        y_pred_et=y_pred_et_test,
    )
)
X_blend_test.head()

Unnamed: 0,y_pred_svc,y_pred_rf,y_pred_et
0,7,7,7
1,2,2,2
2,1,1,1
3,0,0,0
4,4,4,4


In [48]:
# Final stacked prediction for the test split: the blender maps the three
# base-model labels in each row to a single label.
y_pred_blend_test = blending_clf.predict(X_blend_test)

In [49]:
# Accuracy of the stacked ensemble on the test split.
acc = accuracy_score(y_true=y_test, y_pred=y_pred_blend_test)
print(f'Stacking accuracy: {acc}')

Stacking accuracy: 0.9648


In [None]:
# Validation accuracy of the stacked ensemble.
# The original cell referenced `y_pred`, which no visible cell defines, so it
# raised NameError on a fresh kernel. The blender's validation predictions are
# stored in `y_pred_blend_val`.
acc = accuracy_score(y_val, y_pred_blend_val)
print(f'Stacking accuracy: {acc}')