#Data input and preprocessing

In [0]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsOneClassifier

In [41]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml("mnist_784", version = 1)
mnist.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'details', 'categories', 'url'])

In [0]:
X, y = mnist['data'], mnist['target']

In [43]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)
X_train.shape


(63000, 784)

#Fine tune the RBF model.
Note: due to the size of our data, the following tuning was run on 3% of the whole data (test_size=0.97).

In [0]:

tuned_parameters_rbf = {'ovr_svm_clf_rbf__estimator__gamma': [0.0007, 0.001,0.0013],
                     'ovr_svm_clf_rbf__estimator__C': [1, 50, 3000]}
svm_clf_rbf = SVC(kernel='rbf', random_state=42)
rbf_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("ovr_svm_clf_rbf", OneVsRestClassifier(svm_clf_rbf))
])
clf = GridSearchCV(rbf_kernel_svm_clf, tuned_parameters_rbf, cv=5,
                       scoring="accuracy")
clf.fit(X_train, y_train)
clf.best_params_

{'ovr_svm_clf_rbf__estimator__C': 50,
 'ovr_svm_clf_rbf__estimator__gamma': 0.001}

In [0]:
cvres = clf.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(mean_score, params)

0.8814285714285715 {'ovr_svm_clf_rbf__estimator__C': 1, 'ovr_svm_clf_rbf__estimator__gamma': 0.0007}
0.89 {'ovr_svm_clf_rbf__estimator__C': 1, 'ovr_svm_clf_rbf__estimator__gamma': 0.001}
0.8961904761904762 {'ovr_svm_clf_rbf__estimator__C': 1, 'ovr_svm_clf_rbf__estimator__gamma': 0.0013}
0.9123809523809524 {'ovr_svm_clf_rbf__estimator__C': 50, 'ovr_svm_clf_rbf__estimator__gamma': 0.0007}
0.9157142857142857 {'ovr_svm_clf_rbf__estimator__C': 50, 'ovr_svm_clf_rbf__estimator__gamma': 0.001}
0.9152380952380952 {'ovr_svm_clf_rbf__estimator__C': 50, 'ovr_svm_clf_rbf__estimator__gamma': 0.0013}
0.9123809523809524 {'ovr_svm_clf_rbf__estimator__C': 3000, 'ovr_svm_clf_rbf__estimator__gamma': 0.0007}
0.9157142857142857 {'ovr_svm_clf_rbf__estimator__C': 3000, 'ovr_svm_clf_rbf__estimator__gamma': 0.001}
0.9152380952380952 {'ovr_svm_clf_rbf__estimator__C': 3000, 'ovr_svm_clf_rbf__estimator__gamma': 0.0013}


#Train RBF model using best parameter
Note: now we use 90% of our data to train RBF model (test_size=0.1).

In [0]:
rbf_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("ovr_svm_clf", OneVsRestClassifier(SVC(kernel='rbf', C=50, gamma = 0.001, random_state = 42)))
])
rbf_kernel_svm_clf.fit(X_train, y_train)


Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('ovr_svm_clf',
                 OneVsRestClassifier(estimator=SVC(C=50, cache_size=200,
                                                   class_weight=None, coef0=0.0,
                                                   decision_function_shape='ovr',
                                                   degree=3, gamma=0.001,
                                                   kernel='rbf', max_iter=-1,
                                                   probability=False,
                                                   random_state=42,
                                                   shrinking=True, tol=0.001,
                                                   verbose=False),
                                     n_jobs=None))],
         verbose=False)

In [0]:
y_pred_rbf = rbf_kernel_svm_clf.predict(X_test)
accuracy_score(y_test, y_pred_rbf)

0.9754285714285714

#Fine tune the poly model. 
Note: due to the size of our data, the following tuning was run on 3% of the whole data (test_size=0.97).

In [36]:
tuned_parameters_poly = {'ovr_svm_clf_poly__estimator__degree': [3, 5],
                        'ovr_svm_clf_poly__estimator__C': [50, 10],
                        'ovr_svm_clf_poly__estimator__gamma': [0.005, 0.006, 0.006125]}
svm_clf_poly = SVC(kernel='poly', random_state=42, coef0=1)
poly_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("ovr_svm_clf_poly", OneVsRestClassifier(svm_clf_poly))
])
poly_clf = GridSearchCV(poly_kernel_svm_clf, tuned_parameters_poly, cv=3,
                       scoring="accuracy")
poly_clf.fit(X_train, y_train)
poly_clf.best_params_

{'ovr_svm_clf_poly__estimator__C': 50,
 'ovr_svm_clf_poly__estimator__degree': 3,
 'ovr_svm_clf_poly__estimator__gamma': 0.006}

In [37]:
cvres = poly_clf.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(mean_score, params)

0.929047619047619 {'ovr_svm_clf_poly__estimator__C': 50, 'ovr_svm_clf_poly__estimator__degree': 3, 'ovr_svm_clf_poly__estimator__gamma': 0.005}
0.9295238095238095 {'ovr_svm_clf_poly__estimator__C': 50, 'ovr_svm_clf_poly__estimator__degree': 3, 'ovr_svm_clf_poly__estimator__gamma': 0.006}
0.9295238095238095 {'ovr_svm_clf_poly__estimator__C': 50, 'ovr_svm_clf_poly__estimator__degree': 3, 'ovr_svm_clf_poly__estimator__gamma': 0.006125}
0.9176190476190477 {'ovr_svm_clf_poly__estimator__C': 50, 'ovr_svm_clf_poly__estimator__degree': 5, 'ovr_svm_clf_poly__estimator__gamma': 0.005}
0.9176190476190477 {'ovr_svm_clf_poly__estimator__C': 50, 'ovr_svm_clf_poly__estimator__degree': 5, 'ovr_svm_clf_poly__estimator__gamma': 0.006}
0.9176190476190477 {'ovr_svm_clf_poly__estimator__C': 50, 'ovr_svm_clf_poly__estimator__degree': 5, 'ovr_svm_clf_poly__estimator__gamma': 0.006125}
0.929047619047619 {'ovr_svm_clf_poly__estimator__C': 10, 'ovr_svm_clf_poly__estimator__degree': 3, 'ovr_svm_clf_poly__estimat

#Train Poly model using best parameter
Note: now we use 90% of our data to train Poly model (test_size=0.1).

In [39]:

poly_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("ovr_svm_clf", OneVsRestClassifier(SVC(kernel='poly',degree=3, coef0=1, C=50, random_state=42, gamma=0.006)))
])
poly_kernel_svm_clf.fit(X_train, y_train)
y_pred = poly_kernel_svm_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.9821428571428571

# Conclusion
Based on my tuning by GridSearchCV, we found each best parameters for Polynomial Model and RBF Model. 

For Polynomial Model, the best parameter is degree=3, C=50, and gamma=0.006. The accuracy for this parameter is 98.21%. 

For RBF Model, the best parameter is C=50 and gamma=0.001. The accuracy for this parameter is 97.54%. 

In conclusion, the best model is polynomial model with degree=3, C=50, and gamma=0.006, which has the highest accuracy score 98.21%.