In [2]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import style

# sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import explained_variance_score
from sklearn.metrics import r2_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from pprint import pprint
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score



# other
from pprint import pprint
from joblib import dump, load
import os
import glob

  from numpy.core.umath_tests import inner1d


In [3]:
train = pd.read_csv("fashion-mnist_train.csv")
test = pd.read_csv("fashion-mnist_test.csv")

In [4]:
X = train.drop(columns = 'label')
y = train[['label']]

print(X.shape, y.shape)

(60000, 784) (60000, 1)


In [8]:
X_test = test.drop(columns = 'label')
y_test = test[['label']]

print(X_test.shape, y_test.shape)

(10000, 784) (10000, 1)


In [9]:
# Standardize from [0,255] to [0,1]
X /= 255
X_test /= 255

### AdaBoost

In [20]:
ada = AdaBoostClassifier(learning_rate = 0.5, n_estimators = 500, random_state=None)

ada.fit(X, y.values.ravel())

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=0.5, n_estimators=500, random_state=None)

In [21]:
# save the model to disk

filename = 'ada_mnist.joblib'
dump(ada, filename)

['ada_mnist.joblib']

In [22]:
ada_pred = ada.predict(X_test)
cr_ada = classification_report(y_test, ada_pred)
print(cr_ada)

ada_acc = accuracy_score(y_test, ada_pred)
print("Accuracy Score (Ada):", ada_acc)

             precision    recall  f1-score   support

          0       0.00      0.00      0.00      1000
          1       0.00      0.00      0.00      1000
          2       0.15      1.00      0.26      1000
          3       0.52      0.06      0.11      1000
          4       0.12      0.00      0.00      1000
          5       0.87      0.52      0.65      1000
          6       0.00      0.00      0.00      1000
          7       0.77      0.76      0.77      1000
          8       0.50      0.34      0.40      1000
          9       0.75      0.80      0.77      1000

avg / total       0.37      0.35      0.30     10000

Accuracy Score (Ada): 0.3462


  'precision', 'predicted', average, warn_for)


### Gradient Boosting Classifier

In [6]:
gbc = GradientBoostingClassifier(n_estimators = 1000, learning_rate = 0.5)

gbc.fit(X, y.values.ravel())

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.5, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=1000,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)

In [10]:
gbc_pred = gbc.predict(X_test)
cr_gbc = classification_report(y_test, gbc_pred)
print(cr_gbc)

gbc_acc = accuracy_score(y_test, gbc_pred)
print("Accuracy Score (GBC):", gbc_acc)

             precision    recall  f1-score   support

          0       0.83      0.85      0.84      1000
          1       0.98      0.98      0.98      1000
          2       0.84      0.82      0.83      1000
          3       0.92      0.90      0.91      1000
          4       0.82      0.84      0.83      1000
          5       0.99      0.95      0.97      1000
          6       0.73      0.72      0.72      1000
          7       0.94      0.96      0.95      1000
          8       0.95      0.96      0.96      1000
          9       0.95      0.96      0.96      1000

avg / total       0.90      0.90      0.89     10000

Accuracy Score (GBC): 0.895


In [15]:
import pickle

# save the model to disk
filename = 'gbc_mnist_0607.sav'
pickle.dump(gbc, open(filename, 'wb'))

In [19]:
# save the model to disk

filename = 'gbc_mnist_jl.joblib'
dump(gbc, filename)

['gbc_mnist_jl.joblib']

In [26]:
# save the model to disk

pred_csv = pd.DataFrame(gbc_pred, columns=['predictions']).to_csv('gbc_preds.csv')