In [None]:
%matplotlib inline


Logistic regression on the MNIST digits dataset
=============================================================



Fetch the MNIST digits dataset
------------------------------------
If the download fails, restart the runtime (kernel) and run this cell again.

In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
# Download the MNIST digits ('mnist_784': one row of 784 pixel values per image).
print("Downloading the MNIST digits dataset .. ")
# as_frame=False forces plain NumPy arrays. Recent scikit-learn releases
# otherwise return a pandas DataFrame for this dataset, which breaks the 3-D
# reshape below and turns X_all.max() into a per-column maximum (dividing
# always-black pixel columns by zero).
X_all, y_all = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
print("done")
# Image-shaped (n, 28, 28) copy of the raw pixels, used only for plotting.
Ximages = np.reshape(X_all, (X_all.shape[0],28,28))

print(X_all.shape)
# Scale the features so that the largest pixel value becomes 1.
X_all = X_all / X_all.max()
# The labels are converted to integers so they can be compared with and
# indexed by plain digit values in the cells below.
y_all = y_all.astype(np.int64)

Plot the data: images of digits
-------------------------------


In [None]:
from matplotlib import pyplot as plt
# Show a random 8x8 grid of digit images, each annotated with its label.
print("64 out of %d images" % len(y_all))

fig = plt.figure(figsize=(6, 6))  # size is given in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

p = np.random.randint(0, len(y_all), 64)  # 64 indices, drawn with replacement
for i, sample in enumerate(p):
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(Ximages[sample], cmap=plt.cm.gray)
    # write the target value onto the tile
    ax.text(0, 7, str(y_all[sample]), color='white')

Choose two classes if you enjoy binary classification
-----------------------------------------------------------------
Skip this cell for ten classes.

In [None]:
# Binary task: keep only the two chosen digits and relabel them as 1 / 0.
c = 2
pos = 1  # digit used as the positive class (choose from 0 to 9)
neg = 0  # digit used as the negative class (choose from 0 to 9)

# Build the selection mask once and reuse it for both samples and labels.
pair_mask = np.logical_or(y_all == pos, y_all == neg)
X = X_all[pair_mask, :]
y = y_all[pair_mask]  # boolean indexing yields a copy, so y_all stays untouched

# Both masks are taken before anything is overwritten, so the two
# relabelling steps cannot interfere with each other.
yp = (y == pos)
yn = (y == neg)
y[yp] = 1
y[yn] = 0

# lbl maps the new class index (0 / 1) back to the original digit.
lbl = [neg, pos]

Choose all ten classes
-----------------------------------------------------------------
Skip this cell for binary classification.

In [None]:
# Ten-class task: use every sample; class index and digit coincide.
c = 10
X, y = X_all, y_all
lbl = range(c)  # lbl[k] is the digit displayed for class index k

Split the data into training and test sets
--------------------------------------------------
https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Keep at most the first 2000 held-out samples as the test set.
if len(y_test) > 2000:
    X_test, y_test = X_test[:2000, :], y_test[:2000]

print("(#training data, dim.)=", X_train.shape)
print("(#test data,)=", X_test.shape)

Run the training (Caution! This will take a few minutes)
---------------------------------
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

In [None]:
# train the model
from sklearn.linear_model import LogisticRegression
# Alternative configuration kept for reference:
#   LogisticRegression(solver= 'lbfgs', multi_class='multinomial')
clf = LogisticRegression(
    C=100. / len(y_train),  # inverse regularisation strength, scaled by the training-set size
    multi_class='multinomial',
    penalty='l2',
    solver='saga',
    tol=0.1,  # stopping tolerance of the solver
)
clf.fit(X_train, y_train)

In [None]:
# Predict the labels of the held-out test samples.
predicted = clf.predict(X_test)
expected = y_test

# Show 64 distinct test samples in an 8x8 grid.
fig = plt.figure(figsize=(6, 6))  # size is given in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

# Image-shaped view of the test features, built once for all tiles.
test_images = X_test.reshape(-1, Ximages.shape[1], Ximages.shape[2])

idx64 = np.random.choice(len(y_test), 64, replace=False)
for j, i in enumerate(idx64):
    ax = fig.add_subplot(8, 8, j + 1, xticks=[], yticks=[])
    ax.imshow(test_images[i], cmap=plt.cm.gray)

    # target value on the left; prediction on the right,
    # light green when it matches the target and red when it does not
    ax.text(0, 7, str(lbl[expected[i]]), color='white')
    verdict_color = '#a0ffa0' if predicted[i] == expected[i] else 'red'
    ax.text(21, 7, str(lbl[predicted[i]]), color=verdict_color)

# accuracy = number of correct predictions / number of test samples
matches = (predicted == expected)
n_correct, n_total = matches.sum(), len(matches)
score = n_correct / float(n_total)
print("%d / %d = %2.1f %%" % (n_correct, n_total, 100*score))

Visualize the weights and classification
-------------------------------------------------
Re-run this cell to see the result for another randomly chosen test image.

In [None]:
# Stack intercept and coefficients: column 0 of w is the bias term,
# columns 1.. are the per-pixel weights (one row per weight vector).
w = np.c_[clf.intercept_, clf.coef_]
print(w.shape)

# Three rows of panels: weights, the test image, and their pixel-wise product.
fig = plt.figure(figsize=(12, 3))  # figure size in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
cmap = plt.cm.seismic

jtest = np.random.randint(len(y_test),size=1) # draw one test sample
x_test = X_test[jtest,:].ravel()              # its feature vector, flattened
img_shape = (Ximages.shape[1], Ximages.shape[2])

# Symmetric colour limits around zero, wide enough for the largest weight of
# either sign, so the diverging colormap is centred and nothing saturates.
vmax = np.abs(w[:,1:]).max()
vmin = -vmax
xmax = x_test.max()

# One column per weight vector. The count is read from w instead of
# overwriting the notebook-wide class count `c`, so re-running cells in any
# order keeps `c` intact. A binary fit has a single weight row.
n_cols = w.shape[0]
for k in range(n_cols):

    # Row 1: the weight image of column k.
    ax = fig.add_subplot(3, n_cols, k + 1, xticks=[], yticks=[])
    ax.imshow(w[k,1:].reshape(img_shape), vmin=vmin, vmax=vmax, cmap=cmap)
    # With a single weight row the score is for the positive class, i.e. lbl[1].
    shown = lbl[1] if n_cols == 1 else lbl[k]
    ax.text(0, 4, str(shown) + ' ?', color='k')

    # Row 2: the test image, annotated with its target value.
    ax = fig.add_subplot(3, n_cols, k + n_cols + 1, xticks=[], yticks=[])
    ax.imshow(x_test.reshape(img_shape), cmap=plt.cm.gray)
    ax.text(0, 7, str(lbl[expected[jtest][0]]), color='white')

    # Row 3: pixel-wise contribution weight * pixel; its sum plus the bias is g.
    wXk = x_test*w[k,1:]
    ax = fig.add_subplot(3, n_cols, k + 2*n_cols + 1, xticks=[], yticks=[])
    ax.imshow(wXk.reshape(img_shape), vmin=vmin*xmax, vmax=vmax*xmax, cmap=cmap)
    g = wXk.sum()+w[k,0]
    color = 'r' if g >= 0 else 'b'  # red for non-negative g, blue for negative
    ax.text(0, 4, str(lbl[predicted[jtest][0]]), color=color)
    # Font size grows with g but never drops below 10 pt.
    ax.text(0, 33, '$g=$'+'{:.1f}'.format(g), fontsize=max(6+2*g,10), color=color)

Quantify the performance in detail
------------------------



Print the classification report



In [None]:
from sklearn import metrics
# Per-class report on the test set. Rows are named after the actual digits
# stored in lbl rather than the internal class indices, so the binary task
# shows the two chosen digits instead of 0 / 1.
class_ids = list(range(len(lbl)))
print(metrics.classification_report(
    expected, predicted,
    labels=class_ids,
    target_names=[str(lbl[k]) for k in class_ids],
))

Print the confusion matrix



In [None]:
import seaborn as sns

# Confusion matrix of the test predictions: rows are actual classes,
# columns are predicted classes. The class list is passed explicitly so the
# matrix always has one row/column per entry of lbl.
class_ids = list(range(len(lbl)))
digit_names = [str(lbl[k]) for k in class_ids]
cm = metrics.confusion_matrix(expected, predicted, labels=class_ids)

# Normalise every row to fractions of that actual class. The divisor is
# clamped to 1 so a class without test samples gives a row of zeros, not NaN.
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_normalized = cm.astype('float') / np.maximum(row_totals, 1)

plt.figure(figsize=(9,9))
# Tick labels show the actual digits instead of the internal class indices.
sns.heatmap(cm_normalized, annot=True, fmt=".3f", linewidths=.5, square = True, cmap = 'Blues_r',
            xticklabels=digit_names, yticklabels=digit_names);
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
# score is the test accuracy computed in the prediction cell.
all_sample_title = 'Accuracy Score: {:.3f}'.format(score)
plt.title(all_sample_title, size = 15);