In [1]:
# import relevant libraries, datasets of interest, classifiers, and performance metrics
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm, metrics
digits = datasets.load_digits();

Each digit is comprised of an 8x8 pixel image.
We can reshape the image data to make a matrix in which each row represents an image, and the columns are the pixel values. 

In [15]:
nDigits = digits.images.shape[0]
x = digits.images.reshape( nDigits, 64)
x = np.insert( x, 0, 1, axis=1)
y = digits.target.reshape( nDigits, 1)

print 'There are %i total images of digits, each with 64 pixels.' % nDigits
print 'x has dimensions %s' %(x.shape,)
print 'y has dimensions %s' %(y.shape,)

There are 1797 total images of digits, each with 64 pixels.
x has dimensions (1797, 65)
y has dimensions (1797, 1)


We will perform logistic regression on the features contained in the pixel values of the digits.

In [33]:
from scipy.special import expit # the logistic function 

def CostFunction( theta, x, y, l):
    '''
    INPUTS:
    theta = parameters of logistic regression
    x = features of our data
    y = classifiers of our data
    l = "lambda," regularization parameter
    OUTPUTS:
    J = logistic regression cost function
    '''
    m = y.shape[0]
    h = expit(x.dot(theta))
    J =  np.log(h).dot(-y.T) - np.log( 1-h).dot( 1-y.T)
    J += theta.T.dot(theta) * l * 0.5 
    J = J / m;
    return J

def CostGradient( theta, x, y, l):
    '''
    INPUTS:
    theta = parameters of logistic regression
    x = features of our data
    y = classifiers of our data
    l = "lambda," regularization parameter
    OUTPUTS:
    gradJ = gradient of cost function
    '''
    m = y.shape[0];
    h = expit(x.dot(theta));
    gradJ = x.T.dot( expit(x.dot(theta)) - y.T)
    gradJ[1:] += theta[1:]*(1.*l)
    gradJ = gradJ*(1./m)
    return gradJ

In [17]:
# test to see if functions work without any errors
initial_theta = np.zeros((x.shape[1],1))
J = CostFunction( initial_theta, x, y, 0.0)
gradJ = CostGradient( initial_theta, x, y, 0.0)

In [45]:
from scipy.optimize import fmin_cg

def ThetaOneVsAll( Nclass, x, y):
    '''
    Calculate the optimal parameter values of theta for each class
    INPUT:
    Nclass = number of classes
    OUTPUT
    thetaMatrix = Matrix of optimal parameters, each row corresponds to a class
    '''
    thetaMatrix = np.zeros((Nclass, len(x[0])))
    thetai = np.zeros((len(x[0]),1))
    thetai = thetai.reshape(len(thetai),)
    
    ytemp = y.reshape( len(y),)
    # one-vs-all classification
    # perform logistic regression for each class, training each independently
    for i in xrange(Nclass):
        yClass = np.array([ 1 if yi == 1 else 0 for yi in ytemp])
        print 'Optimizing for digit %d '%i
        
        out = fmin_cg( CostFunction, x0=thetai, fprime=CostGradient, \
                    args=(x,yClass,0.0), maxiter=50, disp=False)

        thetaMatrix[i,:] = out[0]

    return thetaMatrix

In [47]:
print x.shape, type(x)
print y.shape, type(y)
thetaMatrix = ThetaOneVsAll( 10, x, y)

(1797, 65) <type 'numpy.ndarray'>
(1797, 1) <type 'numpy.ndarray'>
Optimizing for digit 0 
Optimizing for digit 1 
Optimizing for digit 2 
Optimizing for digit 3 
Optimizing for digit 4 
Optimizing for digit 5 
Optimizing for digit 6 
Optimizing for digit 7 
Optimizing for digit 8 
Optimizing for digit 9 


  from ipykernel import kernelapp as app


In [35]:
# The prediction for the i'th row is found to be the index at which Theta*X[i] is maximized
def PredictOveVsAll( thetaMatrix, x):
    '''
    INPUTS:
    thetaMatrix = Matrix of optimal parameters, each row corresponds to a class
    OUTPUT:
    pred = predicted class for each corresponding to each row in X
    '''
    pred = np.argmax( expit(x.dot(thetaMatrix.T)), axis=1)
    return pred


In [48]:
yPredict = PredictOveVsAll( thetaMatrix, x)

In [50]:
n_correct, n_total = 0., 0.
incorrect_indices = []
for irow in xrange(x.shape[0]):
    n_total += 1
    if yPredict[irow] == y[irow][0]: 
        n_correct += 1
    else: incorrect_indices.append(irow)
print "Training set accuracy: %0.1f%%"%(100*(n_correct/n_total))

Training set accuracy: 9.9%
