## Load MNIST on Python 3.x

In [1]:
import pickle
import gzip

In [2]:
filename = 'mnist.pkl.gz'
f = gzip.open(filename, 'rb')
training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
f.close()

## Load USPS on Python 3.x

In [3]:
from PIL import Image
import os
import numpy as np

In [4]:
USPSMat  = []
USPSTar  = []
curPath  = 'USPSdata/Numerals'
savedImg = []

for j in range(0,10):
    curFolderPath = curPath + '/' + str(j)
    imgs =  os.listdir(curFolderPath)
    for img in imgs:
        curImg = curFolderPath + '/' + img
        if curImg[-3:] == 'png':
            img = Image.open(curImg,'r')
            img = img.resize((28, 28))
            savedImg = img
            imgdata = (255-np.array(img.getdata()))/255
            USPSMat.append(imgdata)
            USPSTar.append(j)

In [23]:
#Reduced size matrix of MNIST Data to be used in fututre
trD = training_data[0][0:50000]
trT = training_data[1][0:50000]
testD = test_data[0][0:100000]
testT = test_data[1][0:10000]
valD = validation_data[0][0:10000]
valT = validation_data[1][0:10000]

# Neural Networks

In [7]:
#Refer to project 3 support pdf where the helper code is given 
import keras
from keras.models import Sequential
from keras.layers import Dense
num_classes=10
image_vector_size=28*28
image_size = 784


x_train = trD.reshape(trD.shape[0], image_vector_size)
y_train = keras.utils.to_categorical(trT, num_classes)

x_test = testD.reshape(testD.shape[0], image_vector_size)
y_test = keras.utils.to_categorical(testT, num_classes)



Using TensorFlow backend.


In [8]:
model = Sequential()
model.add(Dense(units=32, activation='sigmoid', input_shape=(image_size,)))
model.add(Dense(units=num_classes, activation='softmax'))
model.compile(optimizer='sgd', loss='categorical_crossentropy',metrics=['accuracy'])
history = model.fit(x_train, y_train, batch_size=128, epochs=100,verbose=False,validation_split=.1)
loss,accuracy = model.evaluate(x_test, y_test, verbose=False)
print('Accuracy in neural network ',accuracy*100,'%')
history

Accuracy in neural network  86.3 %


<keras.callbacks.History at 0x19b5b5c5dd8>

# SVM Method (Support Vector Machine)

In [13]:
#Not necessary for validation data but enough data is given to read so just showing possible accuacy
from sklearn.svm import SVC

#svm model with linear kernel
svmModel = SVC(kernel = 'linear')
svmModel.fit(trD,trT)
x = svmModel.score(testD,testT)
y = svmModel.score(valD,valT)
print('Accuracy in Validation Data ', y*100,'%')
print('Accuracy in Testing Data ', x*100,'%')

Accuracy in Validation Data  91.8 %
Accuracy in Testing Data  90.9 %


In [14]:
1+1

2

# Random Forest Method

In [15]:
from sklearn.ensemble import RandomForestClassifier
n_train = 5000
n_test = 1000
#RandomForestClassifier
rfcModel = RandomForestClassifier(n_estimators=100);
rfcModel.fit(trD, trT) 
testedData = rfcModel.predict(testD)

count = 0
total = len(testT)
for i in range(0,total):
    if testT[i] == testedData[i]:
        count +=1
print('Accuracy in RFC Testing data ',count*100/total,'%')
print('Accuracy in RFC Validation data',rfcModel.score(testD,testT)*100,'%')


Accuracy in RFC Testing data  93.7 %
Accuracy in RFC Validation data 93.7 %


In [16]:
uspsMat = np.array(USPSMat)
uspsTar = np.array(USPSTar)
print('Accuracy in SVM for USPS data ',svmModel.score(uspsMat,uspsTar)*100,'%')
print('Accuracy in RFC for USPS data ',rfcModel.score(uspsMat,uspsTar)*100,'%')


Accuracy in SVM for USPS data  33.18165908295415 %
Accuracy in RFC for USPS data  36.14680734036702 %


In [17]:
x_test2 = uspsMat.reshape(uspsMat.shape[0], image_vector_size)
y_test2 = keras.utils.to_categorical(uspsTar, num_classes)
print('Accuracy in Neural Network for USPS data ',model.evaluate(x_test2, y_test2, verbose=False)[1]*100,'%')


Accuracy in Neural Network for USPS data  33.316665833291665 %


# Logistic Regression

In [18]:
from sklearn.linear_model import LogisticRegression
Lr = LogisticRegression()
Lr.fit(trD,trT)
print('Accuracy test data ',Lr.score(testD,testT)*100,'%')
print('Accuracy validation data ',Lr.score(valD,valT)*100,'%')



Accuracy test data  89.0 %
Accuracy validation data  89.7 %


In [19]:
print('Accuracy in SVM for USPS data ',Lr.score(uspsMat,uspsTar)*100,'%')

Accuracy in SVM for USPS data  32.8016400820041 %


# Logistic Regression from Scratch

# Ensembling Classifiers (Voting Method)

In [20]:
def getFrequent(list):
    arr = np.array(list)
    j = np.bincount(arr)
    return np.argmax(j)

def majorVoting(testD):
    newPrediction = []
    nn = model.predict(testD)
    sv = svmModel.predict(testD)
    rf = rfcModel.predict(testD)
    lr = Lr.predict(testD)
    for i in range(0,len(sv)):
        predList = []
        predList.append(np.argmax(nn[i]))
        predList.append(sv[i])
        predList.append(rf[i])
        predList.append(lr[i])
        newPrediction.append(getFrequent(predList))
    return newPrediction
    

In [21]:
def getComb(data, targ):

    majorPredict = np.array(majorVoting(data))
    count = 0
    
    for i in range(0,len(targ)):
        if majorPredict[i] == targ[i]:
            count+=1
    string = (' Combine Accuracy : ' , count/len(targ)*100 , '%')
    return string

In [22]:
print('Testing data',getComb(testD,testT))
print('Validation data',getComb(valD,valT))
print('USPS data',getComb(uspsMat,uspsTar))

Testing data (' Combine Accuracy : ', 91.8, '%')
Validation data (' Combine Accuracy : ', 92.10000000000001, '%')
USPS data (' Combine Accuracy : ', 36.35681784089205, '%')
