In [2]:
# defining the neural network class
import numpy
import matplotlib.pyplot as plt

class neuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Two weight matrices are kept on the instance:

    * ``wih`` - links from the input layer to the hidden layer,
      shape ``(hiddennodes, inputnodes)``
    * ``who`` - links from the hidden layer to the output layer,
      shape ``(outputnodes, hiddennodes)``

    Row ``j`` / column ``i`` of a matrix holds the weight of the link going
    from node ``i`` of one layer to node ``j`` of the next layer.
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Store the layer sizes and learning rate and draw the initial weights.

        Parameters
        ----------
        inputnodes, hiddennodes, outputnodes : int
            Number of nodes in the input, hidden and output layer.
        learningrate : float
            Factor applied to every weight correction made by ``train``.
        """
        # the sigmoid lives in scipy.special; the import is kept local to the constructor
        import scipy.special

        self.inodes, self.hnodes, self.onodes = inputnodes, hiddennodes, outputnodes
        self.lr = learningrate

        # Initial weights are sampled from a normal distribution centred on 0.
        # The spread is nodes ** -0.5, using the size of the layer the links lead into.
        # wih is drawn before who so the random stream is consumed in a fixed order.
        self.wih = numpy.random.normal(
            loc=0.0, scale=self.hnodes ** -0.5, size=(self.hnodes, self.inodes)
        )
        self.who = numpy.random.normal(
            loc=0.0, scale=self.onodes ** -0.5, size=(self.onodes, self.hnodes)
        )

        # activation function: the logistic sigmoid, applied element-wise
        self.activation_function = scipy.special.expit

    def _forward(self, input_list):
        """Push the inputs through both layers.

        Returns the column-shaped inputs, the hidden layer outputs and the
        final layer outputs, in that order.
        """
        # at least 2-D, then transposed: a flat list becomes a single column
        column_inputs = numpy.array(input_list, ndmin=2).T

        # weighted sums arriving at the hidden layer, squashed by the sigmoid
        hidden_layer_out = self.activation_function(self.wih.dot(column_inputs))

        # weighted sums arriving at the output layer, squashed by the sigmoid
        final_layer_out = self.activation_function(self.who.dot(hidden_layer_out))

        return column_inputs, hidden_layer_out, final_layer_out

    def train(self, input_list, target_list):
        """Run one forward pass and correct both weight matrices in place.

        Parameters
        ----------
        input_list : sequence of numbers
            Input values, one per input node.
        target_list : sequence of numbers
            Desired output values, one per output node.
        """
        column_inputs, hidden_layer_out, final_layer_out = self._forward(input_list)

        # desired answers, shaped as a column like the network outputs
        column_targets = numpy.array(target_list, ndmin=2).T

        # error at the output layer: wanted value minus produced value
        output_errors = column_targets - final_layer_out

        # error attributed to the hidden layer: the output errors shared out along
        # the hidden->output links. This must use `who` before it is modified below.
        hidden_errors = self.who.T.dot(output_errors)

        # correction for the hidden->output links
        output_step = output_errors * final_layer_out * (1.0 - final_layer_out)
        self.who += self.lr * output_step.dot(hidden_layer_out.T)

        # correction for the input->hidden links
        hidden_step = hidden_errors * hidden_layer_out * (1.0 - hidden_layer_out)
        self.wih += self.lr * hidden_step.dot(column_inputs.T)

    def query(self, input_list):
        """Return the output layer activations for ``input_list``.

        The result is a 2-D array with one row per output node.
        """
        _, _, final_layer_out = self._forward(input_list)
        return final_layer_out



In [3]:
# Network dimensions and learning rate used for the MNIST experiment.

# one input node per pixel of a 28 x 28 handwritten-digit image
input_nodes = 28 * 28

# fewer hidden nodes than inputs, so the network has to summarise the key
# features of an image; too few hidden nodes may leave it unable to find
# enough explanatory features. 100 is taken as sufficient for 10 outputs.
hidden_nodes = 100

# one output node per class; changing this changes the number of predicted classes
output_nodes = 10

learning_rate = 0.3

# build the network instance used by the training and testing cells
n = neuralNetwork(
    inputnodes=input_nodes,
    hiddennodes=hidden_nodes,
    outputnodes=output_nodes,
    learningrate=learning_rate,
)

In [4]:
#n.query([1.0, 0.5, -1.5])

In [5]:
# Standalone demo: use numpy to generate a matrix of small random values
# that could serve as starting weights.
# numpy.random.rand samples uniformly from the interval [0, 1).

import numpy

numpy.random.rand(3, 3) 
# 3x3 matrix of random numbers between 0 and 1, shown as the cell output only;
# the result is not assigned to any variable.
# NOTE(review): the 3x3 size appears to date from an earlier 3-node test setup - confirm.


array([[0.15650766, 0.63711432, 0.1556899 ],
       [0.45053378, 0.39166451, 0.56139994],
       [0.91783136, 0.66608566, 0.77482734]])

In [6]:
# Load the training dataset from the local path in read-only mode.
# The with-block closes the file as soon as reading is done, even if
# readlines() raises, so the handle can never be left open.
with open("mnist_dataset/mnist_train.csv", 'r') as training_data_file:
    # readlines() returns a list with one string per line (record) of the file
    training_data_list = training_data_file.readlines()

In [7]:
# number of records (lines) read from the training file
len(training_data_list)

60000

In [8]:
# peek at one raw record (index 7): a single line of comma-separated text
training_data_list[7]

'3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38,43,105,255,253,253,253,253,253,174,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43,139,224,226,252,253,252,252,252,252,252,252,158,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,178,252,252,252,252,253,252,252,252,252,252,252,252,59,0,0,0,0,0,0,0,0,0,0,0,0,0,0,109,252,252,230,132,133,132,132,189,252,252,252,252,59,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,29,29,24,0,0,0,0,14,226,252,252,172,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,243,252,252,144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,88,189,252,252,252,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,91,212,247,252,252,252,204,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,125,193,193,193,253,252,252,252,238,102,28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45,222,252,252,252,252,253,252,25

In [9]:
import numpy
import matplotlib.pyplot


# Split the values of the first list based on commas
#all_values = training_data_list[0].split(',')

# remove the first label value and convert the remaining color pixel values into a numeric matrix
#image_array = numpy.asfarray(all_values[1:]).reshape((28, 28))

# now show only the image array
#matplotlib.pyplot.imshow(image_array, cmap="Greys", interpolation='None')

In [10]:
# scale the input range from 0-255 to 0.01-1.00 using formula
# re scaled and shaped the smaller mnist dataset

#scaled_input = (numpy.asfarray(all_values[1:]) / 255 * 0.99) + 0.01
#print(scaled_input)

In [11]:
# the network fires the output neuron of the label it predicts with the greatest value.
# for example, if the digit is 0, then the 0th neuron will fire and the remaining ones will stay silent.
# there can also be uncertainty if two handwritten digits look similar, like 4 and 9

# setting 10 output nodes for 10 digits
#onodes = 10

#setting all the nodes value to 0.01
#targets = numpy.zeros(onodes) + 0.01

# the first digit is zero, so setting the first digit to fire first neuron with highest value
#targets[int(all_values[0])] = 0.99

In [12]:
#print(targets)

In [13]:
# Train the network with the whole training dataset (one pass over all records).

for record in training_data_list:
    # a record is one CSV line: the label first, then the pixel values
    all_values = record.split(',')

    # Scale and shift the pixel values from 0..255 into 0.01..1.00.
    # numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float
    # performs the same conversion and works on old and new releases alike.
    inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01

    # target output values: every output node is asked for 0.01 ...
    targets = numpy.zeros(output_nodes) + 0.01

    # ... except the node whose index equals the record's label, which is asked for 0.99
    targets[int(all_values[0])] = 0.99

    # one training step with this record's inputs and targets
    n.train(inputs, targets)
    

In [14]:
# Testing phase for the neural network:
# first load the MNIST test dataset, one string per record.
# The with-block guarantees the file is closed even if reading fails.
with open('mnist_dataset/mnist_test.csv', 'r') as test_data_file:
    test_data_list = test_data_file.readlines()

In [15]:
# (disabled) inspect the label of the first test record
#all_values = test_data_list[0].split(',')
#print(all_values[0])
# author's note: the 0th record of the test dataset is labelled 7
# number of records (lines) read from the test file
len(test_data_list)

10000

In [16]:
#import numpy
#import matplotlib.pyplot
#image_array = numpy.asfarray(all_values[1:]).reshape((28, 28))
#matplotlib.pyplot.imshow(image_array, cmap = "Greys")

# visualizing the handwritten image at index 0 of the test dataset

In [17]:
# n.query((numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01)

# testing if the neuron fires greatest value for the 7th output node to check the network capability

In [18]:
# Test the neural network with the whole test dataset.
# scorecard collects one entry per test record: 1 for a correct prediction,
# 0 for an incorrect one. It is used afterwards to work out the accuracy.
scorecard = []

for record in test_data_list:
    # a record is one CSV line: the label first, then the pixel values
    all_values = record.split(',')

    # the correct answer is the first value of the record
    correct_label = int(all_values[0])

    # Scale the pixel values from 0..255 into 0.01..1.00, exactly as done for training.
    # numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float is the
    # equivalent conversion and works on old and new releases alike.
    inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01

    # query the network: returns the activations of the output nodes
    outputs = n.query(inputs)

    # the index of the largest output value is the network's answer
    label = numpy.argmax(outputs)

    # record whether the network's answer matches the correct label
    scorecard.append(1 if label == correct_label else 0)

In [19]:
# print(scorecard)

In [20]:
# Report the performance of the network as an accuracy percentage.
# As an array, the scorecard offers the number of correct answers (sum of the
# 1/0 entries) and the number of records (size) directly.
scorecard_array = numpy.asarray(scorecard)
correct_fraction = scorecard_array.sum() / scorecard_array.size
print("Network performance =", correct_fraction * 100, "%")

Network performance = 94.83 %


In [22]:
# Learning-rate sweep: for every candidate rate, build a fresh network, train it
# with one pass over the training records and report its accuracy on the test records.
for lr in [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.99]:
    # a new instance per rate, so no weights carry over between runs
    n_test_lr = neuralNetwork(input_nodes, hidden_nodes, output_nodes, lr)
    print("For learning rate=", lr)

    # --- training pass ---
    for record in training_data_list:
        all_values = record.split(',')

        # Scale pixel values from 0..255 into 0.01..1.00.
        # numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float
        # is the equivalent conversion.
        inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01

        # 0.01 for every output node, 0.99 for the node matching the label
        targets = numpy.zeros(output_nodes) + 0.01
        targets[int(all_values[0])] = 0.99

        n_test_lr.train(inputs, targets)

    # --- evaluation pass ---
    scorecard = []

    for record in test_data_list:
        all_values = record.split(',')
        correct_label = int(all_values[0])

        inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        outputs = n_test_lr.query(inputs)

        # the index of the largest output is the network's answer
        label = numpy.argmax(outputs)
        scorecard.append(1 if label == correct_label else 0)

    # Convert the scorecard once, after all test records are scored. Doing this
    # inside the loop rebuilt the array for every record without changing the result.
    scorecard_array = numpy.asarray(scorecard)
    print("Network performance =", (scorecard_array.sum() / scorecard_array.size) * 100, "%")


For learning rate= 0.01
Network performance = 91.97 %
For learning rate= 0.02
Network performance = 93.38 %
For learning rate= 0.03
Network performance = 93.77 %
For learning rate= 0.04
Network performance = 94.42 %
For learning rate= 0.05
Network performance = 94.75 %
For learning rate= 0.06
Network performance = 94.67 %
For learning rate= 0.11
Network performance = 95.41 %
For learning rate= 0.21
Network performance = 94.83 %
For learning rate= 0.31
Network performance = 93.95 %
For learning rate= 0.41
Network performance = 92.23 %
For learning rate= 0.51
Network performance = 91.38 %
For learning rate= 0.61
Network performance = 90.14 %
For learning rate= 0.71
Network performance = 85.39999999999999 %
For learning rate= 0.81
Network performance = 85.0 %
For learning rate= 0.91
Network performance = 84.85000000000001 %
For learning rate= 0.99
Network performance = 84.53 %
