In [1]:
#all import statements
import numpy as np
from PIL import Image

In [2]:
#data transformation on saved input /output matrices
X_train = np.load("X_basic.npy")
X_train /= 255.
Y_train = np.load("Y_basic.npy")
m = X_train.shape[1] #number of training examples

# X_test = None
# Y_test = None

#shape of X should be like (values in vector of single example , number of examples)

In [3]:
#
n_layers = 4
layers = [1024,512 , 512 , 256 , 256 , 100] #List containing number of  neurons in each layer
assert(layers[0] == X_train.shape[0])  #first layer = input layer
assert(layers[-1] == Y_train.shape[0]) #number of outputs should be equal to number of classes

In [4]:
#softmax activation will be used to get number in last layer
#We are using ReLu activation elsewhere
def softmax(z):
    z = np.exp(z)
    z = z/np.sum(z , axis = 0 , keepdims = True , dtype=np.float32) #sum along a column
    return z

def relu(z):
    temp = z > 0
    z = z * temp
    return z

In [5]:
#derivative of softmax and relu activation
#wrong wrong wrong
# def d_softmax(A):
#     retval = np.zeros(A.shape)
#     for i in range(A.shape[0]):
#         for j in range(A.shape[0]):
#             if i != j:
#                 retval[i] += -A[i]*A[j]
#             else:
#                 retval[i] += A[i] - A[i]**2
#     return retval

def d_relu(A):
    return A >= 0

In [6]:
#defining network variables
a = {}  #activations
w = {}  #Weights
b = {}  #bias
# del_a = None   #d(error)/d(activation)
# del_w = None   #d(error)/d(weight)
# del_b = None   #d(error)/d(bias)
a[0] = X_train #input layer

In [7]:
def initialize_nn(layers):
    for i in range(1 , len(layers)):
        w[i] = np.random.randn(layers[i] , layers[i - 1])/100. 
        b[i] = np.zeros((layers[i] , 1))
        a[i] = relu(np.dot(w[i] , a[i - 1]) + b[i]) #dummy

In [8]:
#trains neural network for single epoch
#It implements Batch Gradient Descent instead of Stochastic Gradient Descent
def train_one_epoch(alpha):
    #Forward propagation:
    
    #using relu for all layers except last
    for i  in range(1 , len(layers) - 1):
        a[i] = relu(np.dot(w[i] , a[i - 1]) + b[i])
        
    last_index = len(layers) - 1
    
    #using softmax for last layer
    a[last_index] = softmax(np.dot(w[last_index] , a[last_index - 1]) + b[last_index])
    output = a[last_index]
    
    
    #Error Calculation:
    #softmax crossentropy was used 
    error = -1*(Y_train*np.log(output))
    error = 1/m*np.sum(np.sum(error , axis = 1 , keepdims = True) )
    
    
    #Back propagation:
    
    #for last layer (with softmax activation)
    del_a =  -1 * np.divide(Y_train , output)
    del_z = a[last_index] - Y_train      #z represents logits, activation(z) = a
    del_w = 1/m *np.dot(del_z , a[last_index - 1].T)
    del_b = 1/m * np.sum(del_z , axis = 1 , keepdims=True)
    del_a = np.dot(w[last_index].T , del_z)
    
    #weight updation:
    w[last_index] -= del_w*alpha
    b[last_index] -= del_b*alpha
    
    #for all layers except last
    for i in range(last_index - 1, 0, -1):
        del_z = del_a * d_relu(a[i])
        del_w = 1/m *np.dot(del_z , a[i - 1].T)
        del_b = 1/m*np.sum(del_z , axis = 1 , keepdims=True)
        del_a = np.dot(w[i].T , del_z)
        
        #weight updation:
        w[i] -= del_w*alpha
        b[i] -= del_b*alpha
        
    return error

In [9]:
#trains NN for n_epochs epochs
# learning_rate = 1
def train(n_epochs,learning_rate ):
    for i in range(n_epochs):
        print("Training epoch :" + str(i + 1) , end=" ...... ")
        error = train_one_epoch(learning_rate)
        print("\tDone \t error = " + str(error))
        learning_rate *=0.995

In [10]:
#forward propagates a case which we want to test
def find_output(sample_case):
    last_index = len(layers) - 1
    a[0] = sample_case
    for i in range(1 , len(layers) - 1):
        a[i] = relu(np.dot(w[i] , a[i - 1]) + b[i])
    return softmax(np.dot(w[last_index] , a[last_index - 1]) + b[last_index])

In [12]:
initialize_nn(layers)

In [14]:
train(10000,1)

Training epoch :1 ...... 	Done 	 error = 4.543439352858794
Training epoch :2 ...... 	Done 	 error = 4.528672012281555
Training epoch :3 ...... 	Done 	 error = 4.514279315147552
Training epoch :4 ...... 	Done 	 error = 4.50024919048777
Training epoch :5 ...... 	Done 	 error = 4.486568357656072
Training epoch :6 ...... 	Done 	 error = 4.473222031064144
Training epoch :7 ...... 	Done 	 error = 4.460193806165821
Training epoch :8 ...... 	Done 	 error = 4.447465621766492
Training epoch :9 ...... 	Done 	 error = 4.435017788257492
Training epoch :10 ...... 	Done 	 error = 4.422829141962228
Training epoch :11 ...... 	Done 	 error = 4.410877317486236
Training epoch :12 ...... 	Done 	 error = 4.399139263918682
Training epoch :13 ...... 	Done 	 error = 4.387591809755438
Training epoch :14 ...... 	Done 	 error = 4.376212439077851
Training epoch :15 ...... 	Done 	 error = 4.364980182577865
Training epoch :16 ...... 	Done 	 error = 4.353876597798787
Training epoch :17 ...... 	Done 	 error = 4.342886

KeyboardInterrupt: 

In [34]:
img = np.array(Image.open('Output/16_0_1.jpg').resize((32 , 32)).convert("L")).reshape(1024,1)/255.
np.argmax((find_output(img)))

26

In [32]:
img

array([[0.7372549 ],
       [0.74509804],
       [0.75294118],
       ...,
       [0.22352941],
       [0.29019608],
       [0.41176471]])

In [16]:
for key in w.keys():
 np.save("w"+str(key)+".npy",w[key])

In [17]:
for key in b.keys():
 np.save("b"+str(key)+".npy",b[key])

In [18]:
for key in a.keys():
 np.save("a"+str(key)+".npy",a[key])