In [0]:
import numpy as np

In [0]:
import CNN_core as CNN1

In [0]:
CNN = CNN1.CNN()

In [0]:
#intializing the parameters
parameters = {}
parameters["W1"] = CNN.init_parameter_conv((3, 3, 1, 32))
parameters["b1"] = np.zeros((1, 1, 1, 32))
parameters["W2"] = CNN.init_parameter_conv((3, 3, 32, 64))
parameters["b2"] = np.zeros((1, 1, 1, 64))
parameters["W3"] = CNN.init_parameter_fc((100, 14 * 14 * 64))
parameters["b3"] = np.zeros((100, 1))
parameters["W4"] = CNN.init_parameter_fc((10, 100))
parameters["b4"] = np.zeros((10, 1))

In [0]:
def feed_forward(X, parameters):
    caches = {}
    A_prev = X
    Z, caches["conv1"] = CNN.conv_forward(A_prev, parameters["W1"], parameters["b1"], stride = 1, padding = "same")
    Z, caches["conv2"] = CNN.conv_forward(Z, parameters["W2"], parameters["b2"], stride = 1, padding = "same")
    Z, caches["pool"] = CNN.pool_forward(Z, 2, 2, mode = "max")
    
    fc1 = Z.reshape((14 * 14 * 64, X.shape[0]))
    
    fc2, caches["fc2"] = CNN.fc_forward(fc1, parameters["W3"], parameters["b3"], activation = "relu")
    out, caches["out"] = CNN.fc_forward(fc2, parameters["W4"], parameters["b4"], activation = "sigmoid")
    
    return out, caches

In [6]:
from keras.datasets import mnist

Using TensorFlow backend.


In [0]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [0]:
X_train = X_train.reshape(60000, 28, 28, 1)
X_test = X_test.reshape((10000, 28, 28, 1))

In [0]:
def back_propagation(AL, Y, caches):
    grads = {}
    Y = Y.reshape(AL.shape)
    dAL = -np.divide(Y, (AL + 1e-8)) + np.divide((1 - Y), (1 - AL + 1e-8))
    dA, grads["dW4"], grads["db4"] = CNN.fc_back(dAL, caches["out"], activation = "sigmoid")
    dA, grads["dW3"], grads["db3"] = CNN.fc_back(dA, caches["fc2"], activation = "relu")
    dA = dA.reshape(AL.shape[1], 14, 14, 64)
    dA = CNN.pool_backward(dA, caches["pool"])
    dA, grads["dW2"], grads["db2"] = CNN.conv_backward(dA, caches["conv2"])
    dA, grads["dW1"], grads["db1"] = CNN.conv_backward(dA, caches["conv1"])
    
    return grads

In [0]:
def train(X, Y, parameters, learning_rate = 0.0075, no_of_iterations = 1, print_cost = True, batch_size = 64, optimizer = "adam_optimizer"):
    if optimizer == "adam_optimizer":
        v, s = CNN.adam_initializer(parameters)
    X = X.reshape(784, X.shape[0])
    for i in range(no_of_iterations):
        mini_batches = CNN.random_minibatches(X, Y, batch_size)
        for mini_batch in mini_batches:
            (mini_batch_X, mini_batch_Y) = mini_batch
            mini_batch_X = mini_batch_X.reshape(batch_size, 28, 28, 1)
            AL, caches = feed_forward(mini_batch_X, parameters)
            present_cost = CNN.cost(AL, mini_batch_Y, parameters)
            grads = back_propagation(AL, mini_batch_Y, caches)
            if optimizer == "gradient_descent_optimizer":
                parameters = CNN.gradient_descent_update(grads, parameters, learning_rate)
            else:
                parameters = CNN.adam_optimizer_update(v, s, grads, learning_rate, parameters)
            if print_cost == True:
                print ("Cost of {}th iterations is {}".format(i, present_cost))
        if print_cost == True:
            print ("Cost of {}th iteration is {}".format(i, present_cost))
    print ("Final cost is : {}".format(present_cost))
    return parameters
        

In [0]:
X_train_mean = int(np.mean(X_train))
X_train_sd = int(np.std(X_train))
X_train = X_train - X_train_mean
X_test = X_test - X_train_mean
X_train = X_train / X_train_sd
X_test = X_test / X_train_sd

In [0]:
Y_test = list(Y_test)
for i in range(len(Y_test)):
    test = [0] * 10
    test[Y_test[i]] = 1
    Y_test[i] = test
Y_test = np.array(Y_test)

In [0]:
Y_train = list(Y_train)
for i in range(len(Y_train)):
    test = [0] * 10
    test[Y_train[i]] = 1
    Y_train[i] = test
Y_train = np.array(Y_train)

In [0]:
Y_train, Y_test = Y_train.T, Y_test.T

In [15]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((60000, 28, 28, 1), (10000, 28, 28, 1), (10, 60000), (10, 10000))

In [0]:
parameters = train(X_train, Y_train, parameters, batch_size = 32, no_of_iterations = 2, optimizer = "gradient_descent_optimizer")

Cost of 0th iterations is 7.062388149291554
Cost of 0th iterations is 7.063440340935154
Cost of 0th iterations is 7.066575023750786
Cost of 0th iterations is 7.067631930625154
Cost of 0th iterations is 7.067625805754361
Cost of 0th iterations is 7.067619681170667
Cost of 0th iterations is 7.06761355687406
Cost of 0th iterations is 7.067607432864526
Cost of 0th iterations is 7.067601309142052
Cost of 0th iterations is 7.067595185706624
Cost of 0th iterations is 7.067589062558229
Cost of 0th iterations is 7.067582939696853
Cost of 0th iterations is 7.067576817122482
Cost of 0th iterations is 7.067570694835104
Cost of 0th iterations is 7.067564572834705
Cost of 0th iterations is 7.067558451121271
Cost of 0th iterations is 7.067552329694789
Cost of 0th iterations is 7.067546208555246
Cost of 0th iterations is 7.0675400877026275
Cost of 0th iterations is 7.067533967136921
Cost of 0th iterations is 7.067527846858112
Cost of 0th iterations is 7.067521726866188
Cost of 0th iterations is 7.0675