In [83]:
import numpy as np
from sklearn.metrics import mean_squared_error
from scipy import signal

In [110]:
# Toy fixtures for checking a valid-mode cross-correlation by hand:
# a 3x4 "image" whose rows are all [1, 2, 3, 4], a 2x2 kernel whose rows are
# all [1, 2], and a scalar bias added to every output element.
bias = 1
mat = np.array([[1, 2, 3, 4]] * 3)
kernel = np.array([[1, 2]] * 2)

In [118]:
# Reference result from SciPy: valid-mode 2-D cross-correlation plus the bias.
sig = signal.correlate2d(mat, kernel, 'valid') + bias

# Hand-rolled equivalent. The output shape is derived from the operands
# (rows - kernel_rows + 1, cols - kernel_cols + 1) instead of being hard-coded,
# so the check keeps working if `mat` or `kernel` are changed.
kernel_rows, kernel_cols = kernel.shape
output = np.zeros(
    (mat.shape[0] - kernel_rows + 1, mat.shape[1] - kernel_cols + 1),
    dtype='float',
)
for i in range(output.shape[0]):
    for j in range(output.shape[1]):
        # Element-wise product of the kernel with the window anchored at (i, j).
        window = mat[i:i + kernel_rows, j:j + kernel_cols]
        output[i, j] = np.sum(kernel * window) + bias

# Fail loudly if the manual implementation ever drifts from the library one.
assert np.array_equal(sig, output), "manual correlation disagrees with signal.correlate2d"

In [119]:
# Library result: signal.correlate2d(mat, kernel, 'valid') + bias.
sig

array([[11, 17, 23],
       [11, 17, 23]])

In [120]:
# Hand-rolled loop result (float array); expected to hold the same values as `sig`.
output

array([[11., 17., 23.],
       [11., 17., 23.]])

In [103]:
class Layer:
    """Abstract network layer.

    Holds the most recent ``input`` and ``output`` (both start as ``None``).
    Subclasses are expected to override both propagation methods; the base
    versions only raise ``NotImplementedError``.
    """

    def __init__(self):
        self.input, self.output = None, None

    def forward_propagation(self, input_data):
        """Compute the layer output for ``input_data`` (must be overridden)."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Turn the error w.r.t. the output into the error w.r.t. the input (must be overridden)."""
        raise NotImplementedError()


In [31]:
class FCLayer(Layer):
    """Fully connected layer computing ``output = input @ weights + bias``.

    ``weights`` has shape ``(input_size, output_size)`` and ``bias`` has shape
    ``(1, output_size)``; both are drawn uniformly from ``[-0.5, 0.5)``.
    Inputs are row vectors of shape ``(n_rows, input_size)``.
    """

    def __init__(self, input_size, output_size):
        # Initialise the base-class attributes so `input`/`output` exist
        # (as None) even before the first forward pass.
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Store ``input_data`` and return ``input_data @ weights + bias``."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return the error w.r.t. the input.

        ``output_error`` is dE/dY with shape ``(n_rows, output_size)``.
        """
        # The input error must be computed with the weights from the forward
        # pass, i.e. before they are updated below.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # Sum over rows so the bias gradient always has shape (1, output_size);
        # for a single row this equals output_error itself.
        bias_error = np.sum(output_error, axis=0, keepdims=True)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error
        

In [32]:
class ActivationLayer(Layer):
    """Layer that applies a function and, on the way back, its derivative.

    ``activation`` maps the input to the output; ``activation_prime`` is
    evaluated at the stored input and multiplied with the incoming error.
    The layer has no trainable parameters.
    """

    def __init__(self, activation, activation_prime):
        self.activation, self.activation_prime = activation, activation_prime

    def forward_propagation(self, input_data):
        """Remember ``input_data`` and return ``activation(input_data)``."""
        self.input = input_data
        activated = self.activation(input_data)
        self.output = activated
        return activated

    def backward_propagation(self, output_error, learning_rate):
        """Return ``activation_prime(input) * output_error``; ``learning_rate`` is unused."""
        local_gradient = self.activation_prime(self.input)
        return local_gradient * output_error


In [148]:
class ConvolutionalLayer(Layer):
    """2-D convolutional layer (valid cross-correlation, stride 1, no padding).

    Parameters
    ----------
    input_shape : tuple
        ``(height, width, channels)`` of a single input sample.
    kernel_shape : tuple
        ``(kernel_height, kernel_width)``.
    layer_depth : int
        Number of filters, i.e. number of output channels.

    ``weights`` has shape ``(kernel_height, kernel_width, channels, layer_depth)``
    and ``bias`` has shape ``(layer_depth,)``; both are drawn uniformly from
    ``[-0.5, 0.5)``.  Output channel ``l`` is::

        output[:, :, l] = sum_c correlate2d(input[:, :, c], weights[:, :, c, l], 'valid') + bias[l]

    where ``correlate2d(..., 'valid')[i, j]`` is the sum over the kernel window
    of ``weights[ii, jj] * input[i + ii, j + jj]``.
    """

    def __init__(self, input_shape, kernel_shape, layer_depth):
        assert len(input_shape) == 3
        super().__init__()
        self.input_shape = input_shape
        self.input_depth = input_shape[2]
        self.kernel_shape = kernel_shape
        self.layer_depth = layer_depth
        self.output_shape = (
            input_shape[0] - kernel_shape[0] + 1,
            input_shape[1] - kernel_shape[1] + 1,
            layer_depth,
        )
        self.weights = np.random.rand(kernel_shape[0], kernel_shape[1], self.input_depth, layer_depth) - 0.5
        self.bias = np.random.rand(layer_depth) - 0.5

    def forward_propagation(self, input):
        """Store ``input`` (a 3-D array) and return the layer output of shape ``output_shape``."""
        assert len(input.shape) == 3
        self.input = input
        self.output = np.zeros(self.output_shape)
        for l in range(self.layer_depth):
            for c in range(self.input_depth):
                self.output[:, :, l] += signal.correlate2d(
                    self.input[:, :, c], self.weights[:, :, c, l], 'valid'
                )
            # The bias belongs to the filter, not to each input channel:
            # add it once per output channel, outside the channel loop.
            self.output[:, :, l] += self.bias[l]
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return the error w.r.t. the input.

        ``output_error`` is dE/dY with shape ``output_shape``; the returned
        array has shape ``input_shape``.
        """
        in_error = np.zeros(self.input_shape)
        dW = np.zeros((self.kernel_shape[0], self.kernel_shape[1], self.input_depth, self.layer_depth))
        dB = np.zeros(self.layer_depth)
        for l in range(self.layer_depth):
            for c in range(self.input_depth):
                # dE/dX: full convolution of the output error with the kernel.
                in_error[:, :, c] += signal.convolve2d(output_error[:, :, l], self.weights[:, :, c, l], 'full')
                # dE/dW: valid cross-correlation of the input with the output error.
                dW[:, :, c, l] = signal.correlate2d(self.input[:, :, c], output_error[:, :, l], 'valid')
            # dE/dB: the bias is added once to every element of channel l, so
            # its gradient is the plain sum of that channel's error.
            dB[l] = np.sum(output_error[:, :, l])
        self.weights -= learning_rate * dW
        self.bias -= learning_rate * dB
        return in_error

In [149]:
class FlattenLayer(Layer):
    """Reshapes any input into a single row vector and restores the shape on the way back."""

    def forward_propagation(self, input_data):
        """Store ``input_data`` and return a flattened copy with shape ``(1, n_elements)``."""
        self.input = input_data
        flat = input_data.flatten()
        self.output = flat.reshape((1, -1))
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return ``output_error`` reshaped to the stored input's shape.

        ``learning_rate`` is ignored: the layer has nothing to learn.
        """
        original_shape = self.input.shape
        return output_error.reshape(original_shape)

In [150]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    ``exp`` is only ever applied to ``-|x|``, so its value stays in ``(0, 1]``
    and large negative inputs no longer trigger an overflow warning.
    """
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same value.
    numerator = np.where(x >= 0, np.ones_like(z), z)
    return numerator / (1 + z)

def sigmoid_prime(x):
    """Derivative of the logistic function, ``exp(-x) / (1 + exp(-x))**2``.

    The derivative is an even function of ``x``, so it is evaluated at ``|x|``.
    This keeps ``exp`` in ``(0, 1]`` and avoids the ``inf / inf = nan`` result
    the direct formula produces for large negative inputs.
    """
    z = np.exp(-np.abs(x))
    return z / (1 + z) ** 2

def tanh(x):
    """Hyperbolic-tangent activation; thin wrapper around ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh: ``1 - tanh(x)**2``."""
    t = np.tanh(x)
    return 1 - t * t

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    floor = 0
    return np.maximum(x, floor)

def relu_prime(x):
    """Derivative of ReLU as an integer array: 1 where ``x >= 0`` (including 0), else 0."""
    non_negative = x >= 0
    return np.array(non_negative).astype('int')

In [151]:
def mse(y_true, y_pred):
    """Mean squared error: the mean of ``(y_true - y_pred)**2`` over all elements."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error w.r.t. ``y_pred``: ``2 * (y_pred - y_true) / y_true.size``."""
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / y_true.size

In [152]:
class Network:
    """Sequential stack of layers trained one sample at a time by gradient descent.

    Parameters
    ----------
    loss : callable
        ``loss(y_true, y_pred)`` returning the error for one sample.
    loss_prime : callable
        ``loss_prime(y_true, y_pred)`` returning the gradient of the loss
        with respect to ``y_pred``.
    """

    def __init__(self, loss, loss_prime):
        self.layers = []
        self.loss = loss
        self.loss_prime = loss_prime

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def _forward(self, sample):
        """Run a single sample through every layer in order and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample of ``input_data``."""
        return [self._forward(input_data[i]) for i in range(len(input_data))]

    def fit(self, X, y, epochs=10, learning_rate=0.1):
        """Train on the samples ``X`` with targets ``y``.

        Parameters are updated after every sample. After each epoch the mean
        loss over the samples is printed.

        Raises
        ------
        ValueError
            If ``X`` contains no samples (the mean loss would be undefined).
        """
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("fit() requires at least one training sample")

        for epoch in range(epochs):
            err = 0
            for j in range(n_samples):
                output = self._forward(X[j])

                # Accumulated for display only; it does not affect the updates.
                err += self.loss(y[j], output)

                # Push the loss gradient back through the layers, last layer
                # first; each layer updates itself and returns the error for
                # the layer before it.
                error = self.loss_prime(y[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            err /= n_samples
            print('epoch {}/{} error = {}'.format(epoch + 1, epochs, err))

In [178]:
from keras.datasets import mnist
# `keras.utils.np_utils` is no longer importable in current Keras releases;
# `to_categorical` is exposed directly from `keras.utils`.
from keras.utils import to_categorical

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape to (n_samples, 28, 28, 1) and scale pixel values to [0, 1]
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_test = x_test.astype('float32')
x_test /= 255
y_test = to_categorical(y_test)

# Network
net = Network(mse, mse_prime)
net.add(ConvolutionalLayer((28, 28, 1), (3, 3), 1))  # input_shape=(28, 28, 1)   ;   output_shape=(26, 26, 1) 
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FlattenLayer())                     # input_shape=(26, 26, 1)   ;   output_shape=(1, 26*26*1)
net.add(FCLayer(26*26*1, 100))              # input_shape=(1, 26*26*1)  ;   output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 10))                   # input_shape=(1, 100)      ;   output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# mini-batch GD is not implemented (weights are updated after every sample),
# so training on all 60000 samples would be very slow
net.fit(x_train[0:1000], y_train[0:1000], epochs=100, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])

epoch 1/100 error = 0.3858793091327782
epoch 2/100 error = 0.3547261847131821
epoch 3/100 error = 0.35484234882646737
epoch 4/100 error = 0.3513294099399341
epoch 5/100 error = 0.3524267957225265
epoch 6/100 error = 0.3522898540277533
epoch 7/100 error = 0.36139841514167387
epoch 8/100 error = 0.3507025114469523
epoch 9/100 error = 0.3582148876345007
epoch 10/100 error = 0.3366924759603889
epoch 11/100 error = 0.32736468842507405
epoch 12/100 error = 0.2909584742540604
epoch 13/100 error = 0.2613744073903674
epoch 14/100 error = 0.2372267503734116
epoch 15/100 error = 0.23980315299868865
epoch 16/100 error = 0.21615612902962614
epoch 17/100 error = 0.20693927814673135
epoch 18/100 error = 0.18850889622462583
epoch 19/100 error = 0.17766238860215586
epoch 20/100 error = 0.16892780482704228
epoch 21/100 error = 0.1601911981256773
epoch 22/100 error = 0.15518722047511718
epoch 23/100 error = 0.14690068493897318
epoch 24/100 error = 0.13789220577655636
epoch 25/100 error = 0.13137054397609

In [179]:
import numpy as np


# Toy problem: one random 10x10x1 input and one random 4x4x2 target.
x_train = [np.random.rand(10, 10, 1)]
y_train = [np.random.rand(4, 4, 2)]

# Three 3x3 conv layers, each followed by tanh.
# Spatial size shrinks 10 -> 8 -> 6 -> 4; depth goes 1 -> 1 -> 2 -> 2.
net = Network(mse, mse_prime)
for conv_input_shape, conv_depth in (((10, 10, 1), 1), ((8, 8, 1), 2), ((6, 6, 2), 2)):
    net.add(ConvolutionalLayer(conv_input_shape, (3, 3), conv_depth))
    net.add(ActivationLayer(tanh, tanh_prime))

# Train on the single sample.
net.fit(x_train, y_train, epochs=100, learning_rate=0.3)

# Evaluate on the same sample that was used for training.
out = net.predict(x_train)

epoch 1/100 error = 1.0608564055803789
epoch 2/100 error = 0.10253111369477164
epoch 3/100 error = 0.06796558820130144
epoch 4/100 error = 0.06603954796694209
epoch 5/100 error = 0.06452300615264832
epoch 6/100 error = 0.06346555499197998
epoch 7/100 error = 0.0630710896420194
epoch 8/100 error = 0.06432746486397993
epoch 9/100 error = 0.06831810568506197
epoch 10/100 error = 0.08130923923366046
epoch 11/100 error = 0.0906983184501293
epoch 12/100 error = 0.10573473191626623
epoch 13/100 error = 0.0814971333108073
epoch 14/100 error = 0.06405290138577888
epoch 15/100 error = 0.06318921293626549
epoch 16/100 error = 0.06666211623449785
epoch 17/100 error = 0.06520955804384133
epoch 18/100 error = 0.06780048928720189
epoch 19/100 error = 0.06418754831101439
epoch 20/100 error = 0.06481270769556977
epoch 21/100 error = 0.06099338619851897
epoch 22/100 error = 0.06083079256346641
epoch 23/100 error = 0.05779271388161534
epoch 24/100 error = 0.05736535657984483
epoch 25/100 error = 0.055063

## 