**Convolutional Neural Network**:
We will now replace the hidden layer from our simple neural network model with a `convolutional layer`. In the usual hidden layer, we multiply the input vector `L0` with a weights matrix `W0`. When the input size is large, i.e. each instance has a large number of feature attributes (e.g. image data with lots of pixels), we end up with a large number of weights in `W0`. This can lead to the model overfitting the training data and lowering the accuracy of its predictions on unseen data. This problem can be mitigated by introducing a much smaller weights matrix, called a `kernel`, and applying this `kernel` repeatedly over different subsections of the data. For example, if we have a 28x28 (=784) pixel image input, then instead of multiplying it with a weight matrix that holds 28x28 (=784) weights for every hidden neuron, we can use a 6x6 kernel (36 weights) and multiply it with every 6x6 subsection of the image. We can also use multiple different kernels to process the inputs and pass a combination of the different kernel outputs on to the next layer.

In [64]:
import numpy as np

def Relu(x):
    """Rectified linear unit: keep positive entries of x, zero out the rest."""
    # The comparison yields a boolean mask; multiplying by it zeroes every
    # entry that is not strictly positive.
    positive = x > 0
    return x * positive

def Relu_deriv(x):
    """Derivative of Relu: True where x is strictly positive, False elsewhere."""
    is_positive = x > 0
    return is_positive

def tanh(x):
    """Element-wise hyperbolic tangent of x (delegates to np.tanh)."""
    activated = np.tanh(x)
    return activated

def tanh_deriv(x):
    """Derivative of tanh evaluated at x: 1 - tanh(x)^2."""
    t = np.tanh(x)
    return 1.0 - t ** 2

def sigmoid(x):
    """Logistic sigmoid of x, computed element-wise as 1 / (1 + exp(-x))."""
    denominator = 1.0 + np.exp(-1.0 * x)
    return 1.0 / denominator

def sigmoid_deriv(x):
    """Derivative of the sigmoid evaluated at x: s * (1 - s) with s = sigmoid(x)."""
    s = sigmoid(x)
    return s * (1.0 - s)

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row of x (axis 1) is turned into a probability distribution that
    sums to 1.

    The per-row maximum is subtracted before exponentiating. Softmax is
    invariant to adding a constant to every entry of a row, so the result is
    mathematically unchanged, but the largest exponent becomes exp(0) = 1 and
    np.exp can no longer overflow to inf (which previously produced nan rows
    for large scores).
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    ex = np.exp(shifted)
    return ex / np.sum(ex, axis=1, keepdims=True)


class convolutional_layer(object):
    """A single convolutional layer applied to a batch of 2-D images.

    Every kernel-sized window of every image is flattened into a row, and all
    rows are multiplied with the kernel matrix K in one matrix product. The
    per-window kernel outputs are then flattened per image and passed through
    the configured activation ("relu", "sigmoid" or "tanh").

    The layer only computes the kernel gradient (stored in ``W_grad``); it
    does not update K itself.
    """

    def __init__(self, K, image_rows, image_cols, kernel_rows, kernel_cols, activation) -> None:
        """Store the kernels and the image/kernel geometry.

        K is multiplied from the right with flattened kernel-sized windows,
        so it needs kernel_rows * kernel_cols rows; each column is one kernel.
        activation is one of "relu", "sigmoid" or "tanh".
        """
        self.K = K
        self.image_cols = image_cols
        self.image_rows = image_rows
        self.kernel_cols = kernel_cols
        self.kernel_rows = kernel_rows
        self.activation = activation

    def forward(self, L, dropout):
        """Forward propagation through the convolutional layer.

        L holds one image per entry along its first axis; it is reshaped to
        (num_images, image_rows, image_cols). dropout enables a random 0/1
        mask on the activated output. Returns the activated (and optionally
        masked) output with one row per image.
        """
        # reshape the input image array
        L = L.reshape(L.shape[0], self.image_rows, self.image_cols)

        # get all sub-sections from the image (stride 1, no padding)
        sections = []
        for i in range(self.image_rows - self.kernel_rows + 1):
            for j in range(self.image_cols - self.kernel_cols + 1):
                section = L[:, i:i + self.kernel_rows, j:j + self.kernel_cols]
                # add a "section index" axis so sections can be concatenated
                section = section.reshape(-1, 1, self.kernel_rows, self.kernel_cols)
                sections.append(section)

        # concatenate all sections into a single array of shape
        # (num_images, num_sections, kernel_rows, kernel_cols)
        expanded_input = np.concatenate(sections, axis=1)
        input_shape = expanded_input.shape
        print(f"expanded input shape: {input_shape} ")

        # flatten the sections: one row per (image, section) pair
        self.flattened_input = expanded_input.reshape(expanded_input.shape[0]*expanded_input.shape[1], -1)
        print(f"flattened input shape: {self.flattened_input.shape}")

        return self.kernel_mult(input_shape, dropout)

    def kernel_mult(self, input_shape, dropout):
        """Apply the kernels to the flattened sections, then the activation.

        input_shape is the shape of the expanded section array; only its
        first entry (the number of images) is used. Stores Z (raw kernel
        output, one row per section), Zflat (Z flattened per image), the
        dropout flag and, when dropout is on, the mask used.

        Raises ValueError if the configured activation is not supported.
        """
        # matrix multiplication of flattened image sections with kernels
        self.Z = np.dot(self.flattened_input, self.K)
        print(f"kernel output shape: {self.Z.shape}")
        print(self.Z)

        # flatten the kernel output for each image
        self.Zflat = self.Z.copy()
        self.Zflat = self.Zflat.reshape(input_shape[0], -1)
        print(f"kernel output flattened shape: {self.Zflat.shape}")
        print(self.Zflat)

        self.dropout = dropout
        if self.dropout:
            # generate a random dropout mask with roughly equal numbers of 0s and 1s
            self.dropout_mask = np.random.randint(0, 2, size=(self.Zflat.shape))

        if self.activation == "relu":
            activated = self.forward_relu()
        elif self.activation == "sigmoid":
            activated = self.forward_sigmoid()
        elif self.activation == "tanh":
            activated = self.forward_tanh()
        else:
            # previously this fell through and silently returned None
            raise ValueError(f"unsupported activation: {self.activation!r}")

        if self.dropout:
            # multiply by a factor of 2 to compensate for roughly 1/2 the
            # neurons being turned off by the masking
            return 2 * self.dropout_mask * activated
        return activated

    def forward_relu(self):
        """Relu of the flattened kernel output."""
        return Relu(self.Zflat)

    def forward_sigmoid(self):
        """Sigmoid of the flattened kernel output."""
        return sigmoid(self.Zflat)

    def forward_tanh(self):
        """Tanh of the flattened kernel output."""
        return tanh(self.Zflat)

    def backward(self, D):
        """Backpropagation through the convolutional layer.

        D is the error signal with respect to this layer's output and is
        multiplied element-wise with the activation derivative of Zflat.
        The resulting kernel gradient is stored in ``W_grad``; nothing is
        returned. forward() must have been called first.

        Raises ValueError if the configured activation is not supported.
        """
        if self.activation == "relu":
            self.backward_relu(D)
        elif self.activation == "sigmoid":
            self.backward_sigmoid(D)
        elif self.activation == "tanh":
            self.backward_tanh(D)
        else:
            # previously this silently did nothing, leaving W_grad unset
            raise ValueError(f"unsupported activation: {self.activation!r}")

    def backward_relu(self, D):
        """Backpropagate D through the relu activation."""
        # dE/dZ
        dE_dZ = D * Relu_deriv(self.Zflat)
        self.backward_kernel_mult(dE_dZ)

    def backward_sigmoid(self, D):
        """Backpropagate D through the sigmoid activation."""
        # dE/dZ
        dE_dZ = D * sigmoid_deriv(self.Zflat)
        self.backward_kernel_mult(dE_dZ)

    def backward_tanh(self, D):
        """Backpropagate D through the tanh activation."""
        # dE/dZ
        dE_dZ = D * tanh_deriv(self.Zflat)
        self.backward_kernel_mult(dE_dZ)

    def backward_kernel_mult(self, D):
        """Compute the kernel gradient dE/dK and store it in ``W_grad``.

        D is dE/dZ in the per-image flattened layout of Zflat. It is reshaped
        back to the per-section layout of Z so it lines up row-for-row with
        flattened_input.
        """
        if self.dropout:
            # The mask shares Zflat's layout, so apply it before reshaping.
            # NOTE(review): forward scales the kept activations by 2 but only
            # the mask (not the factor 2) is applied here, as in the original
            # code - confirm whether the factor should be mirrored.
            D = self.dropout_mask * D

        # reshape needs the target *shape*; passing the array self.Z itself
        # (as the code did before) raises a TypeError
        dE_dZ = D.reshape(self.Z.shape)

        # dE/dW0
        self.W_grad = np.dot((self.flattened_input).T, dE_dZ)
    

In [65]:
# Demo: run two small synthetic images through the convolutional layer.
num_images = 2
image_rows = 6
image_cols = 6
images = np.zeros(shape=(num_images, image_rows, image_cols))  # 2 images

# Fill each image with a simple diagonal gradient; image k is (k+1) times the
# base pattern. The loop bounds follow image_rows/image_cols so changing the
# image size above keeps the fill consistent.
for k in range(num_images):
    for i in range(image_rows):
        for j in range(image_cols):
            images[k, i, j] = (k + 1) * (i + j + 1)

kernel_rows = 3
kernel_cols = 3
num_kernels = 2
# one hidden neuron per (kernel position, kernel) pair
hidden_neurons = (image_rows-kernel_rows+1) * (image_cols-kernel_cols+1) * num_kernels
output_neurons = 10 # number of image labels

print(f"Hidden neurons: {hidden_neurons}, Output neurons: {output_neurons}")

# initialize kernels and output layer weights
# (each column of `kernels` is one flattened kernel)
kernels = np.random.random(size=(kernel_rows*kernel_cols, num_kernels))
W1 = np.random.random(size=(hidden_neurons, output_neurons))


clayer = convolutional_layer(kernels,image_rows,image_cols,kernel_rows,kernel_cols, activation = "tanh")

L0 = images
L1 = clayer.forward(L0, dropout = False)
print(f"L1 shape: {L1.shape}")

Hidden neurons: 32, Output neurons: 10
expanded input shape: (2, 16, 3, 3) 
flattened input shape: (32, 9)
kernel output shape: (32, 2)
[[ 12.97390274  16.5086609 ]
 [ 17.4617429   22.51895906]
 [ 21.94958306  28.52925723]
 [ 26.43742322  34.53955539]
 [ 17.4617429   22.51895906]
 [ 21.94958306  28.52925723]
 [ 26.43742322  34.53955539]
 [ 30.92526338  40.54985356]
 [ 21.94958306  28.52925723]
 [ 26.43742322  34.53955539]
 [ 30.92526338  40.54985356]
 [ 35.41310354  46.56015173]
 [ 26.43742322  34.53955539]
 [ 30.92526338  40.54985356]
 [ 35.41310354  46.56015173]
 [ 39.9009437   52.57044989]
 [ 25.94780547  33.01732179]
 [ 34.92348579  45.03791812]
 [ 43.89916612  57.05851446]
 [ 52.87484644  69.07911079]
 [ 34.92348579  45.03791812]
 [ 43.89916612  57.05851446]
 [ 52.87484644  69.07911079]
 [ 61.85052676  81.09970712]
 [ 43.89916612  57.05851446]
 [ 52.87484644  69.07911079]
 [ 61.85052676  81.09970712]
 [ 70.82620708  93.12030345]
 [ 52.87484644  69.07911079]
 [ 61.85052676  81.0997