**Convolutional Neural Network**:
We will now replace the hidden layer from our simple neural network model with a `convolutional layer`. In the usual hidden layer, we multiply the input vector `L0` with a weights matrix `W0`. When the input size is large, i.e. each instance has a large number of feature attributes (e.g. in the case of image data with lots of pixels), we end up with a large number of weights in `W0`. This can lead to the model overfitting the training data, which lowers the accuracy of its predictions on new data. This problem can be mitigated by introducing a much smaller weights matrix, also called a `kernel`, and applying this `kernel` repeatedly over different subsections of the data. For example, if we have a 28x28 (=784) pixel image input, then instead of multiplying it with a weights matrix that has 784 rows (one per pixel), we can use a 6x6 kernel and multiply it with every 6x6 subsection of the image. We can also use multiple different kernels to process the inputs and pass a combination of the different kernel outputs on to the next layer.

In [36]:
import numpy as np

class convolutional_layer(object):
    '''
        Convolutional layer that applies one or more kernels to every
        kernel-sized sub-section of each input image (stride 1, no padding).

        K            -- kernel weights; must be usable as the right-hand
                        operand of np.dot with kernel_rows*kernel_cols rows
                        (one column per kernel)
        image_rows   -- number of rows in each input image
        image_cols   -- number of columns in each input image
        kernel_rows  -- number of rows in each kernel
        kernel_cols  -- number of columns in each kernel
    '''

    def __init__(self, K, image_rows, image_cols, kernel_rows, kernel_cols) -> None:
        '''
            class constructor: stores the kernel weights and the image/kernel
            dimensions used by forward()
        '''
        self.K = K
        self.image_cols = image_cols
        self.image_rows = image_rows
        self.kernel_cols = kernel_cols
        self.kernel_rows = kernel_rows

    def forward(self, L):
        '''
            Apply the kernels to every sub-section of every image in L.

            L is an array whose first axis indexes the images and whose
            remaining elements can be reshaped to (image_rows, image_cols),
            e.g. (num_images, image_rows, image_cols) or
            (num_images, image_rows*image_cols).

            Returns np.dot(sections, K), where `sections` has one row per
            (image, sub-section) pair: all sub-sections of the first image
            come first (scanned row by row, left to right), then those of
            the second image, and so on.

            Raises ValueError if L cannot be reshaped to the configured image
            size or if the kernel is larger than the image.
        '''
        # reshape the input image array
        L = L.reshape(L.shape[0], self.image_rows, self.image_cols)

        # number of valid kernel positions along each image axis
        row_positions = self.image_rows - self.kernel_rows + 1
        col_positions = self.image_cols - self.kernel_cols + 1
        if row_positions < 1 or col_positions < 1:
            # without this check the section list would be empty and numpy
            # would fail with an unhelpful "need at least one array" error
            raise ValueError(
                f"kernel ({self.kernel_rows}x{self.kernel_cols}) does not fit "
                f"inside image ({self.image_rows}x{self.image_cols})"
            )

        # get all sub-sections from the image, each of shape
        # (num_images, kernel_rows, kernel_cols)
        sections = [
            L[:, i:i + self.kernel_rows, j:j + self.kernel_cols]
            for i in range(row_positions)
            for j in range(col_positions)
        ]

        # stack all sections into a single array of shape
        # (num_images, num_sections, kernel_rows, kernel_cols)
        expanded_input = np.stack(sections, axis=1)

        # flatten the sections: one row per (image, section) pair. The
        # dimensions are spelled out explicitly (no -1) so that an empty
        # batch reshapes cleanly to zero rows.
        expanded_input = expanded_input.reshape(
            L.shape[0] * len(sections),
            self.kernel_rows * self.kernel_cols,
        )

        # matrix multiplication of flattened image sections with kernels
        return np.dot(expanded_input, self.K)
        

In [37]:
# Demo: run a small batch of synthetic images through the convolutional layer.
num_images = 2
image_rows = 6
image_cols = 6

# Build the test images so that images[k, i, j] = (k+1)*(i + j + 1).
# The sizes come from the variables above rather than hard-coded literals,
# so changing num_images / image_rows / image_cols keeps the data consistent.
image_scale = np.arange(1, num_images + 1, dtype=float).reshape(-1, 1, 1)
row_index = np.arange(image_rows).reshape(1, -1, 1)
col_index = np.arange(image_cols).reshape(1, 1, -1)
images = image_scale * (row_index + col_index + 1)

#print(images)
kernel_rows = 3
kernel_cols = 3
num_kernels = 1
# one hidden neuron per (kernel position, kernel) pair
hidden_neurons = (image_rows-kernel_rows+1) * (image_cols-kernel_cols+1) * num_kernels
output_neurons = 10 # number of image labels

# initialize kernels and output layer weights
kernels = np.random.random(size=(kernel_rows*kernel_cols, num_kernels))
W1 = np.random.random(size=(hidden_neurons, output_neurons))

clayer = convolutional_layer(kernels,image_rows,image_cols,kernel_rows,kernel_cols)

# forward pass through the convolutional layer
L0 = images
L1 = clayer.forward(L0)
print(f"L1 shape: {L1.shape}")

L1 shape: (32, 1)
