In [1]:
import numpy as np

class ConvLayer2D:

    '''

    Naive 2D convolution layer (sliding-window multiply-and-sum, no kernel flip)
    whose weights are drawn at random on every call to `forward`.

    INPUT DIMS: (B, C, H, W), only suitable for B = 1 (1 sample)
    OUTPUT DIMS: (C, H, W) | NO BATCH DIMENSION FOR NOW

    The kernel used by the most recent `forward` call is kept in `self.kernel`
    with shape (output_channels, input_channels, kernel_height, kernel_width).

    '''

    def __init__(self, seed = None):
        '''
        seed | optional int; when given, NumPy's global RNG is seeded once at construction.
        '''
        self.seed = seed
        self._set_seed()

    def forward(self, X, output_channels, kernel_size, padding = 0, stride = 1, dilation_rate = 1):
        '''
        Convolve the first sample of X with a freshly drawn random kernel.

        X               | array of shape (B, C, H, W); only X[0] is used
        output_channels | number of output feature maps (>= 1)
        kernel_size     | int n for an n x n kernel, or a (height, width) pair
        padding         | zeros added on each side of the H and W axes
        stride          | step of the kernel along both H and W (>= 1)
        dilation_rate   | spacing between kernel taps (>= 1, 1 means no dilation)

        Returns an array of shape (output_channels, out_height, out_width).
        Raises ValueError for an invalid stride or a kernel that does not fit the
        padded input; AssertionError for invalid output_channels / dilation_rate.
        '''
        if stride < 1:
            raise ValueError('Stride cannot be less than 1!')

        self.padding = padding
        self.X = self._pad(X) # must be set before kernel_size, whose setter checks against self.X
        self.output_channels = output_channels
        self.kernel_size = kernel_size # tuple to specify height and width, otherwise if int will be nxn where n = kernel_size
        self.stride = stride
        self.dilation_rate = dilation_rate

        return self._forward_util()

    def _forward_util(self):
        '''
        Draw the kernel, slide it over the padded input and return the output maps.
        Also prints the input, kernel and output arrays.
        '''
        self._create_kernel()

        k_h, k_w = self.kernel.shape[2], self.kernel.shape[3]

        output_height = (self.X.shape[2] - k_h) // self.stride + 1
        output_width = (self.X.shape[3] - k_w) // self.stride + 1

        if output_height < 1 or output_width < 1:
            raise ValueError('Kernel cannot be larger than the (padded) input feature map!')

        Y = np.zeros(shape = (self.output_channels, output_height, output_width))

        sample = self.X[0] # (C, H, W): only the first sample of the batch is convolved

        for out_ch in range(self.output_channels): # for each output channel
            for m in range(output_height): # for each row idx in a given channel
                top = m * self.stride # the stride applies to rows as well as columns
                for n in range(output_width): # for each column idx in a given channel
                    left = n * self.stride
                    window = sample[:, top:top + k_h, left:left + k_w]
                    # each input channel is weighted by its own kernel channel,
                    # and the products are summed over channels, rows and columns
                    Y[out_ch, m, n] = np.sum(window * self.kernel[out_ch])

        print(f"Input | {self.X.shape}:\n\n{self.X}\n\n")
        print(f"Kernel | {self.kernel.shape}:\n\n{self.kernel}\n\n")
        print(f"Output | {Y.shape}:\n\n{Y}\n\n")

        return Y

    def _create_kernel(self):
        '''
        Draw random weights of shape (Out, In, H, W) into self.kernel, dilating them
        when dilation_rate is not 1.
        '''
        weights = np.random.random_sample(size = (self.output_channels, self.X.shape[1], *self.kernel_size)) # create a 4darray, shape: (Output Size, Input Size, Height, Width)

        if self.dilation_rate == 1:
            self.kernel = weights
            return

        self.kernel = self._dilate_multiple_in_out_ch(weights)
        # kept as asserts so callers that catch AssertionError keep working
        assert self.kernel.shape[2] <= self.X.shape[2], ValueError('height of kernel cannot be larger than height of input feature map')
        assert self.kernel.shape[3] <= self.X.shape[3], ValueError('width of kernel cannot be larger than width of input feature map')

    def _dilate_multiple_in_out_ch(self, kernel_mask):

        '''
        Dilatation for Kernels of multiple input and output channels

        kernel_mask | (Out, In, H, W)

        Returns a (Out, In, (H-1)*rate + 1, (W-1)*rate + 1) array holding the original
        weights every `rate` rows/columns and zeros everywhere else. A 1x1 kernel is
        returned unchanged.

        '''

        n_out, n_in, k_h, k_w = kernel_mask.shape

        if k_h == 1 and k_w == 1:
            return kernel_mask

        rate = self.dilation_rate

        out_kernel = np.zeros(shape = (n_out, n_in, (k_h - 1) * rate + 1, (k_w - 1) * rate + 1))
        # strided assignment places every weight (including exact zeros) at its dilated position
        out_kernel[:, :, ::rate, ::rate] = kernel_mask

        return out_kernel

    def _pad(self, X):
        '''
        Zero-pad the last two axes of a 4D array by self.padding on each side.
        '''
        pad_width = ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding))
        return np.pad(X, pad_width = pad_width)

    def _set_seed(self):
        '''
        Seed NumPy's global RNG when a seed was supplied.
        '''
        if self.seed is not None:
            np.random.seed(self.seed)

    @property
    def dilation_rate(self):
        return self._dilation_rate

    @dilation_rate.setter
    def dilation_rate(self, dilation_rate):
        assert dilation_rate >= 1, ValueError('Dilation cannot be less than 1 for the Kernel!')
        self._dilation_rate = dilation_rate

    @property
    def output_channels(self):
        return self._output_channels

    @output_channels.setter
    def output_channels(self, output_channels):
        assert output_channels >= 1, ValueError('Output Channels cannot be less than 1!')
        self._output_channels = output_channels

    @property
    def kernel_size(self):
        return self._kernel_size

    @kernel_size.setter
    def kernel_size(self, kernel_size):
        '''
        Accept an int n (stored as (n, n)) or a (height, width) pair.
        A pair is rejected when either side is 1 or, if an input has already been
        stored in self.X, when it exceeds the input's height / width.
        '''
        if isinstance(kernel_size, (int, np.integer)):
            self._kernel_size = (kernel_size, kernel_size)
            return

        k_h, k_w = kernel_size[0], kernel_size[1]

        if k_w == 1 or k_h == 1:
            raise ValueError('Kernel cannot be 1-Dimensional for a 2D convolution!')

        X = getattr(self, 'X', None) # the input is only known once forward() has stored it
        if X is not None:
            if X.shape[2] < k_h:
                raise ValueError('Kernel height cannot be greater than height of input tensor!')
            if X.shape[3] < k_w:
                raise ValueError('Kernel width cannot be greater than width of input tensor!')

        self._kernel_size = (k_h, k_w)

In [2]:
# Demo: run a dilated convolution over one random two-channel 5x5 sample.
np.random.seed(1)
np.set_printoptions(suppress=True)  # print plain decimals rather than scientific notation

# NOTE(review): ConvLayer2D(seed = 1) re-seeds NumPy with the same value, so the
# kernel weights repeat the first values drawn for x here -- confirm this is intended.
x = np.random.random_sample(size=(1, 2, 5, 5))

model = ConvLayer2D(seed=1)

Y = model.forward(
    x,
    output_channels=1,
    kernel_size=2,
    padding=0,
    stride=1,
    dilation_rate=3,
)


Input | (1, 2, 5, 5):

[[[[0.417022   0.72032449 0.00011437 0.30233257 0.14675589]
   [0.09233859 0.18626021 0.34556073 0.39676747 0.53881673]
   [0.41919451 0.6852195  0.20445225 0.87811744 0.02738759]
   [0.67046751 0.4173048  0.55868983 0.14038694 0.19810149]
   [0.80074457 0.96826158 0.31342418 0.69232262 0.87638915]]

  [[0.89460666 0.08504421 0.03905478 0.16983042 0.8781425 ]
   [0.09834683 0.42110763 0.95788953 0.53316528 0.69187711]
   [0.31551563 0.68650093 0.83462567 0.01828828 0.75014431]
   [0.98886109 0.74816565 0.28044399 0.78927933 0.10322601]
   [0.44789353 0.9085955  0.29361415 0.28777534 0.13002857]]]]


Kernel | (1, 2, 4, 4):

[[[[0.417022   0.         0.         0.72032449]
   [0.         0.         0.         0.        ]
   [0.         0.         0.         0.        ]
   [0.00011437 0.         0.         0.30233257]]

  [[0.14675589 0.         0.         0.09233859]
   [0.         0.         0.         0.        ]
   [0.         0.         0.         0.        ]
 