In [1]:
import numpy as np

## convolution and pooling operations

In [2]:
def convolution(input_volume, filters, biases, stride=1, zero_padding=0):
    '''
    Slide a bank of filters over a batch of images (im2col followed by one matrix product).

    Parameters
    - input_volume: 4d array of images, shape (num_images, num_channels, height, width)
    - filters: 4d array of filters, shape (num_filters, num_channels, filter_height, filter_width);
      any memory layout is accepted (contiguous or a strided view)
    - biases: one bias term per filter, broadcast against the filter axis
    - stride: step of the sliding window, applied to rows and columns alike
    - zero_padding: number of zeroes added around each image (never along the depth dimension)

    Returns
    - float array of shape (num_images, num_filters, out_height, out_width), where
      out = (size + 2 * zero_padding - filter_size) // stride + 1 for each spatial axis

    Raises
    - ValueError if the filter depth differs from the number of input channels, if the
      stride is smaller than 1, or if the filter does not fit inside the padded image
    '''
    input_volume = np.asarray(input_volume)
    filters = np.asarray(filters)

    num_images, num_channels, img_height, img_width = input_volume.shape
    num_filters, filter_depth, filter_height, filter_width = filters.shape

    if filter_depth != num_channels:
        raise ValueError(
            f'filter depth ({filter_depth}) must equal the number of input channels ({num_channels})'
        )
    if stride < 1:
        raise ValueError(f'stride must be at least 1, got {stride}')

    # zero padding adds zeroes around the input, but not along the depth dimension of each image
    image = input_volume
    if zero_padding != 0:
        border = (zero_padding, zero_padding)
        image = np.pad(image, ((0, 0), (0, 0), border, border), mode='constant')

    out_height = (img_height + 2 * zero_padding - filter_height) // stride + 1
    out_width = (img_width + 2 * zero_padding - filter_width) // stride + 1
    if out_height < 1 or out_width < 1:
        raise ValueError('filter is larger than the (padded) input image')

    # im2col 3d from:
    # https://stackoverflow.com/questions/50292750/python-the-implementation-of-im2col-which-takes-the-advantages-of-6-dimensional
    # The strided view exposes every receptive field without copying; it uses the image's
    # own strides, so it is valid for non-contiguous inputs as well.
    img_stride, channel_stride, row_stride, col_stride = image.strides
    windows = np.lib.stride_tricks.as_strided(
        image,
        shape=(num_images, out_height, out_width, num_channels, filter_height, filter_width),
        strides=(img_stride, stride * row_stride, stride * col_stride, channel_stride, row_stride, col_stride),
        writeable=False,
    )

    # each row of col is one flattened receptive field (astype makes the contiguous float copy)
    receptive_field_size = num_channels * filter_height * filter_width
    col = windows.astype(float).reshape(num_images * out_height * out_width, receptive_field_size)

    # each column of filt_col is one flattened filter; reshape (unlike a hand-written
    # as_strided) stays correct when `filters` is a sliced or transposed view
    filt_col = filters.reshape(num_filters, receptive_field_size).T

    # each column is a different filter; every out_height * out_width rows belong to one image
    conv = np.dot(col, filt_col)

    # add bias term (each filter should have one)
    conv += biases

    # regroup the rows into one activation volume per image, filters on axis 1
    return conv.reshape(num_images, out_height, out_width, num_filters).transpose(0, 3, 1, 2)


In [3]:
def pool(input_volumes, filter_dim, mode='max', stride=1, zero_padding=0):
    '''
    Pool every channel of a batch of images with a square sliding window.

    Parameters
    - input_volumes: 4d array of images, shape (num_images, num_channels, height, width)
    - filter_dim: side length of the (square) sliding window
    - mode: 'max', 'min' or 'mean' pooling
    - stride: step of the sliding window, applied to rows and columns alike
    - zero_padding: number of zeroes added around each image (never along the depth dimension);
      the padded zeroes take part in the pooling like any other value

    Returns
    - float array of shape (num_images, num_channels, out_height, out_width), where
      out = (size + 2 * zero_padding - filter_dim) // stride + 1 for each spatial axis

    Raises
    - ValueError if mode is not one of the supported names, if the stride is smaller
      than 1, or if the window does not fit inside the padded image
    '''
    reducers = {'max': np.max, 'min': np.min, 'mean': np.mean}
    if mode not in reducers:
        raise ValueError(f"unknown pooling mode {mode!r}; expected 'max', 'min' or 'mean'")
    if stride < 1:
        raise ValueError(f'stride must be at least 1, got {stride}')

    input_volumes = np.asarray(input_volumes)
    num_images, num_channels, img_height, img_width = input_volumes.shape

    # zero padding adds zeroes around the input, but not along the depth dimension of each image
    image = input_volumes
    if zero_padding != 0:
        border = (zero_padding, zero_padding)
        image = np.pad(image, ((0, 0), (0, 0), border, border), mode='constant')

    out_height = (img_height + 2 * zero_padding - filter_dim) // stride + 1
    out_width = (img_width + 2 * zero_padding - filter_dim) // stride + 1
    if out_height < 1 or out_width < 1:
        raise ValueError('pooling window is larger than the (padded) input image')

    # im2col 3d from:
    # https://stackoverflow.com/questions/50292750/python-the-implementation-of-im2col-which-takes-the-advantages-of-6-dimensional
    img_stride, channel_stride, row_stride, col_stride = image.strides
    windows = np.lib.stride_tricks.as_strided(
        image,
        shape=(num_images, out_height, out_width, num_channels, filter_dim, filter_dim),
        strides=(img_stride, stride * row_stride, stride * col_stride, channel_stride, row_stride, col_stride),
        writeable=False,
    ).astype(float)

    # flatten each window so the reduction runs over a single trailing axis
    windows = windows.reshape(num_images, out_height, out_width, num_channels, filter_dim ** 2)
    pooled = reducers[mode](windows, axis=-1)

    # move the channel axis back in front of the spatial axes
    return pooled.transpose(0, 3, 1, 2)

## forward propagation

In [4]:
# layer class to group information about layers
class Layer:
    '''
    Groups the hyperparameters (and, for conv layers, the weights) of one network layer.

    layer_type: 'conv' or 'pool'
    filters_shape: shape tuple if layer_type is 'conv' (expected 4D); single dimension for square window if layer_type is 'pool'
    stride, zero_padding: constants representing the stride and amount of zeroes added to the border of an input
    pooling_mode: method used in pooling: 'max', 'min', or 'mean'

    Attributes that do not apply to a layer type (filters/biases for 'pool', filter_dim
    for 'conv') are always present and set to None, so they can be read on any layer.
    '''
    def __init__(self, layer_type, filters_shape, stride=1, zero_padding=0, pooling_mode='max'):
        self.layer_type = layer_type

        # defaults for every layer type; overwritten below where they apply
        self.filters = None
        self.biases = None
        self.filter_dim = None

        if layer_type == 'conv':
            # weights and biases are drawn from a standard normal distribution;
            # there is one bias per filter (first axis of the filter bank)
            self.filters = np.random.normal(size=filters_shape)
            self.biases = np.random.normal(size=self.filters.shape[0])
        elif layer_type == 'pool':
            self.filter_dim = filters_shape

        self.stride = stride
        self.zero_padding = zero_padding
        self.pooling_mode = pooling_mode

    def __str__(self):
        if self.layer_type == 'conv':
            return f'conv(filters_shape=({self.filters.shape}), stride={self.stride}, zero_padding={self.zero_padding})'

        if self.layer_type == 'pool':
            return f'pool(filter_dim={self.filter_dim}, stride={self.stride}, zero_padding={self.zero_padding}, pooling_mode={self.pooling_mode})'

        # __str__ must always return a string; fall back to a generic description
        return f'{self.layer_type}(stride={self.stride}, zero_padding={self.zero_padding})'

In [5]:
# Makeshift text format for specifying the structure of the layers:
# - separate layers with a pipe: |
# - start each layer with its type (conv or pool), then list key=value parameters,
#   everything separated by semicolons; whitespace around the pieces is ignored
# - conv params: filter shape f=num_filters,depth,height,width; stride s=num; zero padding z=num
#   - note that the depth of the shape will be overridden by the previous layer's depth,
#     since the filter extends through the input volume
# - pooling params: window size fdim=num; mode m=max (or mean or min); stride s=num; zero padding z=num
# - omitted parameters fall back to s=1, z=0, m=max


def parse_structure(structure_str, input_depth):
    '''
    Build the list of Layer objects described by a structure string.

    - structure_str: layer description in the format explained above
    - input_depth: number of channels of the images fed to the first layer

    Returns (layers, output_depth): the layers in order, and the depth of the volume
    produced by the last conv layer (input_depth if there is none).

    Raises ValueError for an unknown layer type, a malformed key=value pair, or a
    missing / malformed filter specification.
    '''
    parsed_layers = []
    depth = input_depth

    for layer_str in structure_str.split('|'):
        if not layer_str.strip():
            continue

        layer_type, *param_strs = [part.strip() for part in layer_str.split(';')]
        # unknown keys are kept in the dict but simply never read
        params = dict(param.split('=', 1) for param in param_strs if param)

        stride = int(params.get('s', 1))
        padding = int(params.get('z', 0))

        if layer_type == 'conv':
            if 'f' not in params:
                raise ValueError(f'conv layer is missing its filter shape (f=...): {layer_str!r}')
            shape = [int(v) for v in params['f'].split(',')]
            if len(shape) != 4:
                raise ValueError(f'conv filter shape needs 4 entries, got {len(shape)}: {layer_str!r}')

            # the depth of the filter is equal to the depth of the input volume;
            # the depth of the next layer will equal the number of filters
            shape[1] = depth
            depth = shape[0]
            parsed_layers.append(Layer('conv', shape, stride, padding, pooling_mode=None))

        elif layer_type == 'pool':
            if 'fdim' not in params:
                raise ValueError(f'pool layer is missing its window size (fdim=...): {layer_str!r}')
            # pooling leaves the depth of the volume untouched
            parsed_layers.append(Layer('pool', int(params['fdim']), stride, padding, params.get('m', 'max')))

        else:
            raise ValueError(f'unknown layer type {layer_type!r} in {layer_str!r}')

    return parsed_layers, depth


structure_str = 'conv;f=4,1,3,3;s=1;z=1|conv;f=3,1,3,3;s=1;z=0|pool;fdim=2;m=max;s=2;z=0'
imgs = np.arange(108).reshape((3, 1, 6, 6))

# layers is a plain list; prev_layer_depth ends up as the depth after the last conv layer
layers, prev_layer_depth = parse_structure(structure_str, input_depth=imgs.shape[1])

for layer in layers:
    print(layer)

conv(filters_shape=((4, 1, 3, 3)), stride=1, zero_padding=1)
conv(filters_shape=((3, 4, 3, 3)), stride=1, zero_padding=0)
pool(filter_dim=2, stride=2, zero_padding=0, pooling_mode=max)


In [21]:
def conv_fprop(imgs, layers):
    '''
    Forward-propagate a batch of images through the conv / pool layers.

    - imgs: array of input images, passed to the first layer
    - layers: iterable of Layer objects, applied in order

    Returns a list of volumes: element 0 is a copy of the input images, followed by one
    entry per conv or pool layer. For a conv layer the stored volume is the
    pre-activation output (convolution + bias, before ReLU), kept for use in backprop;
    the ReLU result is what is fed to the next layer. For a pool layer the stored
    volume is the pooled output. Layers of any other type are skipped.
    '''
    output = imgs.copy()
    activation_volumes = [output]

    for layer in layers:
        if layer.layer_type == 'conv':
            pre_activation = convolution(output, layer.filters, layer.biases, layer.stride, layer.zero_padding)
            activation_volumes.append(pre_activation)

            # ReLU, applied out of place: an in-place update would also overwrite the
            # pre-activation volume that was just stored
            output = np.maximum(pre_activation, 0)

        elif layer.layer_type == 'pool':
            output = pool(output, layer.filter_dim, mode=layer.pooling_mode, stride=layer.stride, zero_padding=layer.zero_padding)
            activation_volumes.append(output)

    return activation_volumes
        
# conv_fprop returns the input images as element 0, followed by one volume per layer
activations = conv_fprop(imgs, layers)

# show the shape and the contents of the last layer's output
final_volume = activations[-1]
print(final_volume.shape)
final_volume

(3, 3, 2, 2)


array([[[[ 23.9304939 ,   0.        ],
         [ 40.08501628,   0.        ]],

        [[ 35.18164043,   7.07675235],
         [ 39.47660821,  10.93602584]],

        [[ 61.42979627,  92.61387321],
         [ 86.97909482, 124.03782622]]],


       [[[ 31.62567712,   0.        ],
         [ 49.11547309,   0.        ]],

        [[ 60.95144711,  44.03602332],
         [ 65.24641489,  34.09166679]],

        [[214.72558761, 281.15759129],
         [240.27488616, 312.58154431]]],


       [[[ 42.07533635,   0.        ],
         [ 58.14592991,   0.        ]],

        [[ 86.7212538 ,  87.59800474],
         [ 91.01622158,  57.24730774]],

        [[368.02137895, 469.70130938],
         [393.5706775 , 501.12526239]]]])