In [2]:
import numpy as np

## convolution

In [45]:
'''
- expected input volume to be an array of 3d images
- filters is a list of 3d array filters
- biases is a list of bias terms, one for each filter
- maintain_depth: convolve channel by channel instead of one volume becoming a single depth slice
--> if true, does not use bias, since it should not be applied to each depth slice
'''
def convolution(input_volume, filters, biases, stride=1, zero_padding=0, maintain_depth=False):
    
    # assume square images
    num_images, num_channels, _, img_dim_orig = input_volume.shape
    num_filters, _, __, filter_dim = filters.shape
    
    
    # zero padding adds zeroes around the input, but not along the depth dimension of each image
    image = input_volume
    if zero_padding != 0:
        image = np.zeros(shape=(num_images, num_channels, img_dim_orig + 2 * zero_padding, img_dim_orig + 2 * zero_padding))
        image[:, :, zero_padding:-zero_padding, zero_padding:-zero_padding] = input_volume
    
    img_dim = img_dim_orig + 2 * zero_padding
    
    
    # im2col 3d from:
    # https://stackoverflow.com/questions/50292750/python-the-implementation-of-im2col-which-takes-the-advantages-of-6-dimensional
    img_stride, channel_stride, row_stride, col_stride = image.strides
    out_dim = (img_dim - filter_dim) // stride + 1
    col = np.lib.stride_tricks.as_strided(image, shape=(num_images, out_dim, out_dim, num_channels, filter_dim, filter_dim), strides=(img_stride, stride * row_stride, stride * col_stride, channel_stride, row_stride, col_stride)).astype(float)
    
    if maintain_depth:
        col = col.reshape((num_images * out_dim ** 2 * num_channels, filter_dim ** 2))
    else:
        col = col.reshape(np.multiply.reduceat(col.shape, (0, 3)))
    
    # each 2d slice of col has rows containing each extended receptive field
    # similarly, the filters will be flattened into a 2d array (col: each filter stretched out)
    filt_stride, filt_depth_stride, filt_row_stride, filt_col_stride = filters.strides
                        
    filt_col = None
    if (maintain_depth):
        filt_col = np.lib.stride_tricks.as_strided(filters, 
                                                   shape=(filter_dim ** 2, num_channels * num_filters), 
                                                   strides=(filt_col_stride, filt_depth_stride))
    else:
        filt_col = np.lib.stride_tricks.as_strided(filters, 
                                                   shape=(num_channels * filter_dim ** 2, num_filters), 
                                                   strides=(filt_col_stride, filt_stride))
                              
    # perform matrix multiplication
    # each col is a different filter; every out_dim^2 rows corresponds to one image's convolved activations
    conv = np.dot(col, filt_col)
                          
    if maintain_depth:
        # conv contains convolutions of depth slices with other slices, so the correct ones must be extracted
        # has #columns = num_filters * num_channels
        # up to num_channel th column, shift 1st column up 0, 2nd up 1, 3rd up 2; then repeat for each filter's columns
        # then take every num_channel th row
        rows, cols = conv.shape
        for col in range(cols):
            shift = col % num_channels
            if shift != 0:
                conv[:-shift, [col]] = conv[shift:, [col]]
            
        conv = conv[np.arange(0, rows, step=num_channels), :]
        
        # reshape into a 5d array of outputs
        # 5th dim contains the result for each image 
        # 4th dimension contains convolutions maintaining depth, for each filter
        # 3rd, 2nd, 1st dimensions are the outputs with the depths maintained
        conv_row_stride, conv_col_stride = conv.strides
        conv = np.lib.stride_tricks.as_strided(conv, 
                                               shape=(num_images, num_filters, num_channels, out_dim, out_dim),
                                               strides=(out_dim ** 2 * conv_row_stride, num_channels * conv_col_stride, conv_col_stride, out_dim * conv_row_stride, conv_row_stride))
        
    else:
        # add bias term (each filter should have one)
        conv += biases
    
        # reshape into list of activation volumes (1 volume per image)
        conv_row_stride, conv_col_stride = conv.strides
        conv = np.lib.stride_tricks.as_strided(conv, shape=(num_images, num_filters, out_dim, out_dim), strides=(out_dim ** 2 * conv_row_stride, conv_col_stride, out_dim * conv_row_stride, conv_row_stride))

    return conv
        

### convolution testing

In [50]:
# test convolution from https://cs231n.github.io/convolutional-networks/
img = np.asarray([0, 2, 0, 2, 2, 0, 1, 2, 2, 2, 2, 2, 0, 2, 0, 1, 2, 2, 1, 0, 2, 0, 1, 1, 2,
                 0, 0, 0, 2, 1, 2, 0, 1, 2, 1, 2, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
                 1, 0, 0, 2, 1, 2, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 1, 1, 2, 0, 1, 1, 2, 1, 2])
img = img.reshape((1, 3, 5, 5))

w0 = np.asarray([-1, 0, 1, 1, 1, -1, 0, 1, 0,
                -1, -1, 1, -1, 1, -1, 1, 0, 1,
                0, -1, -1, 1, -1, 1, 1, 1, 1])
w0 = w0.reshape((3, 3, 3))

w1 = np.asarray([-1, 1, 1, 0, -1, 0, 0, 0, -1,
                1, 0, 0, -1, -1, -1, 0, -1, 0,
                0, -1, -1, 1, 1, -1, 1, 1, 0])
w1 = w1.reshape((3, 3, 3))

filters = np.asarray([w0, w1]).reshape((2, 3, 3, 3))
biases = np.asarray([1, 0])

convolution(img, filters, biases, stride=2, zero_padding=1)

array([[[[ 1.,  7., 11.],
         [ 2.,  9., -3.],
         [ 3., -5.,  3.]],

        [[ 0., -5., -1.],
         [-9.,  1.,  3.],
         [-3., -1., -1.]]]])

In [52]:
# test for convolution maintaining the depth
z = convolution(img, filters, biases, stride=2, zero_padding=1, maintain_depth=True)
z = np.sum(z, axis=2)
for i, l in enumerate(z):
    l += biases[i]
z

array([[[[ 1.,  7., 11.],
         [ 2.,  9., -3.],
         [ 3., -5.,  3.]],

        [[ 1., -4.,  0.],
         [-8.,  2.,  4.],
         [-2.,  0.,  0.]]]])

## pooling

In [36]:
'''
- expected input volume to be an array of 3d images
- (square) sliding window dimensions
- modes: max pooling, min pooling, mean pooling
- stride: for sliding viewing window
- zero_padding: zero padding
'''
def pool(input_volumes, filter_dim, mode='max', stride=1, zero_padding=0): 
    # assume square images
    num_images, num_channels, _, img_dim_orig = input_volumes.shape   
    
    # zero padding adds zeroes around the input, but not along the depth dimension of each image
    image = input_volumes
    if zero_padding != 0:
        image = np.zeros(shape=(num_images, num_channels, img_dim_orig + 2 * zero_padding, img_dim_orig + 2 * zero_padding))
        image[:, :, zero_padding:-zero_padding, zero_padding:-zero_padding] = input_volume
    
    img_dim = img_dim_orig + 2 * zero_padding
    
    # im2col 3d from:
    # https://stackoverflow.com/questions/50292750/python-the-implementation-of-im2col-which-takes-the-advantages-of-6-dimensional
    img_stride, channel_stride, row_stride, col_stride = image.strides
    out_dim = (img_dim - filter_dim) // stride + 1
    col = np.lib.stride_tricks.as_strided(image, shape=(num_images, out_dim, out_dim, num_channels, filter_dim, filter_dim), strides=(img_stride, stride * row_stride, stride * col_stride, channel_stride, row_stride, col_stride)).astype(float)
    # col = col.reshape(np.multiply.reduceat(col.shape, (0, 3)))
    col = col.reshape((num_images, num_channels * out_dim ** 2, filter_dim ** 2))
    
    # perform the pooling operations
    result = None
    if mode == 'max':
        result = col.max(axis=2)
    elif mode == 'min':
        result = col.min(axis=2)
    elif mode == 'mean':
        result = col.mean(axis=2)
        
    # reshape result into list of images
    row_stride, col_stride = result.strides
    result = np.lib.stride_tricks.as_strided(result, shape=(num_images, num_channels, out_dim, out_dim), strides=(row_stride, col_stride, num_channels * out_dim * col_stride, num_channels * col_stride))
    return result

### pooling testing

In [72]:
img = np.random.randint(0, 5, size=75).reshape((1, 3, 5, 5))
print(f'image:\n{img}')

print(f'\nmax pooling:\n{pool(img, 3, mode="max", stride=2)}\n')
print(f'min pooling:\n{pool(img, 3, mode="min", stride=2)}\n')
print(f'mean pooling:\n{pool(img, 3, mode="mean", stride=2)}')

image:
[[[[2 2 3 3 0]
   [1 4 4 4 0]
   [2 4 0 3 2]
   [4 4 3 1 4]
   [2 3 4 1 3]]

  [[4 0 2 0 2]
   [0 3 1 1 1]
   [0 1 0 2 0]
   [0 1 0 2 2]
   [4 2 3 1 1]]

  [[0 3 4 3 4]
   [3 3 4 4 0]
   [3 1 3 2 1]
   [3 1 1 1 3]
   [4 3 3 4 2]]]]

max pooling:
[[[[4. 4.]
   [4. 4.]]

  [[4. 2.]
   [4. 3.]]

  [[4. 4.]
   [4. 4.]]]]

min pooling:
[[[[0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]]

  [[0. 0.]
   [1. 1.]]]]

mean pooling:
[[[[2.44444444 2.11111111]
   [2.88888889 2.33333333]]

  [[1.22222222 1.        ]
   [1.22222222 1.22222222]]

  [[2.66666667 2.77777778]
   [2.44444444 2.22222222]]]]
