In [3]:
import numpy as np

In [73]:
def convolution(input_volume, filters, biases, stride=1, zero_padding=0):
    '''
    Convolve a batch of images with a bank of filters (im2col + one matrix product).

    - input_volume: array of 3d images, shape (num_images, num_channels, height, width)
    - filters: array of 3d filters, shape (num_filters, num_channels, filter_height, filter_width)
    - biases: bias terms, one for each filter (anything that broadcasts against
      the trailing num_filters axis, e.g. shape (num_filters,))
    - stride: positive integer step between neighbouring receptive fields
    - zero_padding: non-negative number of zeros added around each image's
      spatial border (never along the depth dimension)

    Returns a float array of shape (num_images, num_filters, out_height, out_width)
    with out = (dim + 2 * zero_padding - filter_dim) // stride + 1 per spatial axis.

    Raises ValueError if the inputs are not 4d, the channel counts of images and
    filters differ, stride < 1, zero_padding < 0, or a filter does not fit in the
    padded image.
    '''
    input_volume = np.asarray(input_volume)
    filters = np.asarray(filters)

    if input_volume.ndim != 4:
        raise ValueError("input_volume must have shape (num_images, num_channels, height, width)")
    if filters.ndim != 4:
        raise ValueError("filters must have shape (num_filters, num_channels, filter_height, filter_width)")
    if stride < 1:
        raise ValueError("stride must be a positive integer")
    if zero_padding < 0:
        raise ValueError("zero_padding must be non-negative")

    num_images, num_channels, img_height, img_width = input_volume.shape
    num_filters, filter_channels, filter_height, filter_width = filters.shape

    # a mismatch here would otherwise make the matrix product meaningless
    if filter_channels != num_channels:
        raise ValueError(
            "filters have %d channels but the images have %d" % (filter_channels, num_channels)
        )

    # zero padding adds zeroes around the input, but not along the depth dimension of each image
    image = input_volume
    if zero_padding != 0:
        image = np.pad(
            input_volume,
            ((0, 0), (0, 0), (zero_padding, zero_padding), (zero_padding, zero_padding)),
            mode="constant",
        )

    padded_height = img_height + 2 * zero_padding
    padded_width = img_width + 2 * zero_padding
    if filter_height > padded_height or filter_width > padded_width:
        raise ValueError("filters do not fit inside the (padded) images")

    out_height = (padded_height - filter_height) // stride + 1
    out_width = (padded_width - filter_width) // stride + 1

    # im2col 3d from:
    # https://stackoverflow.com/questions/50292750/python-the-implementation-of-im2col-which-takes-the-advantages-of-6-dimensional
    # The view below stays inside `image`: the last window starts at
    # (out - 1) * stride <= padded_dim - filter_dim along each spatial axis.
    img_stride, channel_stride, row_stride, col_stride = image.strides
    windows = np.lib.stride_tricks.as_strided(
        image,
        shape=(num_images, out_height, out_width, num_channels, filter_height, filter_width),
        strides=(img_stride, stride * row_stride, stride * col_stride, channel_stride, row_stride, col_stride),
        writeable=False,
    )

    # each row of col is one flattened receptive field (channel, row, col order);
    # astype copies, so nothing downstream aliases the overlapping view
    col = windows.astype(float).reshape(
        num_images * out_height * out_width,
        num_channels * filter_height * filter_width,
    )

    # similarly, the filters are flattened into a 2d array (each column is one filter stretched out);
    # reshape is layout-aware, so non-contiguous filter arrays are handled correctly
    filt_col = filters.reshape(num_filters, -1).T

    # perform matrix multiplication
    # each col is a different filter; every out_height * out_width rows corresponds to one image's convolved activations
    conv = np.dot(col, filt_col)

    # add bias term (each filter should have one)
    conv += biases

    # reshape into list of activation volumes (1 volume per image), filters on axis 1
    return conv.reshape(num_images, out_height, out_width, num_filters).transpose(0, 3, 1, 2)
        

In [76]:
# test convolution from https://cs231n.github.io/convolutional-networks/
# one 3-channel 5x5 image, two 3x3x3 filters, stride 2, zero padding 1
img = np.asarray([0, 2, 0, 2, 2, 0, 1, 2, 2, 2, 2, 2, 0, 2, 0, 1, 2, 2, 1, 0, 2, 0, 1, 1, 2,
                 0, 0, 0, 2, 1, 2, 0, 1, 2, 1, 2, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
                 1, 0, 0, 2, 1, 2, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 1, 1, 2, 0, 1, 1, 2, 1, 2])
img = img.reshape((1, 3, 5, 5))

w0 = np.asarray([-1, 0, 1, 1, 1, -1, 0, 1, 0,
                -1, -1, 1, -1, 1, -1, 1, 0, 1,
                0, -1, -1, 1, -1, 1, 1, 1, 1])
w0 = w0.reshape((3, 3, 3))

w1 = np.asarray([-1, 1, 1, 0, -1, 0, 0, 0, -1,
                1, 0, 0, -1, -1, -1, 0, -1, 0,
                0, -1, -1, 1, 1, -1, 1, 1, 0])
w1 = w1.reshape((3, 3, 3))

# stacking two (3, 3, 3) filters already yields shape (2, 3, 3, 3)
filters = np.asarray([w0, w1])
biases = np.asarray([1, 0])

# activation volume this example produces (one 3x3 map per filter)
expected_activations = np.asarray([[[[1, 7, 11],
                                     [2, 9, -3],
                                     [3, -5, 3]],

                                    [[0, -5, -1],
                                     [-9, 1, 3],
                                     [-3, -1, -1]]]], dtype=float)

activations = convolution(img, filters, biases, stride=2, zero_padding=1)

# fail loudly on a regression instead of relying on eyeballing the output
np.testing.assert_allclose(activations, expected_activations)

activations

array([[[[ 1.,  7., 11.],
         [ 2.,  9., -3.],
         [ 3., -5.,  3.]],

        [[ 0., -5., -1.],
         [-9.,  1.,  3.],
         [-3., -1., -1.]]]])