In [1]:
import numpy as np

In [2]:
np.random.seed(1)
A_prev = np.random.randn(2, 4, 4, 3)
hparameters = {"stride" : 2, "f": 3}

In [3]:
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    assert (W + 2 * padding - field_height) % stride == 0
    out_height = int((H + 2 * padding - field_height) / stride + 1)
    out_width = int((W + 2 * padding - field_width) / stride + 1)

    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k.astype(int), i.astype(int), j.astype(int))


def im2col_indices(x, field_height, field_width, padding=1, stride=1):
    """ An implementation of im2col based on some fancy indexing """
    # Zero-pad the input
    p = padding
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride)

    cols = x_padded[:, k, i, j]
    C = x.shape[1]
    cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
    return cols


def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
    """ An implementation of col2im based on fancy indexing and np.add.at """
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride)
    cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
    cols_reshaped = cols_reshaped.transpose(2, 0, 1)
    np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]

def conv_forward(X, W, b, stride=1, padding=1):
    cache = W, b, stride, padding
    n_filters, d_filter, h_filter, w_filter = W.shape
    n_x, d_x, h_x, w_x = X.shape
    h_out = (h_x - h_filter + 2 * padding) / stride + 1
    w_out = (w_x - w_filter + 2 * padding) / stride + 1

    if not h_out.is_integer() or not w_out.is_integer():
        raise Exception('Invalid output dimension!')

    h_out, w_out = int(h_out), int(w_out)

    X_col = im2col_indices(X, h_filter, w_filter, padding=padding, stride=stride)
    W_col = W.reshape(n_filters, -1)

    #out = W_col @ X_col + b
    out = W_col @ X_col
    out = out.reshape(n_filters, h_out, w_out, n_x)
    out = out.transpose(3, 0, 1, 2)

    cache = (X, W, b, stride, padding, X_col)

    return out, cache


def conv_backward(dout, cache):
    X, W, b, stride, padding, X_col = cache
    n_filter, d_filter, h_filter, w_filter = W.shape

    db = np.sum(dout, axis=(0, 2, 3))
    db = db.reshape(n_filter, -1)

    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(n_filter, -1)
    dW = dout_reshaped @ X_col.T
    dW = dW.reshape(W.shape)

    W_reshape = W.reshape(n_filter, -1)
    dX_col = W_reshape.T @ dout_reshaped
    dX = col2im_indices(dX_col, X.shape, h_filter, w_filter, padding=padding, stride=stride)

    return dX, dW, db

In [4]:
x1 = np.random.randn(1, 3, 3)
x2 = np.random.randn(1, 3, 3)
x = np.array([x1, x2])
#x = np.reshape(x, newshape=(-1, -1, 2, 2))

In [5]:
x

array([[[[-0.34385368,  0.04359686, -0.62000084],
         [ 0.69803203, -0.44712856,  1.2245077 ],
         [ 0.40349164,  0.59357852, -1.09491185]]],


       [[[ 0.16938243,  0.74055645, -0.9537006 ],
         [-0.26621851,  0.03261455, -1.37311732],
         [ 0.31515939,  0.84616065, -0.85951594]]]])

In [6]:
im2col_indices(x, 2, 2, 0, 1)

array([[-0.34385368,  0.16938243,  0.04359686,  0.74055645,  0.69803203,
        -0.26621851, -0.44712856,  0.03261455],
       [ 0.04359686,  0.74055645, -0.62000084, -0.9537006 , -0.44712856,
         0.03261455,  1.2245077 , -1.37311732],
       [ 0.69803203, -0.26621851, -0.44712856,  0.03261455,  0.40349164,
         0.31515939,  0.59357852,  0.84616065],
       [-0.44712856,  0.03261455,  1.2245077 , -1.37311732,  0.59357852,
         0.84616065, -1.09491185, -0.85951594]])

In [7]:
x1 = np.random.randn(1, 3, 3)
x2 = np.random.randn(1, 3, 3)
x = np.array([x1])

In [8]:
x

array([[[[ 0.35054598, -1.31228341, -0.03869551],
         [-1.61577235,  1.12141771,  0.40890054],
         [-0.02461696, -0.77516162,  1.27375593]]]])

In [9]:
im2col_indices(x, 2, 2, 0, 1)

array([[ 0.35054598, -1.31228341, -1.61577235,  1.12141771],
       [-1.31228341, -0.03869551,  1.12141771,  0.40890054],
       [-1.61577235,  1.12141771, -0.02461696, -0.77516162],
       [ 1.12141771,  0.40890054, -0.77516162,  1.27375593]])

In [10]:
w = np.random.randn(3, 1, 2, 2)

In [11]:
w

array([[[[-1.23005814,  0.5505375 ],
         [ 0.79280687, -0.62353073]]],


       [[[ 0.52057634, -1.14434139],
         [ 0.80186103,  0.0465673 ]]],


       [[[-0.18656977, -0.10174587],
         [ 0.86888616,  0.75041164]]]])

In [12]:
w.reshape(3, -1)

array([[-1.23005814,  0.5505375 ,  0.79280687, -0.62353073],
       [ 0.52057634, -1.14434139,  0.80186103,  0.0465673 ],
       [-0.18656977, -0.10174587,  0.86888616,  0.75041164]])

In [13]:
# https://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python
# https://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python/31404264#31404264

In [23]:
def zero_pad(X, pad):
    X_pad = np.pad(X, ((0, 0), (0,0), (pad, pad), (pad, pad)), 'constant', constant_values=(0, 0))
    return X_pad

def im2col_sliding(image, f, pad=0, stride=1):
    M, C, h, w, = image.shape
    if pad > 0:
        image_pad = zero_pad(image, pad)
        #print(image_pad)
    else:
        image_pad = image
    
    h_new = int((h - f + 2*pad) / stride + 1)
    w_new = int((w - f + 2*pad) / stride + 1)
    print('h_new: {}'.format(h_new))
    print('w_new: {}'.format(w_new))
    
    output_vectors = np.zeros((M*h_new*w_new, f*f*C))
    print('output_vectors: {}'.format(output_vectors.shape))
    
    itr = 0
    for m in range(M):
        for i in range(h_new):
            for j in range(w_new):
                start_i = stride * i
                end_i = stride * i + f
                start_j = stride * j
                end_j = stride * j + f
                output_vectors[itr, :] = image_pad[m, :, start_i:end_i, start_j:end_j].ravel()
                itr += 1                    
    return output_vectors

def col2img_sliding(cols,  x_shape, f, pad=0, stride=1):
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * pad, W + 2 * pad
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    
    idx = 0
    for i in range(H_padded):
        for j in range(W_padded):
            col = cols[idx, :]
            start_i =     

def conv_forward_naive(X, W, b, stride=1, padding=1):
    cache = W, b, stride, padding
    n_filters, d_filter, h_filter, w_filter = W.shape
    n_x, d_x, h_x, w_x = X.shape
    h_out = int((h_x - h_filter + 2 * padding) / stride) + 1
    w_out = int((w_x - w_filter + 2 * padding) / stride) + 1
    
    X_col = im2col_sliding(X, f=h_filter, pad=padding, stride=stride)
    W_col = W.reshape(-1, d_filter*h_filter*w_filter).T
    out = X_col @ W_col + b
    out = out.T
    out = out.reshape(n_x, n_filters, w_out, h_out)
    return out

In [24]:
X = np.arange(1, 37).reshape(2, 2, 3, 3)
W = np.random.randn(2, 2, 2, 2)
b = np.random.randn(2,)
#conv_forward_naive(X, W, b)

out, _ = conv_forward(X, W, b, stride=1, padding=1)
print(out)
out.shape

[[[[ -19.13831239    1.19283214    1.80767911   27.77403072]
   [ -23.51280678   14.9051147    16.67198877   48.41418362]
   [ -29.97532078   20.20573691   21.97261098   60.17731982]
   [   1.34005435   18.77990424   19.93193134   19.99322583]]

  [[  -3.12171341   12.18524078   15.50908096   23.56246247]
   [  -6.09119687   21.26005167   24.65581681   33.72508878]
   [  -5.67350685   31.44734707   34.8431122    43.49469416]
   [   2.68938036   -0.46515992   -0.39323496   -5.03620832]]]


 [[[ -56.41746588   12.26007761   12.87492458   76.12042967]
   [ -62.28789078   46.70884794   48.475722    118.99300086]
   [ -68.75040478   52.00947014   53.77634421  130.75613706]
   [  -0.15587616   39.51639201   40.6684191    42.22564412]]

  [[ -12.76868095   72.01436399   75.33820416   93.03855322]
   [  -3.58505673   82.38382405   85.77958919   92.34272102]
   [  -3.1673667    92.57111945   95.96688458  102.11232639]
   [  14.84248805    0.82948926    0.90141421  -15.89466683]]]]


(2, 2, 4, 4)

In [25]:
conv_forward_naive(X, W, b)

[[[[ 0  0  0  0  0]
   [ 0  1  2  3  0]
   [ 0  4  5  6  0]
   [ 0  7  8  9  0]
   [ 0  0  0  0  0]]

  [[ 0  0  0  0  0]
   [ 0 10 11 12  0]
   [ 0 13 14 15  0]
   [ 0 16 17 18  0]
   [ 0  0  0  0  0]]]


 [[[ 0  0  0  0  0]
   [ 0 19 20 21  0]
   [ 0 22 23 24  0]
   [ 0 25 26 27  0]
   [ 0  0  0  0  0]]

  [[ 0  0  0  0  0]
   [ 0 28 29 30  0]
   [ 0 31 32 33  0]
   [ 0 34 35 36  0]
   [ 0  0  0  0  0]]]]
h_new: 4
w_new: 4
output_vectors: (32, 8)
[[[[ -18.45824255    1.87290198    2.48774895   28.45410056]
   [ -22.83273694   15.58518454   17.35205861   49.09425346]
   [ -29.29525094   20.88580675   22.65268082   60.85738966]
   [   2.02012419   19.45997408   20.61200118   20.67329567]]

  [[ -55.73739604   12.94014745   13.55499442   76.80049951]
   [ -61.60782094   47.38891778   49.15579184  119.6730707 ]
   [ -68.07033494   52.68953998   54.45641405  131.4362069 ]
   [   0.52419368   40.19646185   41.34848894   42.90571396]]]


 [[[  -3.44151501   11.86543918   15.18927936   23.24

In [17]:
n_filters, d_filter, h_filter, w_filter = W.shape

In [18]:
W.reshape(-1, d_filter*h_filter*w_filter).shape

(2, 8)

In [19]:
b = np.linspace(-0.1, 0.2, num=3)

In [20]:
b

array([-0.1 ,  0.05,  0.2 ])

In [21]:
xx = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10,11,12]])
xx

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [22]:
xx.transpose(1, 0)

array([[ 1,  4,  7, 10],
       [ 2,  5,  8, 11],
       [ 3,  6,  9, 12]])