In [1]:
import numpy as np

In [2]:
np.random.seed(1)
A_prev = np.random.randn(2, 4, 4, 3)
hparameters = {"stride" : 2, "f": 3}

In [3]:
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    assert (W + 2 * padding - field_height) % stride == 0
    out_height = int((H + 2 * padding - field_height) / stride + 1)
    out_width = int((W + 2 * padding - field_width) / stride + 1)

    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k.astype(int), i.astype(int), j.astype(int))


def im2col_indices(x, field_height, field_width, padding=1, stride=1):
    """ An implementation of im2col based on some fancy indexing """
    # Zero-pad the input
    p = padding
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride)

    cols = x_padded[:, k, i, j]
    C = x.shape[1]
    cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
    return cols


def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
    """ An implementation of col2im based on fancy indexing and np.add.at """
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride)
    cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
    cols_reshaped = cols_reshaped.transpose(2, 0, 1)
    np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]

def conv_forward(X, W, b, stride=1, padding=1):
    cache = W, b, stride, padding
    n_filters, d_filter, h_filter, w_filter = W.shape
    n_x, d_x, h_x, w_x = X.shape
    h_out = (h_x - h_filter + 2 * padding) / stride + 1
    w_out = (w_x - w_filter + 2 * padding) / stride + 1

    if not h_out.is_integer() or not w_out.is_integer():
        raise Exception('Invalid output dimension!')

    h_out, w_out = int(h_out), int(w_out)

    X_col = im2col_indices(X, h_filter, w_filter, padding=padding, stride=stride)
    W_col = W.reshape(n_filters, -1)

    #out = W_col @ X_col + b
    out = W_col @ X_col
    out = out.reshape(n_filters, h_out, w_out, n_x)
    out = out.transpose(3, 0, 1, 2)

    cache = (X, W, b, stride, padding, X_col)

    return out, cache


def conv_backward(dout, cache):
    X, W, b, stride, padding, X_col = cache
    n_filter, d_filter, h_filter, w_filter = W.shape

    db = np.sum(dout, axis=(0, 2, 3))
    db = db.reshape(n_filter, -1)

    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(n_filter, -1)
    dW = dout_reshaped @ X_col.T
    dW = dW.reshape(W.shape)

    W_reshape = W.reshape(n_filter, -1)
    dX_col = W_reshape.T @ dout_reshaped
    dX = col2im_indices(dX_col, X.shape, h_filter, w_filter, padding=padding, stride=stride)

    return dX, dW, db

In [4]:
x1 = np.random.randn(1, 3, 3)
x2 = np.random.randn(1, 3, 3)
x = np.array([x1, x2])
#x = np.reshape(x, newshape=(-1, -1, 2, 2))

In [5]:
x

array([[[[-0.34385368,  0.04359686, -0.62000084],
         [ 0.69803203, -0.44712856,  1.2245077 ],
         [ 0.40349164,  0.59357852, -1.09491185]]],


       [[[ 0.16938243,  0.74055645, -0.9537006 ],
         [-0.26621851,  0.03261455, -1.37311732],
         [ 0.31515939,  0.84616065, -0.85951594]]]])

In [6]:
im2col_indices(x, 2, 2, 0, 1)

array([[-0.34385368,  0.16938243,  0.04359686,  0.74055645,  0.69803203,
        -0.26621851, -0.44712856,  0.03261455],
       [ 0.04359686,  0.74055645, -0.62000084, -0.9537006 , -0.44712856,
         0.03261455,  1.2245077 , -1.37311732],
       [ 0.69803203, -0.26621851, -0.44712856,  0.03261455,  0.40349164,
         0.31515939,  0.59357852,  0.84616065],
       [-0.44712856,  0.03261455,  1.2245077 , -1.37311732,  0.59357852,
         0.84616065, -1.09491185, -0.85951594]])

In [7]:
x1 = np.random.randn(1, 3, 3)
x2 = np.random.randn(1, 3, 3)
x = np.array([x1])

In [8]:
x

array([[[[ 0.35054598, -1.31228341, -0.03869551],
         [-1.61577235,  1.12141771,  0.40890054],
         [-0.02461696, -0.77516162,  1.27375593]]]])

In [9]:
im2col_indices(x, 2, 2, 0, 1)

array([[ 0.35054598, -1.31228341, -1.61577235,  1.12141771],
       [-1.31228341, -0.03869551,  1.12141771,  0.40890054],
       [-1.61577235,  1.12141771, -0.02461696, -0.77516162],
       [ 1.12141771,  0.40890054, -0.77516162,  1.27375593]])

In [10]:
w = np.random.randn(3, 1, 2, 2)

In [11]:
w

array([[[[-1.23005814,  0.5505375 ],
         [ 0.79280687, -0.62353073]]],


       [[[ 0.52057634, -1.14434139],
         [ 0.80186103,  0.0465673 ]]],


       [[[-0.18656977, -0.10174587],
         [ 0.86888616,  0.75041164]]]])

In [12]:
w.reshape(3, -1)

array([[-1.23005814,  0.5505375 ,  0.79280687, -0.62353073],
       [ 0.52057634, -1.14434139,  0.80186103,  0.0465673 ],
       [-0.18656977, -0.10174587,  0.86888616,  0.75041164]])

In [13]:
# https://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python
# https://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python/31404264#31404264

In [14]:
def zero_pad(X, pad):
    X_pad = np.pad(X, ((0, 0), (0,0), (pad, pad), (pad, pad)), 'constant', constant_values=(0, 0))
    return X_pad

def im2col_sliding(image, f, pad=0, stride=1):
    M, C, h, w, = image.shape
    if pad > 0:
        image_pad = zero_pad(image, pad)
        #print(image_pad)
    else:
        image_pad = image
    
    h_new = int((h - f + 2*pad) / stride + 1)
    w_new = int((w - f + 2*pad) / stride + 1)
    print('h_new: {}'.format(h_new))
    print('w_new: {}'.format(w_new))
    
    output_vectors = np.zeros((M*h_new*w_new, f*f*C))
    print('output_vectors: {}'.format(output_vectors.shape))
    
    itr = 0
    for m in range(M):
        for i in range(h_new):
            for j in range(w_new):
                start_i = stride * i
                end_i = stride * i + f
                start_j = stride * j
                end_j = stride * j + f
                output_vectors[itr, :] = image_pad[m, :, start_i:end_i, start_j:end_j].ravel()
                itr += 1                    
    return output_vectors

def col2img_sliding(cols,  x_shape, f, pad=0, stride=1):
    #N, C, H, W = x_shape
    H, W = x_shape
    H_padded, W_padded = H + 2 * pad, W + 2 * pad
    #x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    x_padded = np.zeros((H_padded, W_padded), dtype=cols.dtype)
    
    idx = 0
    for i in range(0, H_padded -f + 1, stride):
        for j in range(0, W_padded - f + 1, stride):
            #print('i: {}, j:{}'.format(i, j))
            col = cols[idx, :]
            col = col.reshape((f, f))
            
            x_padded[i:i+stride, j:j+stride] = col[0:stride, 0:stride]
            
            if stride < f:
                if (j == W_padded - f) and (i == H_padded - f):
                    x_padded[i:i+f, j:j+f] = col
                elif j == W_padded - f:
                    x_padded[i:i+stride, j+stride:] = col[0:stride, stride:]
                elif i == H_padded - f:
                    x_padded[i+stride:, j:j+stride] = col[stride:, 0:stride]
            
            #if j == W_padded - f:
                #print(col[0:stride, stride:])
                #x_padded[i:i+stride, j+stride:] = col[0:stride, stride:]
            
            #if i == H_padded - f:
                #print(col[stride:, 0:stride])
                #x_padded[i+stride:, j:j+stride] = col[stride:, 0:stride]
                
            #if (j == W_padded - f) and (i == H_padded - f):
                #x_padded[i:i+f, j:j+f] = col
            
            #print(x_padded)
            idx += 1
    if pad > 0:
        return x_padded[pad:-pad, pad:-pad]
    else:
        return x_padded

def conv_forward_naive(X, W, b, stride=1, padding=1):
    cache = W, b, stride, padding
    n_filters, d_filter, h_filter, w_filter = W.shape
    n_x, d_x, h_x, w_x = X.shape
    h_out = int((h_x - h_filter + 2 * padding) / stride) + 1
    w_out = int((w_x - w_filter + 2 * padding) / stride) + 1
    
    X_col = im2col_sliding(X, f=h_filter, pad=padding, stride=stride)
    W_col = W.reshape(-1, d_filter*h_filter*w_filter).T
    out = X_col @ W_col + b
    out = out.T
    out = out.reshape(n_x, n_filters, w_out, h_out)
    return out

In [15]:
zz = np.array([[1, 2, 5, 6], [2, 3, 6, 7], [3, 4, 7, 8], [5, 6, 9, 10], [6, 7, 10, 11], [7, 8, 11, 12]])
print(zz)
col2img_sliding(zz, (3, 4), 2)

#zz = np.array([[1, 2, 5, 6], [3, 4, 7, 8]])
#col2img_sliding(zz, (2, 4), 2, stride=2, pad=1)

#zz = np.array([[0, 0, 0, 1], [0, 0, 2, 0], [0, 3, 0, 0], [4, 0, 0, 0]])
#col2img_sliding(zz, (2, 2), 2, stride=2, pad=1)

[[ 1  2  5  6]
 [ 2  3  6  7]
 [ 3  4  7  8]
 [ 5  6  9 10]
 [ 6  7 10 11]
 [ 7  8 11 12]]
i: 0, j:0
i: 0, j:1
i: 0, j:2
i: 1, j:0
i: 1, j:1
i: 1, j:2


array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [16]:
X = np.arange(1, 37).reshape(2, 2, 3, 3)
W = np.random.randn(2, 2, 2, 2)
b = np.random.randn(2,)
#conv_forward_naive(X, W, b)

out, _ = conv_forward(X, W, b, stride=1, padding=1)
print(out)
out.shape

[[[[-23.72999738 -28.56980149 -30.61855464  -3.4879379 ]
   [-22.21615461 -24.07818212 -24.54472277   0.19350585]
   [-25.20476926 -25.47780406 -25.94434471   1.78249856]
   [ 11.88473098  20.13115381  21.71336631   8.95008998]]

  [[-11.32034252 -11.92705796 -12.69045807   1.53300623]
   [-13.99042664 -11.4785179   -9.27161269   4.38846466]
   [-11.45317215  -4.85780227  -2.65089707   8.47192579]
   [ 13.40399052  20.56057229  23.53087761   6.89341603]]]


 [[[-56.42623242 -65.44735815 -67.4961113   -7.66925952]
   [-40.14784254 -32.47591377 -32.94245442   9.72746213]
   [-43.1364572  -33.87553571 -34.34207636  11.31645484]
   [ 26.64927808  48.61097882  50.19319132  22.66536789]]

  [[-33.31947384 -25.66825994 -26.43166005   9.79093556]
   [  1.23310031  28.24577583  30.45268104  28.88923144]
   [  3.7703548   34.86649145  37.07339666  32.97269257]
   [ 50.62664879  74.026068    76.99637332  23.13625347]]]]


(2, 2, 4, 4)

In [17]:
conv_forward_naive(X, W, b)

h_new: 4
w_new: 4
output_vectors: (32, 8)


array([[[[-24.24709184, -29.08689595, -31.13564909,  -4.00503236],
         [-22.73324906, -24.59527658, -25.06181723,  -0.32358861],
         [-25.72186372, -25.99489852, -26.46143917,   1.2654041 ],
         [ 11.36763652,  19.61405936,  21.19627186,   8.43299553]],

        [[-56.94332687, -65.96445261, -68.01320575,  -8.18635398],
         [-40.664937  , -32.99300823, -33.45954888,   9.21036767],
         [-43.65355165, -34.39263017, -34.85917082,  10.79936039],
         [ 26.13218363,  48.09388436,  49.67609686,  22.14827343]]],


       [[[-12.31736935, -12.92408478, -13.68748489,   0.5359794 ],
         [-14.98745347, -12.47554472, -10.26863952,   3.39143784],
         [-12.45019897,  -5.8548291 ,  -3.6479239 ,   7.47489897],
         [ 12.4069637 ,  19.56354547,  22.53385078,   5.8963892 ]],

        [[-34.31650066, -26.66528677, -27.42868688,   8.79390874],
         [  0.23607348,  27.248749  ,  29.45565421,  27.89220461],
         [  2.77332798,  33.86946462,  36.07636983,  3

In [18]:
n_filters, d_filter, h_filter, w_filter = W.shape

In [19]:
W.reshape(-1, d_filter*h_filter*w_filter).shape

(2, 8)

In [20]:
b = np.linspace(-0.1, 0.2, num=3)

In [21]:
b

array([-0.1 ,  0.05,  0.2 ])

In [22]:
xx = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10,11,12]])
xx

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
xx.transpose(1, 0)

array([[ 1,  4,  7, 10],
       [ 2,  5,  8, 11],
       [ 3,  6,  9, 12]])

In [24]:
r = np.arange(20)
for i in range(0, len(r), 1):
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [25]:
col = np.array([[ 5,  6], [ 9, 10]])
col

array([[ 5,  6],
       [ 9, 10]])

In [26]:
col[1:, 0:1]

array([[9]])

In [27]:
dd = np.array([[9]])

In [28]:
dd

array([[9]])

In [29]:
dd.shape

(1, 1)