In [1]:
import numpy as np

In [2]:
np.random.seed(1)
A_prev = np.random.randn(2, 4, 4, 3)
hparameters = {"stride" : 2, "f": 3}

In [3]:
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    assert (W + 2 * padding - field_height) % stride == 0
    out_height = int((H + 2 * padding - field_height) / stride + 1)
    out_width = int((W + 2 * padding - field_width) / stride + 1)

    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k.astype(int), i.astype(int), j.astype(int))


def im2col_indices(x, field_height, field_width, padding=1, stride=1):
    """ An implementation of im2col based on some fancy indexing """
    # Zero-pad the input
    p = padding
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride)

    cols = x_padded[:, k, i, j]
    C = x.shape[1]
    cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
    return cols


def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
    """ An implementation of col2im based on fancy indexing and np.add.at """
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride)
    cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
    cols_reshaped = cols_reshaped.transpose(2, 0, 1)
    np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]

def conv_forward(X, W, b, stride=1, padding=1):
    cache = W, b, stride, padding
    n_filters, d_filter, h_filter, w_filter = W.shape
    n_x, d_x, h_x, w_x = X.shape
    h_out = (h_x - h_filter + 2 * padding) / stride + 1
    w_out = (w_x - w_filter + 2 * padding) / stride + 1

    if not h_out.is_integer() or not w_out.is_integer():
        raise Exception('Invalid output dimension!')

    h_out, w_out = int(h_out), int(w_out)

    X_col = im2col_indices(X, h_filter, w_filter, padding=padding, stride=stride)
    W_col = W.reshape(n_filters, -1)

    #out = W_col @ X_col + b
    out = W_col @ X_col
    out = out.reshape(n_filters, h_out, w_out, n_x)
    out = out.transpose(3, 0, 1, 2)

    cache = (X, W, b, stride, padding, X_col)

    return out, cache


def conv_backward(dout, cache):
    X, W, b, stride, padding, X_col = cache
    n_filter, d_filter, h_filter, w_filter = W.shape

    db = np.sum(dout, axis=(0, 2, 3))
    db = db.reshape(n_filter, -1)

    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(n_filter, -1)
    dW = dout_reshaped @ X_col.T
    dW = dW.reshape(W.shape)

    W_reshape = W.reshape(n_filter, -1)
    dX_col = W_reshape.T @ dout_reshaped
    dX = col2im_indices(dX_col, X.shape, h_filter, w_filter, padding=padding, stride=stride)

    return dX, dW, db

In [78]:
from builtins import range
import numpy as np


def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    assert (W + 2 * padding - field_height) % stride == 0
    out_height = (H + 2 * padding - field_height) // stride + 1
    out_width = (W + 2 * padding - field_width) // stride + 1

    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k, i, j)


def im2col_indices(x, field_height, field_width, padding=1, stride=1):
    """ An implementation of im2col based on some fancy indexing """
    # Zero-pad the input
    p = padding
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding,
                                 stride)

    cols = x_padded[:, k, i, j]
    C = x.shape[1]
    cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
    return cols


def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
    """ An implementation of col2im based on fancy indexing and np.add.at """
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
                                 stride)
    cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
    cols_reshaped = cols_reshaped.transpose(2, 0, 1)
    np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]

In [79]:
xx = np.arange(1, 19).reshape((2, 1, 3, 3))
print(xx)
i2cc = im2col_indices(xx, 2, 2, 0, 1)
print(i2cc)
col2im_indices(i2cc, (2, 1, 3, 3), field_height=2, field_width=2, padding=0, stride=1)

[[[[ 1  2  3]
   [ 4  5  6]
   [ 7  8  9]]]


 [[[10 11 12]
   [13 14 15]
   [16 17 18]]]]
[[ 1 10  2 11  4 13  5 14]
 [ 2 11  3 12  5 14  6 15]
 [ 4 13  5 14  7 16  8 17]
 [ 5 14  6 15  8 17  9 18]]


array([[[[ 1,  4,  3],
         [ 8, 20, 12],
         [ 7, 16,  9]]],


       [[[10, 22, 12],
         [26, 56, 30],
         [16, 34, 18]]]])

In [5]:
x

array([[[[-0.34385368,  0.04359686, -0.62000084],
         [ 0.69803203, -0.44712856,  1.2245077 ],
         [ 0.40349164,  0.59357852, -1.09491185]]],


       [[[ 0.16938243,  0.74055645, -0.9537006 ],
         [-0.26621851,  0.03261455, -1.37311732],
         [ 0.31515939,  0.84616065, -0.85951594]]]])

In [6]:
im2col_indices(x, 2, 2, 0, 1)

array([[-0.34385368,  0.16938243,  0.04359686,  0.74055645,  0.69803203,
        -0.26621851, -0.44712856,  0.03261455],
       [ 0.04359686,  0.74055645, -0.62000084, -0.9537006 , -0.44712856,
         0.03261455,  1.2245077 , -1.37311732],
       [ 0.69803203, -0.26621851, -0.44712856,  0.03261455,  0.40349164,
         0.31515939,  0.59357852,  0.84616065],
       [-0.44712856,  0.03261455,  1.2245077 , -1.37311732,  0.59357852,
         0.84616065, -1.09491185, -0.85951594]])

In [7]:
x1 = np.random.randn(1, 3, 3)
x2 = np.random.randn(1, 3, 3)
x = np.array([x1])

In [8]:
x

array([[[[ 0.35054598, -1.31228341, -0.03869551],
         [-1.61577235,  1.12141771,  0.40890054],
         [-0.02461696, -0.77516162,  1.27375593]]]])

In [9]:
im2col_indices(x, 2, 2, 0, 1)

array([[ 0.35054598, -1.31228341, -1.61577235,  1.12141771],
       [-1.31228341, -0.03869551,  1.12141771,  0.40890054],
       [-1.61577235,  1.12141771, -0.02461696, -0.77516162],
       [ 1.12141771,  0.40890054, -0.77516162,  1.27375593]])

In [10]:
w = np.random.randn(3, 1, 2, 2)

In [11]:
w

array([[[[-1.23005814,  0.5505375 ],
         [ 0.79280687, -0.62353073]]],


       [[[ 0.52057634, -1.14434139],
         [ 0.80186103,  0.0465673 ]]],


       [[[-0.18656977, -0.10174587],
         [ 0.86888616,  0.75041164]]]])

In [12]:
w.reshape(3, -1)

array([[-1.23005814,  0.5505375 ,  0.79280687, -0.62353073],
       [ 0.52057634, -1.14434139,  0.80186103,  0.0465673 ],
       [-0.18656977, -0.10174587,  0.86888616,  0.75041164]])

In [13]:
# https://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python
# https://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python/31404264#31404264

In [14]:
def zero_pad(X, pad):
    X_pad = np.pad(X, ((0, 0), (0,0), (pad, pad), (pad, pad)), 'constant', constant_values=(0, 0))
    return X_pad

def im2col_sliding(image, f, pad=0, stride=1):
    M, C, h, w, = image.shape
    if pad > 0:
        image_pad = zero_pad(image, pad)
        #print(image_pad)
    else:
        image_pad = image
    
    h_new = int((h - f + 2*pad) / stride + 1)
    w_new = int((w - f + 2*pad) / stride + 1)
    print('h_new: {}'.format(h_new))
    print('w_new: {}'.format(w_new))
    
    output_vectors = np.zeros((M*h_new*w_new, f*f*C))
    print('output_vectors: {}'.format(output_vectors.shape))
    
    itr = 0
    for m in range(M):
        for i in range(h_new):
            for j in range(w_new):
                start_i = stride * i
                end_i = stride * i + f
                start_j = stride * j
                end_j = stride * j + f
                output_vectors[itr, :] = image_pad[m, :, start_i:end_i, start_j:end_j].ravel()
                itr += 1                    
    return output_vectors

def col2img_sliding(cols,  x_shape, f, pad=0, stride=1):
    N, C, H, W = x_shape
    #C, H, W = x_shape
    H_padded, W_padded = H + 2 * pad, W + 2 * pad
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    #x_padded = np.zeros((C, H_padded, W_padded), dtype=cols.dtype)
    
    idx = 0
    for m in range(N):
        for i in range(0, H_padded -f + 1, stride):
            for j in range(0, W_padded - f + 1, stride):
                col = cols[idx, :]
                col = col.reshape((C, f, f))
            
                x_padded[m, :, i:i+stride, j:j+stride] = col[:, 0:stride, 0:stride]
            
                if stride < f:
                    if (j == W_padded - f) and (i == H_padded - f):
                        x_padded[m, :, i:i+f, j:j+f] = col
                    elif j == W_padded - f:
                        x_padded[m, :, i:i+stride, j+stride:] = col[:, 0:stride, stride:]
                    elif i == H_padded - f:
                        x_padded[m, :, i+stride:, j:j+stride] = col[:, stride:, 0:stride]
                idx += 1
    if pad > 0:
        return x_padded[:, :, pad:-pad, pad:-pad]
    else:
        return x_padded

def conv_forward_naive(X, W, b, stride=1, padding=1):
    cache = W, b, stride, padding
    n_filters, d_filter, h_filter, w_filter = W.shape
    n_x, d_x, h_x, w_x = X.shape
    h_out = int((h_x - h_filter + 2 * padding) / stride) + 1
    w_out = int((w_x - w_filter + 2 * padding) / stride) + 1
    
    X_col = im2col_sliding(X, f=h_filter, pad=padding, stride=stride)
    W_col = W.reshape(-1, d_filter*h_filter*w_filter).T
    out = X_col @ W_col + b
    out = out.T
    out = out.reshape(n_x, n_filters, w_out, h_out)
    
    cache = (X, W, b, stride, padding, X_col)
    
    return out, cache

def conv_backword_naive(dout, cache):
    X, W, b, stride, padding, X_col = cache
    n_filter, d_filter, h_filter, w_filter = W.shape

    db = np.sum(dout, axis=(0, 2, 3))
    db = db.reshape(n_filter, -1)

    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(n_filter, -1)
    dW = dout_reshaped @ X_col.T
    dW = dW.reshape(W.shape)

    W_reshape = W.reshape(n_filter, -1)
    dX_col = W_reshape.T @ dout_reshaped
    dX = col2img_sliding(dX_col, X.shape, h_filter, pad=padding, stride=stride)

    return dX, dW, db
    

In [15]:
zz = np.array([[1, 2, 5, 6], [2, 3, 6, 7], [3, 4, 7, 8], [5, 6, 9, 10], [6, 7, 10, 11], [7, 8, 11, 12]])
print(zz)
#col2img_sliding(zz, (3, 4), 2)

#zz = np.array([[1, 2, 5, 6], [3, 4, 7, 8]])
#col2img_sliding(zz, (2, 4), 2, stride=2, pad=1)

#zz = np.array([[0, 0, 0, 1], [0, 0, 2, 0], [0, 3, 0, 0], [4, 0, 0, 0]])
#col2img_sliding(zz, (2, 2), 2, stride=2, pad=1)

[[ 1  2  5  6]
 [ 2  3  6  7]
 [ 3  4  7  8]
 [ 5  6  9 10]
 [ 6  7 10 11]
 [ 7  8 11 12]]


In [16]:
X = np.arange(1, 37).reshape(2, 2, 3, 3)
W = np.random.randn(2, 2, 2, 2)
b = np.random.randn(2,)
#conv_forward_naive(X, W, b)

out, _ = conv_forward(X, W, b, stride=1, padding=1)
print(out)
out.shape

[[[[-23.72999738 -28.56980149 -30.61855464  -3.4879379 ]
   [-22.21615461 -24.07818212 -24.54472277   0.19350585]
   [-25.20476926 -25.47780406 -25.94434471   1.78249856]
   [ 11.88473098  20.13115381  21.71336631   8.95008998]]

  [[-11.32034252 -11.92705796 -12.69045807   1.53300623]
   [-13.99042664 -11.4785179   -9.27161269   4.38846466]
   [-11.45317215  -4.85780227  -2.65089707   8.47192579]
   [ 13.40399052  20.56057229  23.53087761   6.89341603]]]


 [[[-56.42623242 -65.44735815 -67.4961113   -7.66925952]
   [-40.14784254 -32.47591377 -32.94245442   9.72746213]
   [-43.1364572  -33.87553571 -34.34207636  11.31645484]
   [ 26.64927808  48.61097882  50.19319132  22.66536789]]

  [[-33.31947384 -25.66825994 -26.43166005   9.79093556]
   [  1.23310031  28.24577583  30.45268104  28.88923144]
   [  3.7703548   34.86649145  37.07339666  32.97269257]
   [ 50.62664879  74.026068    76.99637332  23.13625347]]]]


(2, 2, 4, 4)

In [17]:
conv_forward_naive(X, W, b)

h_new: 4
w_new: 4
output_vectors: (32, 8)


(array([[[[-24.24709184, -29.08689595, -31.13564909,  -4.00503236],
          [-22.73324906, -24.59527658, -25.06181723,  -0.32358861],
          [-25.72186372, -25.99489852, -26.46143917,   1.2654041 ],
          [ 11.36763652,  19.61405936,  21.19627186,   8.43299553]],
 
         [[-56.94332687, -65.96445261, -68.01320575,  -8.18635398],
          [-40.664937  , -32.99300823, -33.45954888,   9.21036767],
          [-43.65355165, -34.39263017, -34.85917082,  10.79936039],
          [ 26.13218363,  48.09388436,  49.67609686,  22.14827343]]],
 
 
        [[[-12.31736935, -12.92408478, -13.68748489,   0.5359794 ],
          [-14.98745347, -12.47554472, -10.26863952,   3.39143784],
          [-12.45019897,  -5.8548291 ,  -3.6479239 ,   7.47489897],
          [ 12.4069637 ,  19.56354547,  22.53385078,   5.8963892 ]],
 
         [[-34.31650066, -26.66528677, -27.42868688,   8.79390874],
          [  0.23607348,  27.248749  ,  29.45565421,  27.89220461],
          [  2.77332798,  33.8694646

In [18]:
n_filters, d_filter, h_filter, w_filter = W.shape

In [19]:
W.reshape(-1, d_filter*h_filter*w_filter).shape

(2, 8)

In [20]:
b = np.linspace(-0.1, 0.2, num=3)

In [21]:
b

array([-0.1 ,  0.05,  0.2 ])

In [22]:
xx = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10,11,12]])
xx

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
dd = np.array([[9]])

In [24]:
dd

array([[9]])

In [25]:
dd.shape

(1, 1)

In [26]:
x = np.arange(1, 19).reshape((2, 3, 3))
x

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [27]:
#rr = np.array([[1, 2, 3, 4, 10, 11, 13, 14],
         #[2, 3, 5, 6, 11, 12, 14, 15],
         #[4, 5, 7, 8, 13, 14, 16, 17],
         #[5, 6, 8, 9, 14, 15, 17, 18]])
#col2img_sliding(rr, (2, 3, 3), 2)

In [28]:
np.array([ 1,  2,  3,  4, 10, 11, 13, 14]).reshape((2, 2, 2))

array([[[ 1,  2],
        [ 3,  4]],

       [[10, 11],
        [13, 14]]])

In [29]:
img = np.arange(64*30*30*30).reshape((64, 30, 30, 30))

In [30]:
im2col = im2col_sliding(img, 2)
#im2col

h_new: 29
w_new: 29
output_vectors: (53824, 120)


In [31]:
%%timeit 
img_ = col2img_sliding(im2col, (64, 30, 30, 30), 2)

10 loops, best of 3: 120 ms per loop


In [32]:
np.allclose(img, col2img_sliding(im2col, (64, 30, 30, 30), 2))

True

In [33]:
X = np.arange(1, 37).reshape(2, 2, 3, 3)
im2col = im2col_sliding(X, 2, pad=1, stride=1)

h_new: 4
w_new: 4
output_vectors: (32, 8)


In [34]:
col2img_sliding(im2col, (2, 2, 3, 3), 2, pad=1, stride=1)

array([[[[  1.,   2.,   3.],
         [  4.,   5.,   6.],
         [  7.,   8.,   9.]],

        [[ 10.,  11.,  12.],
         [ 13.,  14.,  15.],
         [ 16.,  17.,  18.]]],


       [[[ 19.,  20.,  21.],
         [ 22.,  23.,  24.],
         [ 25.,  26.,  27.]],

        [[ 28.,  29.,  30.],
         [ 31.,  32.,  33.],
         [ 34.,  35.,  36.]]]])

In [35]:
X 

array([[[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],

        [[10, 11, 12],
         [13, 14, 15],
         [16, 17, 18]]],


       [[[19, 20, 21],
         [22, 23, 24],
         [25, 26, 27]],

        [[28, 29, 30],
         [31, 32, 33],
         [34, 35, 36]]]])

In [36]:
default = object()

class Cout(object):
    def __init__(self, s=''):
        self.s = str(s)
    def __lshift__(self, s):
        return Cout(self.s + str(s))
    def __repr__(self):
        print(self.s, end='')
        return ''
cout = Cout()
endl = '\n'
            
class Case(object):
    def __call__(self, x):
        return x
case = Case()

class Switch(object):
    def __call__(self, obj):
        self.obj = obj
        return self
    def __or__(self, items):
        output = items.get(self.obj, items[default])
        if isinstance(output, Cout):
            print(output.s)
        else:
            return output
switch = Switch()

In [37]:
x = 1

switch (x) | {
    case (1):
        cout << "about sqrt(2), as much as it matters",
    case (2):
        cout << "about e, more or less",
    case (3):
        cout << "about pi, to a good approximation",
    default:
        cout << "I don't know what you're talking about",
}

about sqrt(2), as much as it matters


In [38]:
r = np.random.rand(2, 4, 4)
r

array([[[  1.30996845e-01,   8.09490692e-01,   3.44736653e-01,
           9.40107482e-01],
        [  5.82014180e-01,   8.78831984e-01,   8.44734445e-01,
           9.05392319e-01],
        [  4.59880266e-01,   5.46346816e-01,   7.98603591e-01,
           2.85718852e-01],
        [  4.90253523e-01,   5.99110308e-01,   1.55332756e-02,
           5.93481408e-01]],

       [[  4.33676349e-01,   8.07360529e-01,   3.15244803e-01,
           8.92888709e-01],
        [  5.77857215e-01,   1.84010202e-01,   7.87929234e-01,
           6.12031177e-01],
        [  5.39092721e-02,   4.20193680e-01,   6.79068837e-01,
           9.18601778e-01],
        [  4.02024891e-04,   9.76759149e-01,   3.76580315e-01,
           9.73783538e-01]]])

In [39]:
r.transpose((1, 0, 2))

array([[[  1.30996845e-01,   8.09490692e-01,   3.44736653e-01,
           9.40107482e-01],
        [  4.33676349e-01,   8.07360529e-01,   3.15244803e-01,
           8.92888709e-01]],

       [[  5.82014180e-01,   8.78831984e-01,   8.44734445e-01,
           9.05392319e-01],
        [  5.77857215e-01,   1.84010202e-01,   7.87929234e-01,
           6.12031177e-01]],

       [[  4.59880266e-01,   5.46346816e-01,   7.98603591e-01,
           2.85718852e-01],
        [  5.39092721e-02,   4.20193680e-01,   6.79068837e-01,
           9.18601778e-01]],

       [[  4.90253523e-01,   5.99110308e-01,   1.55332756e-02,
           5.93481408e-01],
        [  4.02024891e-04,   9.76759149e-01,   3.76580315e-01,
           9.73783538e-01]]])

In [40]:
np.reshape?

In [41]:
r.shape

(2, 4, 4)

In [42]:
np.sum(r, axis=(0, 1))

array([ 2.72898967,  5.22210336,  4.16243115,  6.12200526])

In [43]:
r

array([[[  1.30996845e-01,   8.09490692e-01,   3.44736653e-01,
           9.40107482e-01],
        [  5.82014180e-01,   8.78831984e-01,   8.44734445e-01,
           9.05392319e-01],
        [  4.59880266e-01,   5.46346816e-01,   7.98603591e-01,
           2.85718852e-01],
        [  4.90253523e-01,   5.99110308e-01,   1.55332756e-02,
           5.93481408e-01]],

       [[  4.33676349e-01,   8.07360529e-01,   3.15244803e-01,
           8.92888709e-01],
        [  5.77857215e-01,   1.84010202e-01,   7.87929234e-01,
           6.12031177e-01],
        [  5.39092721e-02,   4.20193680e-01,   6.79068837e-01,
           9.18601778e-01],
        [  4.02024891e-04,   9.76759149e-01,   3.76580315e-01,
           9.73783538e-01]]])

In [52]:
def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


x_shape = (2, 3, 4, 4)
w_shape = (3, 3, 4, 4)
x = np.linspace(-0.1, 0.5, num=np.prod(x_shape)).reshape(x_shape)
w = np.linspace(-0.2, 0.3, num=np.prod(w_shape)).reshape(w_shape)
b = np.linspace(-0.1, 0.2, num=3)

out, cache = conv_forward_naive(x, w, b, stride=2, padding=1)
correct_out = np.array([[[[-0.08759809, -0.10987781],
                           [-0.18387192, -0.2109216 ]],
                          [[ 0.21027089,  0.21661097],
                           [ 0.22847626,  0.23004637]],
                          [[ 0.50813986,  0.54309974],
                           [ 0.64082444,  0.67101435]]],
                         [[[-0.98053589, -1.03143541],
                           [-1.19128892, -1.24695841]],
                          [[ 0.69108355,  0.66880383],
                           [ 0.59480972,  0.56776003]],
                          [[ 2.36270298,  2.36904306],
                           [ 2.38090835,  2.38247847]]]])

# Compare your output to ours; difference should be around 2e-8
print('Testing conv_forward_naive')
print('difference: ', rel_error(out, correct_out))

print(correct_out.shape)
print(out.shape)

h_new: 2
w_new: 2
output_vectors: (8, 48)
Testing conv_forward_naive
difference:  1.0
(2, 3, 2, 2)
(2, 3, 2, 2)


In [62]:
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.
    The input consists of N data points, each with C channels, height H and width
    W. We convolve each input with F different filters, where each filter spans
    all C channels and has height HH and width HH.
    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.
    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    out = None
    ##########################################################################
    # TODO: Implement the convolutional forward pass.                           #
    # Hint: you can use the function np.pad for padding.                        #
    ##########################################################################
    N, C, H, W = x.shape
    F, C, HH, WW = w.shape
    S = conv_param['stride']
    P = conv_param['pad']

    # Add padding to each image
    x_pad = np.pad(x, ((0,), (0,), (P,), (P,)), 'constant')
    # Size of the output
    Hh = 1 + int((H + 2 * P - HH) / S)
    Hw = 1 + int((W + 2 * P - WW) / S)

    out = np.zeros((N, F, Hh, Hw))

    for n in range(N):  # First, iterate over all the images
        for f in range(F):  # Second, iterate over all the kernels
            for k in range(Hh):
                for l in range(Hw):
                    out[n, f, k, l] = np.sum(
                        x_pad[n, :, k * S:k * S + HH, l * S:l * S + WW] * w[f, :]) + b[f]

    cache = (x, w, b, conv_param)
    return out, cache

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval
        
        #print('---')
        #print(pos.shape)
        #print(neg.shape)
        #print(grad.shape)
        #print('-----')
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad

np.random.seed(231)
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_forward_naive(x, w, b, stride=2, padding=1)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_forward_naive(x, w, b, stride=2, padding=1)[0], b, dout)

out, cache = conv_forward_naive(x, w, b,  stride=2, padding=1)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-8'
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))

TypeError: conv_forward_naive() got an unexpected keyword argument 'stride'

In [65]:
def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


x_shape = (2, 3, 4, 4)
w_shape = (3, 3, 4, 4)
x = np.linspace(-0.1, 0.5, num=np.prod(x_shape)).reshape(x_shape)
w = np.linspace(-0.2, 0.3, num=np.prod(w_shape)).reshape(w_shape)
b = np.linspace(-0.1, 0.2, num=3)

conv_param = {'stride': 2, 'pad': 1}
out, _ = conv_forward_naive(x, w, b, conv_param)
correct_out = np.array([[[[-0.08759809, -0.10987781],
                           [-0.18387192, -0.2109216 ]],
                          [[ 0.21027089,  0.21661097],
                           [ 0.22847626,  0.23004637]],
                          [[ 0.50813986,  0.54309974],
                           [ 0.64082444,  0.67101435]]],
                         [[[-0.98053589, -1.03143541],
                           [-1.19128892, -1.24695841]],
                          [[ 0.69108355,  0.66880383],
                           [ 0.59480972,  0.56776003]],
                          [[ 2.36270298,  2.36904306],
                           [ 2.38090835,  2.38247847]]]])

# Compare your output to ours; difference should be around 2e-8
print('Testing conv_forward_naive')
print('difference: ', rel_error(out, correct_out))

Testing conv_forward_naive
difference:  2.21214764175e-08


In [87]:
def im2col(x,hh,ww,stride):

    """
    Args:
      x: image matrix to be translated into columns, (C,H,W)
      hh: filter height
      ww: filter width
      stride: stride
    Returns:
      col: (new_h*new_w,hh*ww*C) matrix, each column is a cube that will convolve with a filter
            new_h = (H-hh) // stride + 1, new_w = (W-ww) // stride + 1
    """

    c,h,w = x.shape
    new_h = (h-hh) // stride + 1
    new_w = (w-ww) // stride + 1
    col = np.zeros([new_h*new_w,c*hh*ww])

    for i in range(new_h):
        for j in range(new_w):
            patch = x[...,i*stride:i*stride+hh,j*stride:j*stride+ww]
            col[i*new_w+j,:] = np.reshape(patch,-1)
    return col

def col2im(mul,h_prime,w_prime,C):
    """
      Args:
      mul: (h_prime*w_prime*w,F) matrix, each col should be reshaped to C*h_prime*w_prime when C>0, or h_prime*w_prime when C = 0
      h_prime: reshaped filter height
      w_prime: reshaped filter width
      C: reshaped filter channel, if 0, reshape the filter to 2D, Otherwise reshape it to 3D
    Returns:
      if C == 0: (F,h_prime,w_prime) matrix
      Otherwise: (F,C,h_prime,w_prime) matrix
    """
    F = mul.shape[1]
    if(C == 1):
        out = np.zeros([F,h_prime,w_prime])
        for i in range(F):
            col = mul[:,i]
            out[i,:,:] = np.reshape(col,(h_prime,w_prime))
    else:
        out = np.zeros([F,C,h_prime,w_prime])
        for i in range(F):
            col = mul[:,i]
            out[i,:,:] = np.reshape(col,(C,h_prime,w_prime))

    return out

In [92]:
x = np.arange(1, 19).reshape(2, 3, 3)
print(x)
i2c = im2col(x, 2, 2, 1)
print(i2c)
col2im(i2c,3,3,1)

[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]]

 [[10 11 12]
  [13 14 15]
  [16 17 18]]]
[[  1.   2.   4.   5.  10.  11.  13.  14.]
 [  2.   3.   5.   6.  11.  12.  14.  15.]
 [  4.   5.   7.   8.  13.  14.  16.  17.]
 [  5.   6.   8.   9.  14.  15.  17.  18.]]


ValueError: cannot reshape array of size 4 into shape (3,3)