In [1]:
import numpy as np

In [40]:
def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

def zero_pad(X, pad):
    X_pad = np.pad(X, ((0, 0), (0,0), (pad, pad), (pad, pad)), 'constant', constant_values=(0, 0))
    return X_pad

def im2col_sliding(image, f, pad=0, stride=1):
    M, C, h, w, = image.shape
    if pad > 0:
        image_pad = zero_pad(image, pad)
        #print(image_pad)
    else:
        image_pad = image
    
    h_new = int((h - f + 2*pad) / stride + 1)
    w_new = int((w - f + 2*pad) / stride + 1)
    #print('h_new: {}'.format(h_new))
    #print('w_new: {}'.format(w_new))
    
    output_vectors = np.zeros((M*h_new*w_new, f*f*C))
    #print('output_vectors: {}'.format(output_vectors.shape))
    
    itr = 0
    for m in range(M):
        for i in range(h_new):
            for j in range(w_new):
                start_i = stride * i
                end_i = stride * i + f
                start_j = stride * j
                end_j = stride * j + f
                output_vectors[itr, :] = image_pad[m, :, start_i:end_i, start_j:end_j].ravel()
                itr += 1                    
    return output_vectors

def col2img_sliding(cols,  x_shape, f, pad=0, stride=1):
    N, C, H, W = x_shape
    #C, H, W = x_shape
    H_padded, W_padded = H + 2 * pad, W + 2 * pad
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    #x_padded = np.zeros((C, H_padded, W_padded), dtype=cols.dtype)
    
    idx = 0
    for m in range(N):
        for i in range(0, H_padded -f + 1, stride):
            for j in range(0, W_padded - f + 1, stride):
                col = cols[idx, :]
                col = col.reshape((C, f, f))
            
                x_padded[m, :, i:i+stride, j:j+stride] = col[:, 0:stride, 0:stride]
            
                if stride < f:
                    if (j == W_padded - f) and (i == H_padded - f):
                        x_padded[m, :, i:i+f, j:j+f] = col
                    elif j == W_padded - f:
                        x_padded[m, :, i:i+stride, j+stride:] = col[:, 0:stride, stride:]
                    elif i == H_padded - f:
                        x_padded[m, :, i+stride:, j:j+stride] = col[:, stride:, 0:stride]
                idx += 1
    if pad > 0:
        return x_padded[:, :, pad:-pad, pad:-pad]
    else:
        return x_padded

def conv_forward_naive(X, W, b, stride=1, padding=1):
    cache = W, b, stride, padding
    n_filters, d_filter, h_filter, w_filter = W.shape
    n_x, d_x, h_x, w_x = X.shape
    h_out = int((h_x - h_filter + 2 * padding) / stride) + 1
    w_out = int((w_x - w_filter + 2 * padding) / stride) + 1
    
    X_col = im2col_sliding(X, f=h_filter, pad=padding, stride=stride)
    W_col = W.reshape(-1, d_filter*h_filter*w_filter).T
    out = X_col @ W_col + b
    #out = out.T
    out = out.reshape(n_x, w_out, h_out, n_filters)
    out = out.transpose((0, 3, 1, 2))
    
    cache = (X, W, b, stride, padding, X_col)
    
    return out, cache

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval
        
        #print('---')
        #print(pos.shape)
        #print(neg.shape)
        #print(grad.shape)
        #print('-----')
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad

def conv_backward_naive(dout, cache):
    X, W, b, stride, padding, X_col = cache
    n_filter, d_filter, h_filter, w_filter = W.shape

    db = np.sum(dout, axis=(0, 2, 3))
    
    dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(n_filter, -1)
    dW = dout_reshaped @ X_col
    dW = dW.reshape(W.shape)

    W_reshape = W.transpose(0, 2, 3, 1).reshape(n_filter, -1)
    #print(W_reshape.shape)
    #print(dout_reshaped.shape)
    #dout_reshaped = dout.transpose(1, 3, 0, 2).reshape(n_filter, -1)
    #xx = dout.transpose(1, 0, 3, 2).reshape(n_filter, -1)
    dX_col =  (W_reshape.T @ dout_reshaped).T
    print(dX_col)
    dX = col2img_sliding(dX_col, X.shape, h_filter, pad=padding, stride=stride)
    #dX = dX.transpose(0, 1, 3, 2)

    return dX, dW, db

In [41]:
np.random.seed(231)
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(lambda x: conv_forward_naive(x, w, b, stride=1, padding=1)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_forward_naive(x, w, b, stride=1, padding=1)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_forward_naive(x, w, b, stride=1, padding=1)[0], b, dout)

out, cache = conv_forward_naive(x, w, b,  stride=1, padding=1)
dx, dw, db = conv_backward_naive(dout, cache)
#print(dx)

# Your errors should be around 1e-8'
#print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))

[[-0.36375483 -1.34141551 -0.62912487 ..., -0.50475491 -0.06006965
   1.11913233]
 [ 0.02876399  0.03557739  0.0646925  ..., -0.1133004   0.09252262
  -0.42514379]
 [-0.16909617 -0.5425873  -0.30962538 ..., -0.05862499 -0.12875998
   0.90699488]
 ..., 
 [-0.74425808 -3.08850963 -1.21430991 ..., -1.7802138   0.30529865
   0.64743525]
 [ 0.24423141  1.03025033  0.39493154 ...,  0.62057362 -0.12103141
  -0.13250316]
 [ 0.52058072  2.2192805   0.83686051 ...,  1.37338714 -0.2869843
  -0.17118336]]
dx error:  1.0
dw error:  4.53106118517e-10
db error:  6.00767181599e-11


In [33]:
dx_num

array([[[[  2.02022648e+00,   1.31557244e+00,  -3.15087854e+00,
            3.23892197e+00,  -4.01214844e+00],
         [  1.71832340e+00,  -7.09240979e-01,  -5.32421591e+00,
            7.82298212e-01,  -2.18995738e+00],
         [ -8.29200893e-01,   2.82134457e+00,  -1.25541821e+00,
           -1.13170679e+00,  -3.55839346e+00],
         [ -3.35783037e-01,  -2.76913948e+00,   4.25723911e+00,
           -8.11824513e-01,   2.70901018e+00],
         [  8.47052634e-02,   4.30588591e-01,   2.95022069e+00,
           -2.92763735e+00,  -9.38719074e-01]],

        [[  3.36982195e-01,   7.66793750e+00,   1.57619395e+00,
            5.02203786e+00,  -1.69753665e+00],
         [ -1.93329123e+00,  -9.86659460e+00,   3.53426045e+00,
            7.35637014e-01,  -3.43491944e+00],
         [ -5.79144643e-01,   6.55462096e-01,   1.89050154e+00,
           -6.40445092e-01,  -2.87212774e+00],
         [ -7.13485097e+00,   8.58935000e+00,  -3.51576934e+00,
            4.39354129e+00,  -4.79888306e-02],

In [23]:
dx

array([[[[ 0.08245347,  1.19462927, -1.10830994,  0.44196542, -0.5170116 ],
         [ 0.2274243 , -0.58189853,  0.10344211,  0.16734016, -0.40660052],
         [-0.13432467, -0.11248655, -0.14206619, -0.41726292,  0.17214761],
         [-0.08155141, -0.42523959,  0.46236571,  0.09803062, -0.20097616],
         [-0.23351574, -1.47116598,  1.67099995,  0.48155474, -0.01799402]],

        [[ 1.46923586,  1.36871618,  1.49491249, -2.48298903, -1.80438526],
         [-0.03923595, -1.28522699, -0.65671459, -1.97727779, -0.68698328],
         [ 1.98147841, -1.24666944,  3.03581522,  0.78996758,  1.69784857],
         [-0.54413761, -0.78068404,  0.26970135, -0.97525095, -0.40177646],
         [ 1.09635439,  3.49999171, -3.1813225 ,  0.43832852, -1.03441395]],

        [[ 0.98238562,  2.52668611, -0.71919903, -0.82216644, -0.28710974],
         [ 0.30480825, -1.59426768, -0.23684335, -0.92086032, -0.0811628 ],
         [ 0.97092626, -0.89487387,  1.57943227, -0.13726253,  0.31234023],
        

In [6]:
def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


x_shape = (2, 3, 4, 4)
w_shape = (3, 3, 4, 4)
x = np.linspace(-0.1, 0.5, num=np.prod(x_shape)).reshape(x_shape)
w = np.linspace(-0.2, 0.3, num=np.prod(w_shape)).reshape(w_shape)
b = np.linspace(-0.1, 0.2, num=3)

conv_param = {'stride': 2, 'pad': 1}
out, _ = conv_forward_naive(x, w, b, stride=2, padding=1)
correct_out = np.array([[[[-0.08759809, -0.10987781],
                           [-0.18387192, -0.2109216 ]],
                          [[ 0.21027089,  0.21661097],
                           [ 0.22847626,  0.23004637]],
                          [[ 0.50813986,  0.54309974],
                           [ 0.64082444,  0.67101435]]],
                         [[[-0.98053589, -1.03143541],
                           [-1.19128892, -1.24695841]],
                          [[ 0.69108355,  0.66880383],
                           [ 0.59480972,  0.56776003]],
                          [[ 2.36270298,  2.36904306],
                           [ 2.38090835,  2.38247847]]]])

# Compare your output to ours; difference should be around 2e-8
print('Testing conv_forward_naive')
print('difference: ', rel_error(out, correct_out))

Testing conv_forward_naive
difference:  2.21214765759e-08


In [7]:
dx.shape

(4, 3, 5, 5)

In [8]:
db

array([ 0.47050388,  1.80007328])

In [9]:
dw

array([[[[  3.74083229,  -1.11736932,   5.65008475],
         [-19.11962989,   5.65406037,   4.53124725],
         [ -3.6329338 ,   3.76169087,   7.20472087]],

        [[  7.03395312,  -4.5727834 ,  -7.8451423 ],
         [  5.66691388, -12.92959222,  -0.40313598],
         [  5.28682252,  11.21425667,  -3.41223078]],

        [[  9.39415069, -22.2121015 , -16.20336304],
         [-14.39395302, -15.52157928,  -8.1869118 ],
         [  2.5098062 ,  -2.08364313,  -1.90675446]]],


       [[[ -9.06570861,  11.87244296,  -7.41391816],
         [ -0.12837405,  -7.23888876,   9.17635209],
         [  4.14548712,  23.18841142,  -9.49222648]],

        [[  2.58171801,  -1.78163112,  -1.10845452],
         [-13.76007062,   3.91269497,  -6.56275002],
         [  3.27507715,  -4.38790371,  10.09310656]],

        [[-14.34939674,  -2.80296041,  -3.72368111],
         [  0.65154294,  24.28001799, -10.36012214],
         [ -3.16236407,   1.52530603,   3.68371982]]]])

In [10]:
dw_num

array([[[[  3.74083229,  -1.11736932,   5.65008475],
         [-19.11962989,   5.65406037,   4.53124725],
         [ -3.6329338 ,   3.76169087,   7.20472087]],

        [[  7.03395312,  -4.5727834 ,  -7.8451423 ],
         [  5.66691388, -12.92959222,  -0.40313598],
         [  5.28682252,  11.21425667,  -3.41223078]],

        [[  9.39415069, -22.2121015 , -16.20336304],
         [-14.39395302, -15.52157928,  -8.1869118 ],
         [  2.5098062 ,  -2.08364313,  -1.90675446]]],


       [[[ -9.06570861,  11.87244296,  -7.41391816],
         [ -0.12837405,  -7.23888876,   9.17635209],
         [  4.14548712,  23.18841142,  -9.49222648]],

        [[  2.58171801,  -1.78163112,  -1.10845452],
         [-13.76007062,   3.91269497,  -6.56275002],
         [  3.27507715,  -4.38790371,  10.09310656]],

        [[-14.34939674,  -2.80296041,  -3.72368111],
         [  0.65154294,  24.28001799, -10.36012214],
         [ -3.16236407,   1.52530603,   3.68371982]]]])