In [1]:
import numpy as np

In [2]:
# Input layout: data has shape (batch_size, depth, image_height, image_width)

def max_pool_forward(data, filter_height, filter_width, stride_height, stride_width):
    """
    Implements the forward pass of a max-pooling layer (plain NumPy reference version).

    Arguments:
    data -- input array of shape (batch_size, depth, image_height, image_width)
    filter_height, filter_width -- height and width of the pooling window
    stride_height, stride_width -- step between consecutive windows along each spatial axis

    Returns:
    A -- float64 array of shape (batch_size, depth, n_H, n_W) holding the maximum of every
         window, where n_H = (image_height - filter_height) // stride_height + 1 and
         n_W = (image_width - filter_width) // stride_width + 1. Trailing rows/columns that
         do not fill a complete window are ignored; if the filter is larger than the input
         along an axis, that output axis has length 0.

    Raises:
    ValueError -- if any filter or stride size is not positive
    """
    if min(filter_height, filter_width, stride_height, stride_width) <= 0:
        raise ValueError('filter and stride sizes must be positive')

    # Retrieve dimensions from the input shape
    batch_size = data.shape[0]
    n_C = data.shape[1]
    height = data.shape[2]
    width = data.shape[3]

    # Output size, computed in exact integer arithmetic (clamped so an oversized
    # filter yields an empty output instead of a negative dimension)
    n_H = max(0, (height - filter_height) // stride_height + 1)
    n_W = max(0, (width - filter_width) // stride_width + 1)

    # Initialize output matrix A
    A = np.zeros((batch_size, n_C, n_H, n_W))

    for i in range(batch_size):                 # loop over the examples in the batch
        for c in range(n_C):                    # loop over the channels
            for h in range(n_H):                # loop on the vertical axis of the output
                # The vertical extent of the window only depends on h, so compute it once per row.
                vert_start = h * stride_height
                vert_end = vert_start + filter_height

                for w in range(n_W):            # loop on the horizontal axis of the output
                    horiz_start = w * stride_width
                    horiz_end = horiz_start + filter_width

                    # By construction of n_H / n_W the window always lies inside the input,
                    # so no per-iteration bounds assertion is needed.
                    A[i, c, h, w] = np.max(data[i, c, vert_start:vert_end, horiz_start:horiz_end])

    return A

In [3]:
%load_ext Cython

In [4]:
%%cython -a

cimport cython

import numpy as np
cimport numpy as np

@cython.boundscheck(False)
@cython.wraparound(False)
def max_pool_forward_pyx(np.float64_t[:, :, :, :] data, int filter_height, int filter_width, int stride_height, int stride_width):
    """
    Cython implementation of the max-pooling forward pass.

    Arguments:
    data -- float64 buffer of shape (batch_size, depth, image_height, image_width)
    filter_height, filter_width -- height and width of the pooling window
    stride_height, stride_width -- step between consecutive windows along each spatial axis

    Returns:
    A -- typed float64 memoryview of shape (batch_size, depth, n_H, n_W) holding the maximum
         of every window (wrap it in np.asarray(...) to get an ndarray), where
         n_H = (image_height - filter_height) // stride_height + 1 and
         n_W = (image_width - filter_width) // stride_width + 1. If the filter is larger than
         the input along an axis, that output axis has length 0.

    Raises:
    ValueError -- if any filter or stride size is not positive
    """
    # Retrieve dimensions from the input shape
    cdef Py_ssize_t batch_size = data.shape[0]
    cdef Py_ssize_t n_C = data.shape[1]
    cdef Py_ssize_t height = data.shape[2]
    cdef Py_ssize_t width = data.shape[3]

    cdef Py_ssize_t n_H, n_W
    cdef Py_ssize_t i, c, h, w, ii, jj
    cdef Py_ssize_t vert_start, vert_end, horiz_start, horiz_end
    # The running maximum must have the same precision as the data (float64);
    # a C `float` would silently narrow every value to 32 bits.
    cdef np.float64_t max_val, val
    cdef np.float64_t[:, :, :, :] A

    # Bounds checking is disabled for this function, so reject parameters that
    # would make the window loops index outside `data`.
    if filter_height <= 0 or filter_width <= 0 or stride_height <= 0 or stride_width <= 0:
        raise ValueError('filter and stride sizes must be positive')

    # Define the dimensions of the output (exact integer arithmetic)
    n_H = (height - filter_height) // stride_height + 1
    n_W = (width - filter_width) // stride_width + 1
    if n_H < 0:
        n_H = 0
    if n_W < 0:
        n_W = 0

    # Initialize output matrix A
    A = np.zeros((batch_size, n_C, n_H, n_W))

    for i in range(batch_size):                 # loop over the examples in the batch
        for c in range(n_C):                    # loop over the channels
            for h in range(n_H):                # loop on the vertical axis of the output
                vert_start = h * stride_height
                vert_end = vert_start + filter_height

                for w in range(n_W):            # loop on the horizontal axis of the output
                    horiz_start = w * stride_width
                    horiz_end = horiz_start + filter_width

                    # Seed the maximum with the first element of the window rather than a
                    # fixed sentinel, so windows containing only negative values are handled.
                    max_val = data[i, c, vert_start, horiz_start]
                    for ii in range(vert_start, vert_end):
                        for jj in range(horiz_start, horiz_end):
                            val = data[i, c, ii, jj]
                            # `val != val` is true only for NaN: propagate it like np.max does.
                            if val > max_val or val != val:
                                max_val = val

                    A[i, c, h, w] = max_val

    return A


In [5]:
# Small test volume in (batch, depth, height, width) layout holding the values 0..107.
# NOTE(review): every value is >= 0, so this check never exercises a window whose
# maximum is negative.
data = np.arange(108).reshape((1, 3, 6, 6)).astype(np.float64)

# Pool the same input with the NumPy reference (x1) and the Cython kernel (x2).
x1 = max_pool_forward(data, filter_height=2, filter_width=2, stride_height=2, stride_width=2)
x2 = max_pool_forward_pyx(data, filter_height=2, filter_width=2, stride_height=2, stride_width=2)

def rel_error(x, y):
    """Return the largest element-wise relative error between x and y.

    Each difference |x - y| is scaled by |x| + |y|, with the scale floored at 1e-8
    so that entries where both inputs are (near) zero do not divide by zero.
    """
    abs_diff = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)

# Largest relative difference between the two implementations' outputs (0.0 means identical).
rel_error(x1, x2)

0.0

In [6]:
# Time both implementations on an (8, 32, 128, 128) float64 batch.
# Note: the chained assignment also rebinds `data`, replacing the small array from the
# previous cell; `big_batch` itself is not referenced again in the cells visible here.
big_batch = data = np.arange(8*32*128*128).reshape((8, 32, 128, 128)).astype(np.float64)
%timeit max_pool_forward(data, filter_height=2, filter_width=2, stride_height=2, stride_width=2)
%timeit max_pool_forward_pyx(data, filter_height=2, filter_width=2, stride_height=2, stride_width=2)

2.7 s ± 84.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
8.57 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
# Speed-up factor: a time in seconds converted to milliseconds, divided by a time in milliseconds.
# NOTE(review): 2.62 and 8.25 do not match the %timeit output recorded above
# (2.7 s and 8.57 ms) — presumably typed in from an earlier run; confirm.
2.62 * 1000 / 8.25 

317.57575757575756

In [8]:
def create_mask_from_window(x):
    """
    Builds a boolean mask marking where the window x attains its maximum.

    Arguments:
    x -- Array of shape (f, f)

    Returns:
    mask -- Boolean array with the same shape as x, True wherever an entry equals the
            largest value in x (if the maximum is tied, every tied entry is True).
    """
    peak = np.max(x)
    return x == peak

def distribute_value(dz, shape):
    """
    Spreads a scalar evenly over a matrix of the requested shape.

    Arguments:
    dz -- input scalar
    shape -- the shape (n_H, n_W) of the matrix to fill

    Returns:
    a -- Array of shape (n_H, n_W) in which every entry equals dz / (n_H * n_W),
         so the entries sum back to dz
    """
    rows, cols = shape
    share = dz / (rows * cols)
    return np.full(shape, share)

In [9]:
def pool_backward(dA, cache, mode = "max"):
    """
    Implements the backward pass of the pooling layer (channels-last layout)

    Note: a later cell in this notebook defines a Cython function that is also named
    pool_backward; once that cell has run, the name refers to the Cython version.

    Arguments:
    dA -- gradient of cost with respect to the output of the pooling layer,
          array of shape (m, n_H, n_W, n_C)
    cache -- tuple (A_prev, hparameters): the layer's input, indexed as
             (example, row, column, channel), and a dictionary containing "f" (window size)
             and "stride"
    mode -- the pooling mode you would like to use, defined as a string ("max" or "average")

    Returns:
    dA_prev -- gradient of cost with respect to the input of the pooling layer, same shape as A_prev

    Raises:
    ValueError -- if mode is neither "max" nor "average"
    """
    # Fail loudly instead of silently returning an all-zero gradient.
    if mode not in ("max", "average"):
        raise ValueError('mode must be "max" or "average", got {!r}'.format(mode))

    # Retrieve information from cache
    (A_prev, hparameters) = cache

    # Retrieve hyperparameters from "hparameters"
    stride = hparameters['stride']
    f = hparameters['f']

    # Retrieve dimensions from dA's shape
    m, n_H, n_W, n_C = dA.shape

    # Initialize dA_prev with zeros
    dA_prev = np.zeros_like(A_prev)

    for i in range(m):                          # loop over the training examples

        # select training example from A_prev
        a_prev = A_prev[i]

        for h in range(n_H):                    # loop on the vertical axis
            # Output position h covers the input rows starting at h * stride
            # (the window must advance by the stride, not by one row).
            vert_start = h * stride
            vert_end = vert_start + f

            for w in range(n_W):                # loop on the horizontal axis
                horiz_start = w * stride
                horiz_end = horiz_start + f

                for c in range(n_C):            # loop over the channels (depth)

                    if mode == "max":
                        # Route the gradient to the position(s) holding the window's maximum.
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                        mask = create_mask_from_window(a_prev_slice)
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += mask * dA[i, h, w, c]

                    else:  # mode == "average"
                        # Share the gradient equally between all f x f entries of the window.
                        da = dA[i, h, w, c]
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, (f, f))

    # Making sure your output shape is correct
    assert(dA_prev.shape == A_prev.shape)

    return dA_prev

In [10]:
%%cython -a

cimport cython

import numpy as np
cimport numpy as np

@cython.boundscheck(False)
@cython.wraparound(False)
cdef create_mask_from_window(np.float64_t[:, :] x):
    """
    Creates a mask from an input window x, to identify the max entry of x.

    Declared with cdef, so it can only be called from Cython code compiled in this cell.
    Currently unused: pool_backward in this cell does not call it.

    Arguments:
    x -- 2-D float64 typed memoryview holding the window

    Returns:
    mask -- result of comparing x with its maximum value; intended to be a boolean array
            of the same shape as x with True at the position(s) of the max entry
    """
    
    # NOTE(review): x is a typed memoryview, so this comparison relies on it being coerced
    # to a Python object first — confirm the result is an element-wise array and not a
    # single bool before using this helper. (An unfinished typed-loop draft that used to
    # follow as commented-out code has been dropped.)
    mask = (x == np.max(x))
                
    return mask

@cython.boundscheck(False)
@cython.wraparound(False)
def pool_backward(np.float64_t[:, :, :, :] output_grad, np.float64_t[:, :, :, :] input_data, 
                  int filter_height=2, int filter_width=2, int stride_height=2, 
                  int stride_width=2):
    """
    Cython implementation of the max-pooling backward pass.

    Both buffers are indexed as (example, channel, row, column).

    Arguments:
    output_grad -- gradient of the cost with respect to the pooling output,
                   float64 buffer of shape (batch, channels, out_height, out_width)
    input_data -- the input that was pooled in the forward pass,
                  float64 buffer of shape (batch, channels, in_height, in_width)
    filter_height, filter_width -- height and width of the pooling window
    stride_height, stride_width -- step between consecutive windows along each spatial axis

    Returns:
    dA_prev -- float64 ndarray with the shape of input_data. For every window, the
               corresponding entry of output_grad is added at each position whose input
               value equals the window maximum (all tied positions receive it; a window
               containing NaN receives nothing). Everything else is zero.

    Raises:
    ValueError -- if a filter or stride size is not positive, or if input_data is too
                  small to contain every window implied by output_grad
    """
    cdef Py_ssize_t batch_size = output_grad.shape[0]
    cdef Py_ssize_t channels = output_grad.shape[1]
    cdef Py_ssize_t height = output_grad.shape[2]
    cdef Py_ssize_t width = output_grad.shape[3]
    cdef Py_ssize_t in_height = input_data.shape[2]
    cdef Py_ssize_t in_width = input_data.shape[3]

    cdef Py_ssize_t i, c, h, w, ii, jj
    cdef Py_ssize_t vert_start, vert_end, horiz_start, horiz_end
    cdef np.float64_t max_val, val, grad_val
    cdef np.float64_t[:, :, :, :] grad_view

    # Bounds checking is disabled for this function, so every index used below must be
    # proven valid up front.
    if filter_height <= 0 or filter_width <= 0 or stride_height <= 0 or stride_width <= 0:
        raise ValueError("filter and stride sizes must be positive")
    if input_data.shape[0] < batch_size or input_data.shape[1] < channels:
        raise ValueError("input_data has fewer examples or channels than output_grad")
    if height > 0 and (height - 1) * stride_height + filter_height > in_height:
        raise ValueError("pooling windows do not fit inside input_data vertically")
    if width > 0 and (width - 1) * stride_width + filter_width > in_width:
        raise ValueError("pooling windows do not fit inside input_data horizontally")

    # Initialize dA_prev with zeros; grad_view is a typed view onto the same memory so the
    # loops below write straight into the array that is returned.
    dA_prev = np.zeros((input_data.shape[0], input_data.shape[1], in_height, in_width),
                       dtype=np.float64)
    grad_view = dA_prev

    for i in range(batch_size):                 # loop over the training examples
        for c in range(channels):               # loop over the channels (depth)
            for h in range(height):             # loop on the vertical axis
                vert_start = h * stride_height
                vert_end = vert_start + filter_height

                for w in range(width):          # loop on the horizontal axis
                    horiz_start = w * stride_width
                    horiz_end = horiz_start + filter_width

                    # Pass 1: maximum of the window (NaN-propagating, like np.max).
                    max_val = input_data[i, c, vert_start, horiz_start]
                    for ii in range(vert_start, vert_end):
                        for jj in range(horiz_start, horiz_end):
                            val = input_data[i, c, ii, jj]
                            if val > max_val or val != val:
                                max_val = val

                    # Pass 2: add the upstream gradient at every position equal to the maximum.
                    grad_val = output_grad[i, c, h, w]
                    for ii in range(vert_start, vert_end):
                        for jj in range(horiz_start, horiz_end):
                            if input_data[i, c, ii, jj] == max_val:
                                grad_view[i, c, ii, jj] += grad_val

    return dA_prev

In [11]:
# Random input of shape (3, 2, 4, 4) and an upstream gradient of shape (3, 2, 2, 2);
# the 2x2 spatial size of `grad` matches a 2x2 window with stride 2 over a 4x4 input.
# NOTE(review): no random seed is set, so the values displayed below will differ on re-run.
data = np.random.normal(scale=0.1, size=(3, 2, 4, 4))
grad = np.random.normal(scale=0.1, size=(3, 2, 2, 2))

In [12]:
# Display the sampled input so the gradient routing below can be checked by eye.
data

array([[[[ -3.84669204e-02,  -2.82905691e-02,  -8.15264464e-02,
            1.71240002e-02],
         [  1.99351296e-01,  -9.84231097e-02,   1.15834685e-03,
           -1.46557717e-02],
         [ -3.70742481e-02,  -4.62699395e-03,  -1.12283039e-01,
            1.77621338e-01],
         [ -4.51879013e-02,   8.92375391e-02,  -1.83970800e-01,
            1.29630683e-01]],

        [[ -4.11768032e-02,   6.99800938e-02,  -3.40702074e-02,
           -7.69429144e-02],
         [ -5.23995715e-02,  -9.03500925e-02,   1.12420274e-01,
           -1.41652599e-01],
         [ -1.21089709e-03,  -1.78732609e-02,   1.19192716e-01,
           -1.07456338e-01],
         [ -1.55875468e-02,   8.15053645e-02,  -1.86027926e-01,
            7.89987962e-02]]],


       [[[ -3.67011247e-02,  -3.44297194e-03,   2.40391302e-02,
           -1.30291864e-02],
         [ -1.00086596e-01,   4.46524254e-02,  -8.20463136e-02,
            5.76456971e-02],
         [  1.26883526e-01,   1.93263033e-01,   1.97884464e-01,


In [13]:
# Display the upstream gradient (one value per pooling window).
grad

array([[[[ 0.00957358, -0.01392462],
         [ 0.06622484, -0.05384413]],

        [[ 0.08253644, -0.02605153],
         [-0.04882409,  0.03744073]]],


       [[[ 0.27201098,  0.08723505],
         [ 0.03530847,  0.09743216]],

        [[-0.0303486 , -0.23405824],
         [-0.07265566,  0.13522   ]]],


       [[[-0.10084152,  0.01718352],
         [-0.0724673 ,  0.03594434]],

        [[-0.09913458,  0.06200986],
         [-0.00508236, -0.10808686]]]])

In [14]:
# Calls the Cython pool_backward(output_grad, input_data) defined in the %%cython cell above
# (it shadows the earlier pure-Python pool_backward(dA, cache, mode)), using its default
# 2x2 filter and stride of 2.
pool_backward(grad, data)

array([[[[ 0.        ,  0.        ,  0.        , -0.01392462],
         [ 0.00957358,  0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        , -0.05384413],
         [ 0.        ,  0.06622484,  0.        ,  0.        ]],

        [[ 0.        ,  0.08253644,  0.        ,  0.        ],
         [ 0.        ,  0.        , -0.02605153,  0.        ],
         [ 0.        ,  0.        ,  0.03744073,  0.        ],
         [ 0.        , -0.04882409,  0.        ,  0.        ]]],


       [[[ 0.        ,  0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.27201098,  0.        ,  0.08723505],
         [ 0.        ,  0.03530847,  0.09743216,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ]],

        [[-0.0303486 ,  0.        , -0.23405824,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ],
         [-0.07265566,  0.        ,  0.        