In [1]:
from Activations import *
from Layer import *
import numpy as np
import pandas as pd

In [3]:
# Load the training CSV. Later cells treat column 0 as the label and the
# remaining columns as the pixels of a 28x28 image.
# NOTE(review): read_csv uses the first row as the header by default; if this
# file has no header row, the first sample is consumed as column names and
# header=None is needed — confirm against the file.
df = pd.read_csv('mnist_train_small.csv')

In [11]:
# Split the frame into features and targets: column 0 holds the label,
# every remaining column is a feature value.
X = df.iloc[:, 1:].values
Y = df.iloc[:, 0].values

# Distinct label values in ascending order. Sorting makes the class list
# deterministic; plain set iteration order is not guaranteed.
labels = sorted(set(Y))

def one_hot(s, n_classes=None):
  '''
  Return a one-hot vector: 1 at index ``s`` and 0 everywhere else.

  Args:
    s: integer class index to mark.
    n_classes: length of the output vector. When omitted, the length of the
      module-level ``labels`` list is used.

  Returns:
    1-D float ndarray of length ``n_classes``.

  Raises:
    IndexError: if ``s`` is out of range for the vector length.
  '''
  if n_classes is None:
    n_classes = len(labels)
  # Start from zeros; starting from ones would make every encoded vector
  # all-ones and erase the class information.
  out = np.zeros(n_classes)
  out[s] = 1
  return out

# Dataset-wide statistics used to standardize every feature value.
mean = np.mean(X)
std = np.std(X)


def normalize(x):
  '''Shift by the global mean and scale by the global standard deviation.'''
  return (x - mean) / std


def reshape(z):
  '''Reshape a flat vector into a 28x28 array.'''
  return z.reshape((28, 28))


def process(x):
  '''Standardize a flat vector, then reshape it into a 28x28 array.'''
  return reshape(normalize(x))


# One processed 28x28 array per row of X, and one encoded target per label.
x_train = np.array([process(row) for row in X])
y_train = np.array([one_hot(label) for label in Y])

In [34]:
from Activations import *

def conv(img, ker,bias, s=1):
  '''
  Slide a 2-D kernel over a 2-D image without padding and return the
  window-wise sum of products plus a bias.

  Args:
    img: 2-D array (height, width).
    ker: 2-D kernel (height, width).
    bias: scalar added to every output element.
    s: stride applied along both axes (positive integer).

  Returns:
    2-D float ndarray of shape
    ((im_h - k_h) // s + 1, (im_w - k_w) // s + 1). The array is empty
    along any axis where the kernel is larger than the image.
  '''
  k_h, k_w = ker.shape  # filter dimensions
  im_h, im_w = img.shape  # image dimensions

  # Size each axis on its own so non-square images and kernels work; the
  # clamp keeps an oversized kernel from producing a negative dimension.
  out_h = max((im_h - k_h) // s + 1, 0)
  out_w = max((im_w - k_w) // s + 1, 0)
  out = np.zeros((out_h, out_w))

  for out_y in range(out_h):
    top = out_y * s
    for out_x in range(out_w):
      left = out_x * s
      window = img[top:top + k_h, left:left + k_w]
      out[out_y, out_x] = np.sum(ker * window) + bias
  return out

def maxpool(X, k, s=2):
  '''
  Max-pool a 2-D array with a k x k window and stride ``s`` (no padding).

  Args:
    X: 2-D array (height, width).
    k: side length of the square pooling window.
    s: stride applied along both axes (positive integer).

  Returns:
    2-D float ndarray of shape ((x_h - k) // s + 1, (x_w - k) // s + 1)
    holding the maximum of each window. The array is empty along any axis
    where the window is larger than the input.
  '''
  x_h, x_w = X.shape

  # Size each axis on its own so non-square inputs are pooled correctly.
  out_h = max((x_h - k) // s + 1, 0)
  out_w = max((x_w - k) // s + 1, 0)
  out = np.zeros((out_h, out_w))

  for out_y in range(out_h):
    top = out_y * s
    # Visit every window that fits horizontally (left + k <= x_w), so each
    # output column is filled.
    for out_x in range(out_w):
      left = out_x * s
      out[out_y, out_x] = np.max(X[top:top + k, left:left + k])
  return out

class ConvLayer():
    '''
    One convolution -> activation -> max-pool stage operating on a single
    2-D input.

    Attributes:
        kernels: ndarray (output_c, kernel_size, kernel_size) of filters.
        biases: ndarray (output_c,), one bias per filter.
        input_dim: value passed to the constructor; stored only.
        kernel_size: side length of each filter, also used as the pooling
            window size in ``downsample``.
        output_c: number of filters / output channels.
        activation, activation_grad: entries 0 and 1 of
            ``activation_dict[activation]``.
    '''

    def __init__(self, input_dim, output_c, kernel_size, activation):
        '''
        Create the layer with uniformly random kernels and biases in [0, 1).

        Args:
            input_dim: stored on the instance; not used by the methods here.
            output_c: number of filters.
            kernel_size: side length of each square filter.
            activation: key into ``activation_dict``.
        '''
        self.kernels = np.random.random((output_c, kernel_size, kernel_size))
        self.biases = np.random.random((output_c))

        self.input_dim = input_dim
        self.kernel_size = kernel_size

        self.output_c = output_c
        self.activation = activation_dict[activation][0]
        self.activation_grad = activation_dict[activation][1]

    def conv(self, z):
        '''Convolve the 2-D input ``z`` with every kernel; one feature map per kernel.'''
        # Inside a method the bare name ``conv`` resolves to the module-level
        # function, not to this method.
        return np.array([conv(z, ker, self.biases[k])
                         for k, ker in enumerate(self.kernels)])

    def activate(self, z):
        '''Apply the layer's activation function to ``z``.'''
        return self.activation(z)

    def downsample(self, z):
        '''Max-pool each feature map in ``z`` with a window of ``kernel_size``.'''
        return np.array([maxpool(fmap, self.kernel_size) for fmap in z])

    def forward(self, x):
        '''Run convolution, activation and max-pooling on the 2-D input ``x``.'''
        z = self.activate(self.conv(x))
        return self.downsample(z)

    # Returns the flattened feature map to feed into a fully connected layer.
    def flatten(self, z):
        '''Reshape ``z`` into a column vector of shape (z.size, 1).'''
        return z.reshape((z.size,1))

    def convolutionBackward(self, dconv_prev, conv_in, s=1):
        '''
        Backpropagation through the convolution performed by ``conv``.

        Args:
            dconv_prev: gradient of the loss with respect to the convolution
                output, shape (n_filters, out_h, out_w).
            conv_in: the 2-D input that was convolved, shape (in_h, in_w).
            s: stride that was used in the forward convolution.

        Returns:
            (dout, dfilt, dbias): gradient with respect to ``conv_in`` (same
            shape as ``conv_in``), with respect to the kernels (same shape as
            ``self.kernels``), and with respect to the biases, shape
            (n_filters, 1).

        Raises:
            ValueError: if ``conv_in`` is not 2-D.
        '''
        filt = self.kernels
        # Kernels are stored as (n_filters, height, width) with no channel
        # axis, and the forward pass convolves a 2-D input.
        n_f, f_h, f_w = filt.shape
        in_h, in_w = conv_in.shape

        ## initialize derivatives
        dout = np.zeros(conv_in.shape)
        dfilt = np.zeros(filt.shape)
        dbias = np.zeros((n_f,1))
        for curr_f in range(n_f):
            # Visit the same windows as the forward pass.
            for out_y, curr_y in enumerate(range(0, in_h - f_h + 1, s)):
                for out_x, curr_x in enumerate(range(0, in_w - f_w + 1, s)):
                    grad = dconv_prev[curr_f, out_y, out_x]
                    # loss gradient of filter (used to update the filter)
                    dfilt[curr_f] += grad * conv_in[curr_y:curr_y+f_h, curr_x:curr_x+f_w]
                    # loss gradient of the input to the convolution operation
                    dout[curr_y:curr_y+f_h, curr_x:curr_x+f_w] += grad * filt[curr_f]
            # loss gradient of the bias
            dbias[curr_f] = np.sum(dconv_prev[curr_f])

        return dout, dfilt, dbias

    @staticmethod
    def nanargmax(arr):
        '''
        Return the index of the largest non-NaN value in ``arr`` as a tuple
        with one entry per array dimension.
        '''
        idx = np.nanargmax(arr)
        return np.unravel_index(idx, arr.shape)

    @staticmethod
    def maxpoolBackward(dpool, orig, f, s=1):
        '''
        Backpropagation through a max-pooling step. Each pooled gradient is
        routed to the position of the maximum inside its window.

        Args:
            dpool: gradient with respect to the pooled output,
                shape (n_c, out_h, out_w).
            orig: the array that was pooled, shape (n_c, orig_h, orig_w).
            f: pooling window size.
            s: pooling stride. Pass the stride used in the forward pooling;
                the default here is 1.

        Returns:
            ndarray with the shape of ``orig`` holding the routed gradients.
        '''
        n_c, orig_h, orig_w = orig.shape

        dout = np.zeros(orig.shape)

        for curr_c in range(n_c):
            for out_y, curr_y in enumerate(range(0, orig_h - f + 1, s)):
                for out_x, curr_x in enumerate(range(0, orig_w - f + 1, s)):
                    # obtain index of largest value in input for current window
                    (a, b) = ConvLayer.nanargmax(orig[curr_c, curr_y:curr_y+f, curr_x:curr_x+f])
                    # Accumulate: with overlapping windows (s < f) one input
                    # position can be the maximum of several windows.
                    dout[curr_c, curr_y+a, curr_x+b] += dpool[curr_c, out_y, out_x]

        return dout


In [35]:
# Quick run of the forward pass on random data.
# Arguments are input_dim=3 (only stored by __init__), 16 filters of size 3x3,
# and the 'relu' entry of activation_dict.
F1 = ConvLayer(3, 16, 3, 'relu')
# A random 50x50 2-D input with values drawn uniformly from [0, 1).
x = np.random.random((50, 50))
# forward() applies convolution, then the activation, then max-pooling.
z = F1.forward(x)

In [None]:
# NOTE(review): convolutionBackward(self, dconv_prev, conv_in, s=1) requires
# dconv_prev and conv_in; called with no arguments it raises TypeError.
# TODO: pass the gradient w.r.t. the convolution output and the layer input.
F1.convolutionBackward()