In [1]:
import numpy as np
import edf
from time import time
import sys
import matplotlib.pyplot as plt

data = np.load('./c10_data/train.npz')
t_imgs = np.float32(data['imgs'])/255.

# Reshape the train image data to (idx, h, w, channel)
t_imgs = t_imgs.reshape(50000, 32, 32, 3)
t_labels = np.float32(data['labels'])

data = np.load('./c10_data/test.npz')
v_imgs = np.float32(data['imgs'])/255.

# Reshape the valid image data to (idx, h, w, channel)
v_imgs = v_imgs.reshape(10000, 32, 32, 3)
v_labels = np.float32(data['labels'])

In [2]:

########################################### Convolution layer#############################################
############################### Please implement the forward abd backward method in this class ############## 
class Conv:

    def __init__(self,x,f,stride=1,pad=0):
        edf.components.append(self)
        self.f = f
        self.x = x
        pad = np.array(pad)
        if pad.shape == ():
            self.xpad = self.ypad = pad
        else:
            self.ypad = pad[0]
            self.xpad = pad[1]
            
        self.stride=stride
        self.grad = None if f.grad is None and x.grad is None else edf.DT(0) 

    ####################### Please implement this function####################### 
    def forward(self):
        x = self.x.value
        f = self.f.value
        datapad = np.zeros((x.shape[0],x.shape[1]+2*self.ypad,x.shape[2]+2*self.xpad,x.shape[3]))
        datapad[:,self.ypad:self.ypad+x.shape[1],self.xpad:self.xpad+x.shape[2],:] = x
        self.value = np.zeros((x.shape[0],(x.shape[1]+2*self.ypad-f.shape[0])/self.stride+1,(x.shape[2]+2*self.xpad-f.shape[0])/self.stride +1,f.shape[3]))
        s = self.stride
        k = f.shape[0]
        self.datapad = datapad
        for i in range(self.value.shape[1]):
            for j in range(self.value.shape[2]):
                self.value[:,i,j,:] = np.tensordot(datapad[:,s*i:s*i+k,s*j:s*j+k,:],f,axes=[[1,2,3],[0,1,2]])
  

                
    ####################### Please implement this function#######################         
    def backward(self):
        s = self.stride
        k = self.f.value.shape[0]
        if self.x.grad is None or len(self.x.grad.shape) < 3:
            self.x.grad = np.ndarray((self.x.value.shape))
        #Pad the x.grad:
        gradpad = np.zeros((self.x.grad.shape[0],self.x.grad.shape[1]+2*self.ypad,self.x.grad.shape[2]+2*self.xpad,self.x.grad.shape[3]))
        gradpad[:,self.ypad:self.ypad+self.x.grad.shape[1],self.xpad:self.xpad+self.x.grad.shape[2],:] = self.x.grad
        for i in range(self.value.shape[1]):
            for j in range(self.value.shape[2]):

                gradpad[:,s*i:s*i+k,s*j:s*j+k,:] += np.tensordot(self.grad[:,i,j,:],self.f.value,axes=(1,3))
                self.f.grad += np.tensordot(self.datapad[:,s*i:s*i+k,s*j:s*j+k,:],self.grad[:,i,j,:],axes=(0,0))
                
        self.x.grad = gradpad[:,self.ypad:self.ypad+self.x.value.shape[1],self.xpad:self.xpad+self.x.value.shape[2],:]



########################################### MaxPool layer#############################################
############################### Please implement the forward abd backward method in this class ##############             
class MaxPool:
    def __init__(self,x,ksz=2,stride=None):
        edf.components.append(self)
        self.x = x
        self.ksz=ksz
        if stride is None:
            self.stride=ksz
        else:
            self.stride=stride
        self.grad = None if x.grad is None else edf.DT(0)

    ####################### Please implement this function#######################     
    def forward(self):
        x = self.x.value
        hold = x.reshape((x.shape[0],x.shape[1]/self.stride,self.stride,x.shape[2]/self.stride,self.stride,x.shape[3]))
        hold = hold.swapaxes(2,3)
        hold = hold.reshape((x.shape[0],x.shape[1]/self.stride,x.shape[2]/self.stride,self.stride**2,x.shape[3]))
        self.value = np.amax(hold,axis=3)
        self.index = np.argmax(hold,axis=3)
        

    ####################### Please implement this function#######################             
    def backward(self):
        x = self.x.value
        s=self.stride
        hold = np.zeros((x.shape[0],x.shape[1]/self.stride,x.shape[2]/self.stride,self.stride**2,x.shape[3]))
        hold[self.index] = 1
        hold = hold.reshape((x.shape[0],x.shape[1]/self.stride,x.shape[2]/self.stride,self.stride,self.stride,x.shape[3]))
        hold = hold.swapaxes(2,3)
        hold = hold.reshape(x.shape)
        if len(self.x.grad.shape) < 3:
            self.x.grad = np.ndarray((x.shape))
        for i in range(self.grad.shape[1]):
            for j in range(self.grad.shape[2]):
                for b in range(self.x.grad.shape[0]):
                    for c in range(self.x.grad.shape[3]):
                        self.x.grad[b,i:i+s,j:j+s,c] += np.multiply(self.grad[b,i,j,c],hold[b,i:i+s,j:j+s,c])

                
                            
########################################### AvePool layer#############################################
############################### Please implement the forward abd backward method in this class ##############                             
class AvePool:
    def __init__(self,x,ksz=2,stride=None):
        edf.components.append(self)
        self.x = x
        self.ksz=ksz
        if stride is None:
            self.stride=ksz
        else:
            self.stride=stride
        self.grad = None if x.grad is None else edf.DT(0)
        
    ####################### Please implement this function#######################   
    def forward(self):
        x = self.x.value
        hold = x.reshape((x.shape[0],x.shape[1]/self.stride,self.stride,x.shape[2]/self.stride,self.stride,x.shape[3]))
        hold = hold.swapaxes(2,3)
        hold = hold.reshape((x.shape[0],x.shape[1]/self.stride,x.shape[2]/self.stride,self.stride**2,x.shape[3]))
        self.value = np.mean(hold,axis=3)
        self.index = np.argmax(hold,axis=3)
        

    ####################### Please implement this function#######################    
    def backward(self):
        self.x.grad += self.grad/(self.stride**2.0)
        


In [3]:
reload(edf)

<module 'edf' from 'edf.pyc'>

In [None]:
# for repeatability
np.random.seed(0)

# Inputs
inp = edf.Value()
lab = edf.Value()


prev_channel = 3 # RGB channel 
########################## Simple Convolution Nerual Network Model for Cifar 10 ##################################
##################################################################################################################
# please implement your main cnn model here, as described by the homework, you can mimic the previous code
f1 = edf.Param(edf.xavier((3,3,3,32)))
f2 = edf.Param(edf.xavier((3,3,32,64)))
f3 = edf.Param(edf.xavier((1,1,64,10)))
b1 = edf.Param(np.zeros((32)))
b2 = edf.Param(np.zeros((64)))
b3 = edf.Param(np.zeros((10)))

hidden = edf.RELU(edf.Add(Conv(inp,f1,1,1),b1))
MP = MaxPool(hidden,stride=4)
hidden2 = edf.RELU(edf.Add(Conv(MP,f2,1,0),b2))
hidden3 = AvePool(hidden2,stride=6)
hidden4 = edf.RELU(edf.Add(Conv(hidden3,f3),b3))
pred = edf.Reshape(hidden4,(100,10))



# the standard classification layer, which you don't need to modify
pred = edf.SoftMax(pred)
loss = edf.Mean(edf.LogLoss(edf.Aref(pred,lab)))
acc = edf.Accuracy(pred,lab)


################################################################################################################## 
# evaluation bucket
bucket = 100
def eval_train():    
    
    # we only choose 1/5 of the train images for evaluation since evaluation the whole images is time consuming
    eval_imgs = t_imgs[::5]
    eval_labels = t_labels[::5]
    avg_acc = 0
    avg_loss = 0
    
    for seq in range(bucket):
        inp.set(eval_imgs[seq::bucket])
        lab.set(eval_labels[seq::bucket])
        edf.Forward()
        avg_acc += acc.value
        avg_loss += loss.value
    
    return avg_acc/bucket, avg_loss/bucket
        
def eval_test():
    
    avg_acc = 0
    avg_loss = 0
    for seq in range(bucket):
        inp.set(v_imgs[seq::bucket])
        lab.set(v_labels[seq::bucket])
        edf.Forward()
        avg_acc += acc.value
        avg_loss += loss.value
    
    return avg_acc/bucket, avg_loss/bucket

# initial accuracy 
random_acc, random_loss = eval_test()
print("Random test loss = %.4f, accuracy = %.4f" % (random_loss, random_acc))


################################################# train loop ######################################################
ep = 0
epoch = 10
batch = 100
train_loss = []; train_acc = []; test_loss =[]; test_acc = []
stime = time()
batches = range(0, len(t_labels), batch)

while ep < epoch:

    # randon shuffle the train data in each epoch
    perm = np.random.permutation(len(t_labels))

    for k in batches:
        inp.set(t_imgs[perm[k:k+batch]])
        lab.set(t_labels[perm[k:k+batch]])
        edf.Forward()
        edf.Backward(loss)
        edf.Adam()

        
    # evaluate on trainset
    t_acc, t_loss = eval_train()
    print("Epoch %d: train loss = %.4f [%.3f secs]" % (ep, t_loss,time()-stime))
    train_loss.append(t_loss)
    train_acc.append(t_acc)

    # evaluate on testset
    v_acc, v_loss = eval_test()
    print("test accuracy = %.4f" % v_acc)
    test_loss.append(v_loss)
    test_acc.append(v_acc)
    stime = time()
    ep += 1      


# plot
plt.figure(1)
plt.xlabel("epochs")
plt.ylabel("loss")
plt.plot(np.arange(len(test_loss)), test_loss, color='red')
plt.plot(np.arange(len(train_loss)), train_loss, color='blue')
plt.legend(['test loss', 'train loss'], loc='upper right')
plt.show()

plt.figure(2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.plot(np.arange(len(test_acc)), test_acc, color='red')
plt.plot(np.arange(len(train_acc)), train_acc, color='blue')
plt.legend(['test acc', 'train acc'], loc='lower right')
plt.show()

Random test loss = 2.3077, accuracy = 0.1007
Epoch 0: train loss = 2.3026 [1306.090 secs]
test accuracy = 0.1001
Epoch 1: train loss = 2.3026 [1276.446 secs]
test accuracy = 0.1000
Epoch 2: train loss = 2.3026 [1336.943 secs]
test accuracy = 0.1000
Epoch 3: train loss = 2.3026 [1533.500 secs]
test accuracy = 0.1000
Epoch 4: train loss = 2.3026 [1403.163 secs]
test accuracy = 0.1000

There is clearly something wrong in the backprop section of the Conv class, but bug fixing has become increasingly difficult as the code takes extremely long to initialize and run. The backprop method in MaxPool could be done more efficiently, as the loops are slow, but I don't have the coding knowledge to make it any better.