# Try to use gpu 

## 1 Import package

In [1]:
import numpy
import minpy.numpy
import cupy
import pandas
import matplotlib.pyplot as plt
import random
from scipy.io import loadmat
from scipy.optimize import minimize
from sklearn.preprocessing import OneHotEncoder

[31mW1024 19:23:37 2009 minpy.dispatch.registry:register:47][0m Type MXNet for name reshape has already existed


## 2 Choose whether to use gpu

In [2]:
np = numpy # Only use cpu

## 3 Determine the network structure

In [3]:
num_units = 5 # the CNN ' size
inrow, incol = 20, 20 # input size is (20, 20)
krow, kcol = 5, 5 # the filtter size is (5, 5)
crow, ccol = inrow - krow + 1, incol - kcol + 1 # the convolution result's size is (16, 16) 
pfrow, pfcol = 2, 2 # the pooling fillters' size is (2, 2)
prow, pcol = crow // pfrow, ccol // pfcol # the pooling results' size is (8, 8) 
output_size = 10

weights_size = (krow * kcol + 1 +# w and b of convolution layer
                prow * pcol * output_size) * num_units + output_size # w of output layer
params = (np.random.random(size=weights_size) - 0.5) * 0.25 # all weights
params.shape

(3340,)

## 4 Initializate data set

In [4]:
data = loadmat("ex4data1.mat")
X = data["X"]
m = X.shape[0]
X = X.reshape((m, inrow, incol))
y = data["y"]

training_set_scale = 0.7
tr_m = int(m * training_set_scale)
tr_X = np.array(X[:tr_m])
ts_m = m - tr_m
ts_X = np.array(X[tr_m:])
onehot_encoder = OneHotEncoder(sparse=False, categories="auto")
y_onehot = onehot_encoder.fit_transform(y)
tr_y = np.array(y_onehot[:tr_m]).reshape((tr_m, output_size, 1))
ts_y = np.array(y[tr_m:])

tr_X.shape, tr_y.shape, ts_X.shape, ts_y.shape

((3500, 20, 20), (3500, 10, 1), (1500, 20, 20), (1500, 1))

## 5 Initializate weights

In [5]:
weights = (np.random.random(size=weights_size) - 0.5) * 0.25
weights.shape

(3340,)

## 6 Encode and decode weights

In [6]:
def encode(theta1, b1, theta2, b2):
    return np.concatenate((theta1.ravel(), b1.ravel(), theta2.ravel(), b2.ravel()))
def decode(weights, num_units, krow, kcol, prow, pcol, output_size):
    theta1 = weights[:num_units*krow*kcol].reshape((num_units, krow, kcol))
    b1 = weights[num_units*krow*kcol:num_units*krow*kcol+num_units].reshape((num_units, 1))
    theta2 = weights[num_units*krow*kcol+num_units:
                    -output_size].reshape((num_units, prow, pcol, output_size))
    b2 = weights[-output_size:].reshape((output_size, 1))
    return theta1, b1, theta2, b2

In [7]:
theta1, b1, theta2, b2 = decode(weights, num_units, krow, kcol, prow, pcol, output_size)
theta1.shape, b1.shape, theta2.shape, b2.shape

((5, 5, 5), (5, 1), (5, 8, 8, 10), (10, 1))

In [8]:
encode(theta1, b1, theta2, b2).shape

(3340,)

In [9]:
theta1.size + b1.size + theta2.size + b2.size

3340

## 7 Convolution

In [10]:
def convolution(X, w, krow, kcol, crow, ccol):
    res = np.zeros((crow, ccol))
    for i in range(crow):
        for j in range(ccol):
            temp = w * X[i:i+krow,j:j+kcol]
    return res # (16, 16)
a = convolution(tr_X[0], theta1[0], krow, kcol, crow, ccol)
a.shape

(16, 16)

## 8 Pooling

In [11]:
def maxPooling(conv, crow, ccol, pfrow, pfcol, prow, pcol):
    res = np.zeros((prow, pcol))
    grad = np.zeros((crow, ccol))
    for i in range(0, crow, pfrow):
        for j in range(0, ccol, pfrow):
            res[i//2,j//2] = np.max(conv[i:i+pfrow,j:j+pcol])
            idx = np.argmax(conv[i:i+pfrow,j:j+pcol])
            grad[i+idx//pfrow,j+idx%pcol] = 1
    return res, grad

## 9 Sigmod

In [12]:
from scipy.special import expit
sigmod = expit

## 10 Forward propagate

In [13]:
def forwardPropagate(X, theta1, b1, theta2, b2, num_units, krow, kcol,
                    crow, ccol, prow, pcol, output_size):
    a1 = X # (20, 20)
    z2 = np.zeros((num_units, crow, ccol)) # (5, 16, 16)
    a2 = z2.copy() # (5, 16, 16)
    pooling_grad = z2.copy() # (5, 16, 16)
    a3 = np.zeros((num_units, prow, pcol)) # (5, 8, 8)
    z4 = np.zeros((output_size, 1)) # (10, 1)
    a4 = z4.copy() # (10, 1)
    
    for i in range(num_units):
        z2[i] = convolution(X, theta1[i], krow, kcol, 
                            crow, ccol) + b1[i] # (16, 16)
    a2 = sigmod(z2) # (5, 16, 16)
    
    for i in range(num_units):
        a3[i], pooling_grad[i] = maxPooling(a2[i], crow, ccol, pfrow, pfcol, prow, pcol)
    
    for i in range(output_size):        
         z4[i] += np.sum(theta2[:,:,:,i] * a3)
    z4 += b2
    a4 = sigmod(z4)
    return a1, z2, a2, pooling_grad, a3, z4, a4

In [14]:
%%time
for i in range(1000):
    a = forwardPropagate(X[0], theta1, b1, theta2, b2, num_units, krow, kcol,
                    crow, ccol, prow, pcol, output_size)


CPU times: user 3.92 s, sys: 0 ns, total: 3.92 s
Wall time: 3.91 s


## 11 Predict

In [15]:
def predict(X, theta1, b1, theta2, b2, num_units, krow, kcol,
                    crow, ccol, prow, pcol, output_size):
    *t, h = forwardPropagate(X, theta1, b1, theta2, b2, num_units, krow, kcol,
                    crow, ccol, prow, pcol, output_size)
    return np.argmax(h) + 1

## 12 Comupte accuracy

In [16]:
def computeAccuracy(X, y, theta1, b1, theta2, b2, num_units, krow, kcol,
                    crow, ccol, prow, pcol, output_size):
    m = X.shape[0]
    correct = 0
    for i in range(m):
        ans = predict(X[i], theta1, b1, theta2, b2, num_units, krow, kcol,
                    crow, ccol, prow, pcol, output_size)
        correct += ans == y[i]
    return f"m:{m} correct:{correct} accuracy:{100 * correct / m}%"
#computeAccuracy(X, y, theta1, b1, theta2, b2, num_units, krow, kcol,
#                    crow, ccol, prow, pcol, output_size)


### The accuracy in all data

In [17]:
%%time
computeAccuracy(X, y, theta1, b1, theta2, b2, num_units, krow, kcol,
                    crow, ccol, prow, pcol, output_size)

CPU times: user 19.5 s, sys: 7 ms, total: 19.5 s
Wall time: 19.5 s


'm:5000 correct:[500] accuracy:[10.]%'

## 13 Sigmod gradient

In [18]:
def sigmodGradient(z):
    t = expit(z)
    return t * (1 - t)

## 14 Backpropagation

In [25]:
def backPropagate(weights,  X, num_units, krow, kcol,
                    crow, ccol, pfrow, pfcol, prow, pcol, output_size, lam=0.):
    m = X.shape[0]
    theta1, b1, theta2, b2 = decode(weights, num_units, 
                                    krow, kcol, prow, pcol, output_size)
    J = 0.
    
    theta1_grad = np.zeros(theta1.shape) # (5, 5, 5)
    b1_grad = np.zeros(b1.shape) # (5, 1)
    theta2_grad = np.zeros(theta2.shape) # (5, 8, 10, 10)
    b2_grad = np.zeros(b2.shape) # (10, 1)
    
    for i in range(m):
        a1, z2, a2, pooling_grad, a3, z4, a4 = forwardPropagate(X[i], 
                    theta1, b1, theta2, b2, num_units, krow, kcol,
                    crow, ccol, prow, pcol, output_size)
        J += -np.sum(y[i] * np.log(a4) + 
                    (1 - y[i]) * np.log(1 - a4)) # cost
        dt2 = a4 - y[i] # (10, 1)
        
        b2_grad += dt2 # (10, 1)
        
        temp = dt2.reshape((1, 1, 1, output_size))
        theta2_grad += a3.reshape((*a3.shape, 1)) * temp # (5, 8, 8, 10)
        
        temp2 = theta2 * temp # (5, 8, 8, 10)
        temp3 = np.zeros((num_units, crow, ccol)) # (5, 16, 16)
        
        for j in range(num_units): #
            for p in range(0, crow, pfrow):
                for q in range(0, ccol, pfcol):
                    val = np.sum(temp2[j,p//pfcol,q//pfcol])
                    for p1 in range(pfrow):
                        for q1 in range(pfcol):
                            temp3[j,p+p1,q+q1] = val
        

        dt1 = temp3 * pooling_grad * a2 * (1 - a2) # (5, 16, 16)
        
        for j in range(num_units):
            b1_grad[j] = np.sum(dt1[j])
            for p in range(krow):
                for q in range(kcol):
                    theta1_grad[j,p,q] += np.sum(dt1[j] * a1[p:p+crow,q:q+ccol])
    
    J /= m
    theta1_grad /= m
    b1_grad /= m
    theta2_grad /=m
    b2_grad /= m
    
    #Regulation
        
    J += (float(lam) / (2 * m)) * (np.sum(theta1 ** 2) + np.sum(theta2 ** 2))
    theta1_grad += theta1 * lam / m
    theta2_grad += theta2 * lam / m
    
    return J, encode(theta1, b1, theta2, b2)

In [28]:
%%time
J, grad = backPropagate(weights,tr_X[:5], num_units, krow, kcol,
                        crow, ccol, pfrow, pfcol, prow, pcol, output_size)

CPU times: user 46.9 ms, sys: 0 ns, total: 46.9 ms
Wall time: 44.5 ms


## 15 Gradient checking

In [None]:
def checkGradient(weights,  X,num_units, krow, kcol,
                    crow, ccol, pfrow, pfcol, prow, pcol, output_size, lam=0.)