In [2]:
import tensorflow as tf
import numpy as np
import functions as func
from matplotlib import pyplot as plt

In [3]:
# Read the four .npy arrays in a fixed order: training inputs/labels first,
# then the held-out test inputs/labels.
data_x, data_y, test_x, test_y = (
    np.load(npy_path)
    for npy_path in (
        '../cifar_x.npy',
        '../cifar_y.npy',
        '../cifar_test_x.npy',
        '../cifar_test_y.npy',
    )
)
# Shared read cursor: get_batch advances it per sample and wraps it to 0.
index = 0

In [4]:
def get_batch(batch_size, data_x = data_x, data_y = data_y):
    """Return the next `batch_size` samples together with one-hot labels.

    Samples are read sequentially starting at the module-level cursor
    `index`. The cursor advances by one per sample and wraps to 0 at the end
    of `data_x`, so a single batch may contain the tail and the head of the
    dataset.

    Args:
        batch_size: number of samples to return.
        data_x: indexable collection of samples whose items provide
            `.tolist()`. Defaults to the training inputs that were bound when
            this function was defined.
        data_y: labels aligned with `data_x`; each label is used directly as
            the position of the 1 in a length-10 one-hot list.

    Returns:
        A tuple `(x, y)`: `x` is a list of samples converted to nested Python
        lists, `y` is a list of length-10 one-hot lists.
    """
    global index
    n_samples = len(data_x)
    x = []
    y = []
    for _ in range(batch_size):
        # `>=` rather than `==`: the cursor is shared between datasets, so it
        # can already be past the end when a shorter dataset is passed in
        # without resetting it first. `==` would then never trigger and the
        # lookup below would raise IndexError.
        if index >= n_samples:
            index = 0
        x.append(data_x[index].tolist())
        one_hot = [0] * 10
        one_hot[data_y[index]] = 1
        y.append(one_hot)
        index += 1
    return x, y

In [5]:
# Parameters
learning_rate = 0.001  # step size handed to the Adam optimizer
training_iters = 200000  # sample budget for the full-length training loop
batch_size = 128  # rows fed per session run
mini_batch = 8  # rows per slice processed in one while_loop iteration
train_iter = batch_size//mini_batch  # number of slices per fed batch
display_step = 10  # report loss/accuracy every this many steps

# Network Parameters
# NOTE(review): 784 = 28*28 is the flattened MNIST image size. The placeholder
# below is 32x32x3 and n_input is not referenced anywhere in the visible
# cells -- confirm whether it can be removed.
n_input = 784 # flattened input size of a 28*28 single-channel image
n_classes = 10 # number of output classes (width of the one-hot labels / logits)
dropout = 0.75 # Dropout, probability to keep units


# tf Graph input
x = tf.placeholder(tf.float32, [None, 32, 32, 3])  # images: 32x32 with 3 channels
y = tf.placeholder(tf.float32, [None, n_classes])  # one-hot labels
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)


# Store layers weight & bias
weights = {
    # 5x5 conv, 3 input channels, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # fully connected, 8*8*64 inputs, 1024 outputs
    # (presumably 32 -> 16 -> 8 after the two 2x2 poolings -- TODO confirm
    # against func.conv2d / func.extract_patches)
    'wd1': tf.Variable(tf.random_normal([8*8*64, 1024])),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_classes]))
}

# One bias vector per layer, sized to that layer's output width.
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
# Flat list of every variable above: the four weights first, then the four
# biases, each group in layer order.
para = [weights['wc1'], weights['wc2'], weights['wd1'], weights['out'],
        biases['bc1'], biases['bc2'], biases['bd1'], biases['out']]

In [6]:
# body of network
def body2(i, x, out):
    """Body of the `tf.while_loop`: forward pass for input slice `i`.

    Args:
        i: scalar integer tensor, position of the slice to process.
        x: TensorArray holding the input slices; read at position `i`.
        out: TensorArray collecting the logits; written at position `i`.

    Returns:
        `(i + 1, x, out)` where `out` is the TensorArray returned by the
        write, ready to be passed to the next loop iteration.
    """
    inputx = x.read(index=i)

    # First convolution, then pooling over the patches extracted from it.
    conv1 = func.conv2d(inputx, weights['wc1'], biases['bc1'])
    p1 = func.extract_patches(conv1, 'SAME', 2, 2)
    # Not consumed by the max pooling below; kept so the graph still contains
    # the majority-frequency op alongside the pooling.
    f1 = func.majority_frequency(p1)
    pool1 = func.max_pool(p=p1)

    # Second convolution and pooling, same pattern.
    conv2 = func.conv2d(pool1, weights['wc2'], biases['bc2'])
    p2 = func.extract_patches(conv2, 'SAME', 2, 2)
    f2 = func.majority_frequency(p2)
    pool2 = func.max_pool(p=p2)

    # Flatten to the input width of the fully connected layer (rows of wd1).
    fc = tf.reshape(pool2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)

    # Use the keep_prob placeholder rather than the `dropout` constant, so the
    # value fed at run time (e.g. 1. for evaluation) actually controls dropout.
    fc1 = tf.nn.dropout(fc1, keep_prob)

    # Output, class prediction (logits) for this slice.
    logits = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    out = out.write(index=i, value=logits)
    return i + 1, x, out

In [7]:
# Split the fed batch along its first axis into `train_iter` slices of
# `mini_batch` rows each; the fed batch must therefore contain exactly
# mini_batch * train_iter rows.
inputxs = tf.TensorArray(dtype=tf.float32, size=train_iter, clear_after_read=True).split(x, np.array([mini_batch] * train_iter))
out = tf.TensorArray(dtype=tf.float32, size=train_iter)


# Forward pass, one slice per loop iteration (parallel_iterations=1 keeps the
# iterations sequential). Gradients are derived afterwards by the optimizer.
i0 = tf.constant(0)
con = lambda i, x, g: i < train_iter

i0, inputx, out = tf.while_loop(cond=con, body=body2, loop_vars=[i0, inputxs, out], parallel_iterations=1)

# Stack the per-slice logits and flatten them back to one row per sample.
# Uses n_classes instead of a literal 10 so the reshape follows the label width.
pred = tf.reshape(out.stack(), [-1, n_classes])

# Mean softmax cross-entropy over the batch, minimised with Adam.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Fraction of rows whose arg-max prediction matches the arg-max label.
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred,1), tf.argmax(y,1)), tf.float32))

In [9]:
init = tf.global_variables_initializer()

# History of the reported loss / accuracy values. These must exist before the
# loop: the reporting branch appends to them and would otherwise raise
# NameError the first time `step % display_step == 0`.
cost_save = []
accuracy_save = []

with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations.
    # Full-length run: while step * batch_size < training_iters:
    while step < 2:
        batch_x, batch_y = get_batch(batch_size)
        # Run optimization op (backprop)
        sess.run(opt, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy on the batch just trained on.
            cos, acc = sess.run([cost, accuracy],
                                feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
            cost_save.append(cos)
            accuracy_save.append(acc)

            print("Iter {}, Minibatch Loss= {:.6f}\nTraining Accuracy= {:.5f}".format(
                step * batch_size, cos, acc))
        step += 1
    print("Optimization Finished!")

    # Calculate accuracy for test images. Only the first `batch_size` test
    # samples are evaluated, so rewind the shared cursor first. This code runs
    # at module level, so a plain assignment rebinds the global `index`.
    index = 0
    batch_x, batch_y = get_batch(batch_size, test_x, test_y)
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.}))


Optimization Finished!
Testing Accuracy: 0.125


In [6]:
# ------------------------------define graph------------------------------------
# Reshape input picture
# NOTE(review): this reshapes to 28x28x1, but the placeholder `x` defined
# earlier in this file is [None, 32, 32, 3] and weights['wc1'] has 3 input
# channels. This cell looks like it belongs to an earlier MNIST version of the
# notebook -- confirm which parameter cell it is meant to run with.
inputx = tf.reshape(x, shape=[batch_size, 28, 28, 1])
yi = y  # alias of the label placeholder

# ------------------------------The tf defined network--------------------------
# Reference forward pass: same weights and func.conv2d, but pooling is done by
# TensorFlow's built-in tf.nn.max_pool (2x2 window, stride 2, VALID padding).
conv11 = func.conv2d(inputx, weights['wc1'], biases['bc1'])
pool11 = tf.nn.max_pool(conv11, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')
conv22 = func.conv2d(pool11, weights['wc2'], biases['bc2'])
pool22 = tf.nn.max_pool(conv22, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')
# Flatten to the input width of the fully connected layer (rows of wd1).
fcc = tf.reshape(pool22, [-1, weights['wd1'].get_shape().as_list()[0]])
fc11 = tf.add(tf.matmul(fcc, weights['wd1']), biases['bd1'])
fc11 = tf.nn.relu(fc11)

# Logits of the reference network (no dropout on this path).
pred1 = tf.add(tf.matmul(fc11, weights['out']), biases['out'])
#------------------------------------------------------------------------------------------
# Gradients of the mean softmax cross-entropy as computed by TensorFlow;
# `gv` is a list of (gradient, variable) pairs.
# NOTE(review): this rebinds `opt`, which the training cell earlier in this
# file uses for the `minimize` op.
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
gv = opt.compute_gradients(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred1, labels=y)))

# ------------------------------self defined network-----------------------------
# Same weights as the TF-defined network, but pooling goes through the
# project's own patch-based helpers, which also return the mask that the manual
# backward pass uses to route the error.
# Convolution Layer
conv1 = func.conv2d(inputx, weights['wc1'], biases['bc1'])
# Pooling (down-sampling)
# presumably 2x2 patches with stride 2 -- TODO confirm in func.extract_patches
p1 = func.extract_patches(conv1, 'SAME', 2, 2)
# f1 is only needed by the commented-out weighted pooling variant below.
f1 = func.majority_frequency(p1)
#     maxpooling
# pool1, mask1 = func.weight_pool_with_mask(p1, f1, pool_fun=func.majority_pool_with_mask, reduce_fun=tf.reduce_max)
pool1, mask1 = func.max_pool_with_mask(p1)

# Convolution Layer
conv2 = func.conv2d(pool1, weights['wc2'], biases['bc2'])
#     Pooling (down-sampling)
p2 = func.extract_patches(conv2, 'SAME', 2, 2)
# f2 is only needed by the commented-out weighted pooling variant below.
f2 = func.majority_frequency(p2)
#     maxpooling
# pool2, mask2 = func.weight_pool_with_mask(p2, f2, pool_fun=func.majority_pool_with_mask, reduce_fun=tf.reduce_max)
pool2, mask2 = func.max_pool_with_mask(p2)

# Fully connected layer
# Reshape the pooled conv2 output to fit the fully connected layer input
fc = tf.reshape(pool2, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Dropout is disabled on this path (presumably so the gradient comparison is
# deterministic -- TODO confirm).
# fc1 = tf.nn.dropout(fc1, dropout)

# Output, class prediction
pred = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
# Per-sample 0/1 indicator of a correct arg-max prediction.
correct_pred = tf.cast(tf.equal(tf.argmax(pred, 1), tf.argmax(yi, 1)), dtype=tf.float32)



# ----------------TF autodiff gradients of the self defined network-------------
# `gv1` is a list of (gradient, variable) pairs for the same mean softmax
# cross-entropy loss, but taken through the custom pooling ops.
opt1 = tf.train.AdamOptimizer(learning_rate=learning_rate)
gv1 = opt1.compute_gradients(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)))
# ------------------------------define gradient descent-------------------------
# Hand-written backward pass through the self defined network. `e` is rebound
# at every stage and always holds the error signal of the layer currently being
# processed, so the statement order below matters.

# the last fc
# Derivative of softmax cross-entropy w.r.t. the logits, per sample (the
# division by batch_size happens when gv2 is assembled).
e = tf.nn.softmax(pred) - yi
grad_w_out = tf.transpose(fc1) @ e
grad_b_out = tf.reduce_sum(e, axis=0)

# the second last fc
# Push the error back through weights['out'] and apply the ReLU derivative:
# only units with fc1 > 0 let the error through. The dropout rescaling is
# commented out, matching the disabled dropout in the forward pass.
e = tf.multiply(e @ tf.transpose(weights['out']), tf.cast(tf.greater(fc1, 0), dtype=tf.float32)) #/ dropout
grad_w_3 = tf.transpose(fc) @ e
grad_b_3 = tf.reduce_sum(e, axis=0)

# the last pooling layer
# Push the error back through weights['wd1'] and restore the static shape of
# pool2 (undoes the flatten done before the fully connected layer).
e = e @ tf.transpose(weights['wd1'])
e = tf.reshape(e, pool2.get_shape().as_list())

# the last conv layer
# unpooling: get the conv-layer error from the pooling-layer error using the
# mask recorded by the forward pooling
e = func.error_pooling2conv(e, mask2)

# multiply with the derivative of the activation function on the conv layer:
# zero the error wherever conv2 is not positive
e = tf.multiply(e, tf.cast(tf.greater(conv2, 0), dtype=tf.float32))
# filter_gradient returns (kernel gradient, bias gradient) for this layer.
temp1, temp2 = func.filter_gradient(e, pool1, conv2)
grad_k_2 = temp1
grad_b_2 = temp2

# conv to pool: propagate the error through the wc2 kernels
e = func.error_conv2pooling(e, weights['wc2'])

# pool to the first conv: unpool with mask1, then apply the activation
# derivative of conv1
e = func.error_pooling2conv(e, mask1)
e = tf.multiply(e, tf.cast(tf.greater(conv1, 0), dtype=tf.float32))
temp1, temp2 = func.filter_gradient(e, inputx, conv1)
grad_k_1 = temp1
grad_b_1 = temp2



# gradient
# (gradient, variable) pairs: the four weights first, then the four biases,
# each group in layer order. The fully connected gradients are divided by
# batch_size to match the mean loss; the conv gradients are used as returned
# (presumably func.filter_gradient already averages over the batch -- TODO
# confirm).
gv2 = [(grad_k_1, weights['wc1']), (grad_k_2, weights['wc2']), 
       (grad_w_3 / batch_size, weights['wd1']), (grad_w_out / batch_size, weights['out']),
       (grad_b_1, biases['bc1']), (grad_b_2, biases['bc2']), 
       (grad_b_3 / batch_size, biases['bd1']), (grad_b_out / batch_size, biases['out'])]
# optimizer = opt.apply_gradients(gv)
# optimizer = opt.apply_gradients(gv)

In [8]:
# Initialise all variables, then repeatedly fetch the three gradient lists on
# the same batch and print how far apart they are.
#   ret1 <- gv  : TF-defined network, gradients from TensorFlow
#   ret2 <- gv1 : self defined network, gradients from TensorFlow
#   ret3 <- gv2 : self defined network, hand-written gradients
# NOTE(review): `mnist` is not defined in the visible notebook -- confirm where
# it comes from before re-running this cell.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep going until the sample budget is used up.
    while step * batch_size < training_iters:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        feed = {x: batch_x, y: batch_y, keep_prob: dropout}
        ret1, ret2, ret3 = sess.run([gv, gv1, gv2], feed_dict=feed)
        # Optional debug fetch, left disabled:
        # conv, pool, ee4, ee3 = sess.run([conv2, pool2, e4, e3], feed_dict=feed)
        print('difference between tf and mine')
        for i, j, k in zip(ret1, ret2, ret3):
            # Largest absolute element-wise gap for each pairing of gradients.
            print(np.abs(i[0] - j[0]).max(),
                  np.abs(j[0] - k[0]).max(),
                  np.abs(i[0] - k[0]).max())
        step += 1
    print("Optimization Finished!")

difference between tf and mine
0.000320435 0.000915527 0.000976562
0.000183105 0.000213623 0.000244141
0.0 0.0 0.0
0.0 0.0 0.0
0.00341797 0.00610352 0.00537109
2.57492e-05 0.00012207 0.000106812
0.0 0.0 0.0
0.0 0.0 0.0
difference between tf and mine
0.000366211 0.000732422 0.000610352
0.000183105 0.000183105 0.000221252
0.0 0.0 0.0
0.0 0.0 0.0
0.00244141 0.00634766 0.00488281
3.05176e-05 4.57764e-05 6.10352e-05
0.0 0.0 0.0
0.0 0.0 0.0
difference between tf and mine
0.000488281 0.00109863 0.000976562
0.000152588 0.00018692 0.000183105
0.0 0.0 0.0
0.0 0.0 0.0
0.00244141 0.00341797 0.00439453
2.28882e-05 6.10352e-05 6.10352e-05
0.0 0.0 0.0
0.0 0.0 0.0
difference between tf and mine
0.000488281 0.000854492 0.000854492
0.000183105 0.000244141 0.000183105
0.0 0.0 0.0
0.0 0.0 0.0
0.00244141 0.00439453 0.00317383
3.05176e-05 6.10352e-05 6.10352e-05
0.0 0.0 0.0
0.0 0.0 0.0
difference between tf and mine
0.000488281 0.000854492 0.000854492
0.000244141 0.000183105 0.000183105
0.0 0.0 0.0
0.0 0.0 

KeyboardInterrupt: 

In [11]:
# First (gradient, variable) pair fetched from `gv`: show the gradient slice
# at index 0 of its last two axes.
ret1[0][0][:,:,0,0]

array([[  236.395,    67.118,   245.784,   362.196,   363.377],
       [  158.298,   -35.431,   132.62 ,   360.272,   310.257],
       [  864.503,   688.634,   865.075,  1098.375,   780.748],
       [ 1570.123,  2003.334,  1882.387,  1476.438,  1155.729],
       [ 1494.532,  1964.872,  1653.557,  1356.253,   725.913]], dtype=float32)

In [12]:
# Same slice of the first gradient, this time from `gv1`, for a side-by-side
# comparison with the `gv` values.
ret2[0][0][:,:,0,0]

array([[  236.395,    67.118,   245.784,   362.196,   363.377],
       [  158.298,   -35.431,   132.619,   360.272,   310.257],
       [  864.502,   688.634,   865.075,  1098.375,   780.748],
       [ 1570.122,  2003.334,  1882.387,  1476.438,  1155.729],
       [ 1494.532,  1964.872,  1653.557,  1356.253,   725.913]], dtype=float32)

In [18]:
# For every (gradient, variable) pair, print the summed absolute difference of
# the gradients and of the variable values between the `gv` and `gv1` fetches.
print('difference between tf and mine')
for i, j in zip(ret1, ret2):
    grad_gap = np.abs(i[0] - j[0]).sum()
    var_gap = np.abs(i[1] - j[1]).sum()
    print(grad_gap, var_gap)
# Disabled variants of the same comparison for the other pairings:
# print('difference between tf and half')
# for i, k in zip(ret1, ret3):
#     print(np.max(i[0] - k[0]), np.sum(np.abs(i[1] - k[1])))
# print('difference between mine and half')
# for j, k in zip(ret2, ret3):
#     print(np.max(j[0] - k[0]), np.sum(np.abs(j[1] - k[1])))
# print('difference between tf and tf')
# for i, l in zip(ret1, ret4):
#     print(np.max(i[0] - l[0]), np.sum(np.abs(i[1] - l[1])))

difference between tf and mine
0.060152 0.0
0.292672 0.0
0.0 0.0
0.0 0.0
0.0168953 0.0
0.0 0.0
0.0 0.0
0.0 0.0


In [29]:
# Dump the [0, :, :, 0] slice of each debug array to output.txt, each preceded
# by a title line.
# NOTE(review): conv, pool, ee4 and ee3 are only produced by a commented-out
# sess.run in the gradient comparison cell -- re-enable that fetch before
# running this cell on a fresh kernel.
sections = [
    ('conv2 value\n', conv),
    ('\npool2 value\n', pool),
    ('\nerror4 value\n', ee4),
    ('\nerror3 value\n', ee3),
]
# One binary handle for the whole file instead of reopening it for every title
# and every array; np.savetxt is given a binary handle as before.
with open('output.txt', 'wb') as f:
    for title, values in sections:
        f.write(title.encode('ascii'))
        np.savetxt(f, values[0,:,:,0], delimiter=', ', fmt="%.2f")

In [3]:
# Build a small test input: the integers 1..64 laid out as [2, 2, 4, 4] in C
# order, then moved to a [2, 4, 4, 2] layout by putting axis 1 last.
tt = np.arange(1, 65).reshape([2, 2, 4, 4], order='C')
tt = tt.transpose(0, 2, 3, 1)

# Overwrite selected cells of sample 0 (several with repeated values) before
# pooling.
overrides = [
    ((0, 0, 3, 0), 3),
    ((0, 2, 0, 0), 13),
    ((0, 2, 1, 0), 14),
    ((0, 2, 3, 0), 11),
    ((0, 3, 2, 0), 11),
    ((0, 3, 3, 0), 11),
    ((0, 0, 0, 1), 18),
    ((0, 1, 0, 1), 18),
    ((0, 1, 1, 1), 18),
    ((0, 0, 2, 1), 23),
    ((0, 0, 3, 1), 23),
    ((0, 2, 0, 1), 30),
]
for position, value in overrides:
    tt[position] = value

# Pool with the project helper and with TensorFlow's built-in op.
# NOTE(review): pool2 (the tf.nn.max_pool result) is built but never fetched
# below, so no comparison against the reference actually happens here.
x = tf.constant(tt, dtype=tf.float32)
p = func.extract_patches(x, "VALID", 2, 2)
pool1, mask = func.max_pool_with_mask(p=p)
pool2 = tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1],padding='VALID')

# x = tf.reshape(x, [4,4])
with tf.Session() as sess:
    retx, retp, retm = sess.run([x, pool1, mask])

In [4]:
# Output of func.max_pool_with_mask for the first sample, last-axis index 0.
retp[0,:,:,0]

array([[  6.,   8.],
       [ 14.,  11.]], dtype=float32)

In [6]:
# Mask entries at [0, 1, 0, :, 0]; presumably the per-patch-element weights of
# one pooled cell -- TODO confirm the axis layout in func.max_pool_with_mask.
retm[0,1,0,:,0]

array([ 0. ,  0.5,  0. ,  0.5], dtype=float32)