In [1]:
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
import functions as func
import sys

# Print arrays in full (no "..." summarisation), with 3 decimals and without
# scientific notation. threshold=np.nan is rejected by current NumPy
# ("threshold must be non-NAN"); sys.maxsize is the documented way to disable
# summarisation.
np.set_printoptions(threshold=sys.maxsize, precision=3, suppress=True)

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from the local cache directory (the archives are downloaded there
# when missing); labels come back one-hot encoded.
mnist = input_data.read_data_sets(train_dir="/tmp/data/", one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# ---- Training settings ----
learning_rate = 1e-3       # step size passed to the optimizer
training_iters = 200000    # sample budget: training stops once step * batch_size reaches it
batch_size = 16            # samples per step; also baked into the input reshape of the graph
# mini_batch = 16
# train_iter = batch_size//mini_batch
display_step = 10          # reporting interval, in steps

# ---- Network dimensions ----
n_input = 28 * 28  # flattened MNIST image (28x28 pixels = 784 values)
n_classes = 10     # digits 0-9
dropout = 0.75     # keep probability fed to `keep_prob`

# ---- Graph inputs ----
x = tf.placeholder(dtype=tf.float32, shape=[None, n_input])    # flattened images
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])  # one-hot labels
keep_prob = tf.placeholder(dtype=tf.float32)                   # dropout keep probability


def _normal_variable(shape):
    """Create a trainable variable of the given shape, initialised with tf.random_normal."""
    return tf.Variable(tf.random_normal(shape))


# Kernels / weight matrices, keyed by layer.
weights = {
    'wc1': _normal_variable([5, 5, 1, 32]),         # conv1: 5x5 kernel, 1 -> 32 channels
    'wc2': _normal_variable([5, 5, 32, 64]),        # conv2: 5x5 kernel, 32 -> 64 channels
    'wd1': _normal_variable([7*7*64, 1024]),        # dense: 7*7*64 inputs -> 1024 units
    'out': _normal_variable([1024, n_classes]),     # output: 1024 -> class scores
}

# One bias vector per layer, sized to that layer's output.
biases = {
    'bc1': _normal_variable([32]),
    'bc2': _normal_variable([64]),
    'bd1': _normal_variable([1024]),
    'out': _normal_variable([n_classes]),
}

# Flat parameter list: all weights first, then all biases, in layer order.
para = ([weights[key] for key in ('wc1', 'wc2', 'wd1', 'out')]
        + [biases[key] for key in ('bc1', 'bc2', 'bd1', 'out')])



In [4]:
# ------------------------------forward graph-----------------------------------
def _conv_then_pool(inputs, kernel, bias):
    """Build one convolution stage followed by the custom pooling from `functions`.

    Returns (conv, patches, frequency, pooled, mask) so every intermediate
    tensor stays reachable for the manual backward pass.
    NOTE(review): 'SAME', 2, 2 are presumably padding / window size / stride of
    the patch extraction -- confirm in functions.py.
    """
    conv = func.conv2d(inputs, kernel, bias)
    patches = func.extract_patches(conv, 'SAME', 2, 2)
    frequency = func.majority_frequency(patches)
    # Plain max pooling is active. Alternative kept for reference:
    # func.weight_pool_with_mask(patches, frequency,
    #                            pool_fun=func.majority_pool_with_mask, reduce_fun=tf.reduce_max)
    pooled, mask = func.max_pool_with_mask(patches)
    return conv, patches, frequency, pooled, mask


# Images arrive flattened; restore them to (batch, 28, 28, 1).
# The batch dimension is fixed to batch_size rather than inferred.
inputx = tf.reshape(x, shape=[batch_size, 28, 28, 1])
yi = y

# Equivalent network built only from TensorFlow ops, kept for reference:
# conv11 = func.conv2d(inputx, weights['wc1'], biases['bc1'])
# pool11 = tf.nn.max_pool(conv11, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')
# conv22 = func.conv2d(pool11, weights['wc2'], biases['bc2'])
# pool22 = tf.nn.max_pool(conv22, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')
# fcc = tf.reshape(pool22, [-1, weights['wd1'].get_shape().as_list()[0]])
# fc11 = tf.add(tf.matmul(fcc, weights['wd1']), biases['bd1'])
# fc11 = tf.nn.relu(fc11)
# pred1 = tf.add(tf.matmul(fc11, weights['out']), biases['out'])

# Two conv + pooling stages using the self-defined pooling.
conv1, p1, f1, pool1, mask1 = _conv_then_pool(inputx, weights['wc1'], biases['bc1'])
conv2, p2, f2, pool2, mask2 = _conv_then_pool(pool1, weights['wc2'], biases['bc2'])

# Flatten the second pooling output to the input width of the dense layer.
flat_dim = weights['wd1'].get_shape().as_list()[0]
fc = tf.reshape(pool2, [-1, flat_dim])
fc1 = tf.nn.relu(tf.add(tf.matmul(fc, weights['wd1']), biases['bd1']))
# Dropout is intentionally disabled:
# fc1 = tf.nn.dropout(fc1, dropout)

# Class scores (logits) and a per-sample 1.0 / 0.0 correctness indicator.
pred = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
predicted_class = tf.argmax(pred, 1)
true_class = tf.argmax(yi, 1)
correct_pred = tf.cast(tf.equal(predicted_class, true_class), dtype=tf.float32)



# ------------------------------reference gradients (TensorFlow)-----------------
# gv holds the (gradient, variable) pairs TensorFlow derives for the mean
# softmax cross-entropy loss; they are the baseline for the manual gradients.
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
gv = opt.compute_gradients(tf.reduce_mean(cross_entropy))
# ------------------------------manual backpropagation--------------------------
# Hand-derived gradients for every parameter, built as graph ops so they can be
# fetched together with TensorFlow's gradients (gv) and compared.
# `e` always holds the error signal of the layer currently being processed and
# is overwritten as the signal moves backwards, so statement order matters.

# Output layer: for softmax + cross-entropy the gradient w.r.t. the logits is
# softmax(pred) - labels. The gradients below are summed over the batch; the
# division by batch_size is applied where gv1 is assembled.
e = tf.nn.softmax(pred) - yi
grad_w_out = tf.transpose(fc1) @ e
grad_b_out = tf.reduce_sum(e, axis=0)

# Dense layer: push the error through the output weights, then apply the ReLU
# derivative (1 where fc1 > 0, else 0). Dropout is disabled in the forward
# pass, so no keep-probability rescaling is applied here.
e = tf.multiply(e @ tf.transpose(weights['out']), tf.cast(tf.greater(fc1, 0), dtype=tf.float32)) # / dropout (only if dropout were enabled)
grad_w_3 = tf.transpose(fc) @ e
grad_b_3 = tf.reduce_sum(e, axis=0)

# Second pooling layer: push the error through the dense weights and restore
# the spatial layout of pool2. The reshape uses pool2's static shape, so that
# shape has to be fully known when the graph is built.
e = e @ tf.transpose(weights['wd1'])
e = tf.reshape(e, pool2.get_shape().as_list())

# Second conv layer: un-pool the error back onto the conv2 output.
# NOTE(review): presumably mask2 routes each pooled error to the positions that
# produced the pooled value -- confirm in functions.py.
e = func.error_pooling2conv(e, mask2)

# Multiply by the derivative of the activation on the conv layer.
# NOTE(review): masking with conv2 > 0 assumes func.conv2d returns the
# post-ReLU activation -- confirm.
e = tf.multiply(e, tf.cast(tf.greater(conv2, 0), dtype=tf.float32))
# filter_gradient returns two tensors, used as the kernel and bias gradients of conv2.
# NOTE(review): these are not divided by batch_size when gv1 is assembled, so
# the helper presumably averages already -- confirm.
temp1, temp2 = func.filter_gradient(e, pool1, conv2)
grad_k_2 = temp1
grad_b_2 = temp2

# conv2 -> first pooling layer: propagate the error back through the conv2 kernel.
e = func.error_conv2pooling(e, weights['wc2'])

# First pooling layer -> first conv layer: same un-pool + activation-derivative
# steps as above, followed by the kernel and bias gradients of conv1.
e = func.error_pooling2conv(e, mask1)
e = tf.multiply(e, tf.cast(tf.greater(conv1, 0), dtype=tf.float32))
temp1, temp2 = func.filter_gradient(e, inputx, conv1)
grad_k_1 = temp1
grad_b_1 = temp2
    
    

# Manual gradients in the same order as `para` (weights first, then biases).
# The dense-layer gradients were summed over the batch, so they are averaged
# here; the conv gradients are used as returned by func.filter_gradient.
manual_grads = [
    grad_k_1, grad_k_2, grad_w_3 / batch_size, grad_w_out / batch_size,
    grad_b_1, grad_b_2, grad_b_3 / batch_size, grad_b_out / batch_size,
]
gv1 = list(zip(manual_grads, para))

# The training op applies TensorFlow's gradients (gv); gv1 is only fetched for comparison.
optimizer = opt.apply_gradients(gv)

In [5]:
# Initializing the variables
init = tf.global_variables_initializer()

# Train while checking the hand-written gradients against TensorFlow's.
#   ret1 -> (gradient, variable) pairs from TensorFlow (gv)
#   ret2 -> (gradient, variable) pairs from the manual backprop (gv1)
# Both names stay bound after the loop so later cells can inspect the last batch.
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until the sample budget (training_iters) is used up.
    while step * batch_size < training_iters:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # One optimisation step; the optimizer op is run for its side effect
        # only, so its fetched value is dropped.
        ret1, ret2 = sess.run(
            [gv, gv1, optimizer],
            feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})[:2]
        # Report every display_step steps instead of on every step, which
        # floods the cell output over thousands of iterations.
        if step % display_step == 0:
            print('difference between tf and mine')
            for (tf_grad, tf_var), (my_grad, my_var) in zip(ret1, ret2):
                # Sum of absolute differences: gradients first, variable values second.
                print(np.sum(np.abs(tf_grad - my_grad)), np.sum(np.abs(tf_var - my_var)))
        step += 1
    print("Optimization Finished!")

difference between tf and mine
0.115223 0.0
0.607223 0.0
0.0 0.0
0.0 0.0
0.022274 0.0
0.000829935 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.113621 0.0
0.823597 0.0
0.0 0.0
0.0 0.0
0.0346107 0.0
0.00120926 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.115806 0.0
0.708008 0.0
0.0 0.0
0.0 0.0
0.0287628 0.0
0.00100803 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.108277 0.0
0.622374 0.0
0.0 0.0
0.0 0.0
0.0341339 0.0
0.00102392 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.10813 0.0
0.580852 0.0
0.0 0.0
0.0 0.0
0.0270348 0.0
0.000722528 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.0888213 0.0
0.557357 0.0
0.0 0.0
0.0 0.0
0.0258408 0.0
0.000818372 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.100033 0.0
0.565414 0.0
0.0 0.0
0.0 0.0
0.0248566 0.0
0.000751257 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.117157 0.0
0.621402 0.0
0.0 0.0
0.0 0.0
0.0245361 0.0
0.00075984 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.104403 0.0
0.579503 

difference between tf and mine
0.0464619 0.0
0.240482 0.0
0.0 0.0
0.0 0.0
0.00841522 0.0
0.000219882 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.0510942 0.0
0.265314 0.0
0.0 0.0
0.0 0.0
0.00457096 0.0
0.000299513 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.0497759 0.0
0.253969 0.0
0.0 0.0
0.0 0.0
0.008564 0.0
0.000236273 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.0409185 0.0
0.182896 0.0
0.0 0.0
0.0 0.0
0.00501633 0.0
0.000150204 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.0565843 0.0
0.342914 0.0
0.0 0.0
0.0 0.0
0.0141907 0.0
0.0004462 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.077767 0.0
0.401038 0.0
0.0 0.0
0.0 0.0
0.01577 0.0
0.000438452 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.0457592 0.0
0.240815 0.0
0.0 0.0
0.0 0.0
0.0078907 0.0
0.000231624 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.0567477 0.0
0.286421 0.0
0.0 0.0
0.0 0.0
0.00849533 0.0
0.000302553 0.0
0.0 0.0
0.0 0.0
difference between tf and mine
0.0693002 0

KeyboardInterrupt: 

In [11]:
# TensorFlow side: gradient part of the first (gradient, variable) pair,
# 2-D slice at index 0 of the last two axes (presumably wc1's first 5x5 kernel).
ret1[0][0][:, :, 0, 0]

array([[  236.395,    67.118,   245.784,   362.196,   363.377],
       [  158.298,   -35.431,   132.62 ,   360.272,   310.257],
       [  864.503,   688.634,   865.075,  1098.375,   780.748],
       [ 1570.123,  2003.334,  1882.387,  1476.438,  1155.729],
       [ 1494.532,  1964.872,  1653.557,  1356.253,   725.913]], dtype=float32)

In [12]:
# Manual backprop side: the same slice of the first gradient, for a visual
# comparison against the TensorFlow value.
ret2[0][0][:, :, 0, 0]

array([[  236.395,    67.118,   245.784,   362.196,   363.377],
       [  158.298,   -35.431,   132.619,   360.272,   310.257],
       [  864.502,   688.634,   865.075,  1098.375,   780.748],
       [ 1570.122,  2003.334,  1882.387,  1476.438,  1155.729],
       [ 1494.532,  1964.872,  1653.557,  1356.253,   725.913]], dtype=float32)

In [18]:
# Per parameter: summed absolute difference between TensorFlow's and the manual
# gradient, followed by the same measure for the fetched variable values.
print('difference between tf and mine')
for (tf_grad, tf_var), (my_grad, my_var) in zip(ret1, ret2):
    grad_gap = np.abs(tf_grad - my_grad).sum()
    var_gap = np.abs(tf_var - my_var).sum()
    print(grad_gap, var_gap)
# Earlier experiments ran the same loop over other result pairs (ret3: half
# manual / half TensorFlow, ret4: a second TensorFlow run), printing the
# np.max of the gradient difference instead of the summed absolute difference.

difference between tf and mine
0.060152 0.0
0.292672 0.0
0.0 0.0
0.0 0.0
0.0168953 0.0
0.0 0.0
0.0 0.0
0.0 0.0


In [29]:
def _write_section(path, header, feature_map, header_mode):
    """Write a text header, then append one 2-D slice of `feature_map` to `path`.

    The header is written in text mode (`header_mode` is 'w' to start a new
    file, 'a' to append). The matrix is appended through a binary handle, as
    in the original cell. Only the slice [0, :, :, 0] is written.
    """
    with open(path, header_mode) as text_file:
        text_file.write(header)
    with open(path, 'ab') as binary_file:
        np.savetxt(binary_file, feature_map[0, :, :, 0], delimiter=', ', fmt="%.2f")


# NOTE(review): conv, pool, ee4 and ee3 are not produced by any active cell;
# they have to be fetched from the session first.
_write_section('output.txt', 'conv2 value\n', conv, 'w')
_write_section('output.txt', '\npool2 value\n', pool, 'a')
_write_section('output.txt', '\nerror4 value\n', ee4, 'a')
_write_section('output.txt', '\nerror3 value\n', ee3, 'a')

In [3]:
# Sanity check of func.max_pool_with_mask against tf.nn.max_pool on a small
# hand-made tensor. Dedicated names are used so the training graph's `x`,
# `pool1` and `pool2` are not rebound when this cell runs in the same kernel.

# Values 1..64 laid out as (2, 2, 4, 4), then moved to channels-last: (2, 4, 4, 2).
tt = np.arange(1, 65).reshape([2, 2, 4, 4])
tt = np.transpose(tt, [0, 2, 3, 1])

# Overwrite a few entries of the first sample so that some 2x2 windows contain
# tied maxima, which exercises how the mask treats ties.
# Channel 0
tt[0, 0, 3, 0] = 3
tt[0, 2, 0, 0] = 13
tt[0, 2, 1, 0] = 14
tt[0, 2, 3, 0] = 11
tt[0, 3, 2, 0] = 11
tt[0, 3, 3, 0] = 11
# Channel 1
tt[0, 0, 0, 1] = 18
tt[0, 1, 0, 1] = 18
tt[0, 1, 1, 1] = 18
tt[0, 0, 2, 1] = 23
tt[0, 0, 3, 1] = 23
tt[0, 2, 0, 1] = 30

pool_input = tf.constant(tt, dtype=tf.float32)
patches = func.extract_patches(pool_input, "VALID", 2, 2)
custom_pool, mask = func.max_pool_with_mask(p=patches)
# Reference result from TensorFlow's built-in pooling.
reference_pool = tf.nn.max_pool(pool_input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

with tf.Session() as sess:
    retx, retp, retm, retref = sess.run([pool_input, custom_pool, mask, reference_pool])

# The reference was previously built but never evaluated; compare it explicitly.
np.testing.assert_allclose(
    retp, retref,
    err_msg="func.max_pool_with_mask disagrees with tf.nn.max_pool")

In [4]:
# Pooled output of the custom max pooling: first sample, channel 0.
retp[0, :, :, 0]

array([[  6.,   8.],
       [ 14.,  11.]], dtype=float32)

In [6]:
# Mask entries of one pooling window (first sample, window row 1 / column 0,
# channel 0); the tied maxima share the weight equally (0.5 each).
retm[0, 1, 0, :, 0]

array([ 0. ,  0.5,  0. ,  0.5], dtype=float32)