In [38]:
import sys

import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt

import functions as func
# Print arrays in full (no "..." summarisation), with 3 decimals and without
# scientific notation. NumPy rejects a NaN threshold; sys.maxsize is the
# documented way to request untruncated output.
np.set_printoptions(threshold=sys.maxsize, precision=3, suppress=True)

In [39]:
from tensorflow.examples.tutorials.mnist import input_data

# Directory handed to read_data_sets; the recorded output shows the MNIST
# archives being extracted from here.
MNIST_DATA_DIR = "/tmp/data/"

# one_hot=True: labels are fed to the 10-wide `y` placeholder later on.
mnist = input_data.read_data_sets(MNIST_DATA_DIR, one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [40]:
# Training hyper-parameters
learning_rate = 1e-3      # passed to the Adam optimizer
training_iters = 300000   # sample budget; only referenced by the commented-out training loop
batch_size = 64           # mini-batch size; also the static batch dimension of the graph
display_step = 10         # report loss/accuracy every `display_step` steps

In [41]:
# Network dimensions
n_input = 28 * 28   # MNIST images are 28x28 pixels, fed as flat rows
n_classes = 10      # digits 0-9
dropout = 0.75      # probability of KEEPING a unit (fed as keep_prob during training)

In [57]:
# Graph inputs, supplied through feed_dict at session time.
x = tf.placeholder(tf.float32, shape=[None, n_input])      # flattened images
y = tf.placeholder(tf.float32, shape=[None, n_classes])    # one-hot labels
keep_prob = tf.placeholder(tf.float32)                     # dropout keep probability


def _normal_variable(shape):
    """Create a tf.Variable initialised with tf.random_normal(shape)."""
    return tf.Variable(tf.random_normal(shape))


# Layer weights. Conv kernels are [height, width, in_channels, out_channels].
weights = {
    'wc1': _normal_variable([5, 5, 1, 32]),         # 5x5 conv, 1 input, 32 outputs
    'wc2': _normal_variable([5, 5, 32, 64]),        # 5x5 conv, 32 inputs, 64 outputs
    'wd1': _normal_variable([7*7*64, 1024]),        # dense, 7*7*64 inputs, 1024 outputs
    'out': _normal_variable([1024, n_classes]),     # dense, 1024 inputs, one output per class
}

# One bias per output channel / unit of the matching weight entry.
biases = {
    'bc1': _normal_variable([32]),
    'bc2': _normal_variable([64]),
    'bd1': _normal_variable([1024]),
    'out': _normal_variable([n_classes]),
}

In [56]:
# Create model
def conv_net(x, weights, biases, dropout):
    """Build the conv -> pool -> conv -> pool -> dense -> dropout -> dense graph.

    Args:
        x: flat image batch; it is reshaped to [batch_size, 28, 28, 1].
        weights: dict holding the 'wc1', 'wc2', 'wd1' and 'out' variables.
        biases: dict holding the 'bc1', 'bc2', 'bd1' and 'out' variables.
        dropout: keep probability handed to tf.nn.dropout.

    Returns:
        Tuple (ofc1, fc1, out): the dense layer after dropout, the same layer
        before dropout (post-ReLU), and the output logits.
    """
    # Reshape input picture. Use the notebook-level batch_size (as the inline
    # graph does) rather than a literal 64, so the two stay in sync.
    x = tf.reshape(x, shape=[batch_size, 28, 28, 1])

    # Convolution Layer
    conv1 = func.conv2d(x, weights['wc1'], biases['bc1'])
    # Pooling (down-sampling) over 2x2 patches
    p = func.extract_patches(conv1, 'SAME', 2, 2)
    # NOTE(review): the result is never used; the call is kept because
    # func.majority_frequency is defined outside this notebook.
    func.majority_frequency(p)
    maxpool = func.max_pool(p)

    # Convolution Layer
    conv2 = func.conv2d(maxpool, weights['wc2'], biases['bc2'])
    # Pooling (down-sampling) over 2x2 patches
    p = func.extract_patches(conv2, 'SAME', 2, 2)
    func.majority_frequency(p)
    maxpool = func.max_pool(p)

    # Fully connected layer
    # Flatten the pooled maps to the row width expected by weights['wd1']
    fc1 = tf.reshape(maxpool, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    ofc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    out = tf.add(tf.matmul(ofc1, weights['out']), biases['out'])
    return ofc1, fc1, out

In [79]:
# Construct model
# ofc1, fc1, pred = conv_net(x, weights, biases, keep_prob)


# ------------------------------define graph------------------------------------
# The forward pass is written inline (instead of calling conv_net) so that every
# intermediate tensor is available to the hand-written backward pass below.

# Reshape input picture; the batch dimension is static because N is read back
# from get_shape() further down.
inputx = tf.reshape(x, shape=[batch_size, 28, 28, 1])
# Convolution Layer
conv1 = func.conv2d(inputx, weights['wc1'], biases['bc1'])
# Pooling (down-sampling)
p1 = func.extract_patches(conv1, 'SAME', 2, 2)
f1 = func.majority_frequency(p1)  # NOTE(review): not used anywhere in this cell
# maxpooling
pool1 = func.max_pool(p1)

# Convolution Layer
conv2 = func.conv2d(pool1, weights['wc2'], biases['bc2'])
# Pooling (down-sampling)
p2 = func.extract_patches(conv2, 'SAME', 2, 2)
f2 = func.majority_frequency(p2)  # NOTE(review): not used anywhere in this cell
# maxpooling
pool2 = func.max_pool(p2)

# Fully connected layer
# Reshape the pooled conv2 output to fit the fully connected layer input
fc = tf.reshape(pool2, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout. Use the keep_prob placeholder (not the Python constant
# `dropout`) so that feeding keep_prob=1. at evaluation time disables dropout.
ofc1 = tf.nn.dropout(fc1, keep_prob)

# Output, class prediction
pred = tf.add(tf.matmul(ofc1, weights['out']), biases['out'])

# ------------------------------define graph------------------------------------

# -----------------------------Define loss and optimizer------------------------
# Reference gradients from TensorFlow. compute_gradients yields one
# (gradient, variable) pair per entry of varList, in this order:
#   0: wd1   1: out (weights)   2: bd1   3: out (bias)   4: wc2   5: bc2
varList = [weights['wd1'], weights['out'], biases['bd1'], biases['out'], weights['wc2'], biases['bc2']]
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
gv = opt.compute_gradients(loss=cost, var_list=varList)


# ------------------------------define gradient descent-------------------------
# Hand-derived backward pass, to be compared against `gv`. The error terms e1..e4
# are per-sample (not averaged); each parameter gradient divides by batch_size.

# the last fc: error at the logits is softmax(pred) - y
sopred = tf.nn.softmax(pred)
e1 = sopred - y
grad_w_out = tf.transpose(ofc1) @ e1 / batch_size
grad_b_out = tf.reduce_sum(e1, axis=0) / batch_size

# the second last fc
# ofc1 > 0 only where the unit passed both the ReLU and the dropout mask
drv = tf.cast(tf.greater(ofc1, 0), dtype=tf.float32)
# dropout is applied to the second-to-last layer, so only the units that stayed
# active get an update; surviving activations are scaled by 1/keep_prob, and so is the error
e2 = tf.multiply(e1 @ tf.transpose(weights['out']), drv) / keep_prob
grad_w_3 = tf.transpose(fc) @ e2 / batch_size
grad_b_3 = tf.reduce_sum(e2, axis=0) / batch_size

# the last pooling layer: push the error through wd1 and undo the flattening
e3 = e2 @ tf.transpose(weights['wd1'])
e3 = tf.reshape(e3, pool2.get_shape().as_list())

# the last conv layer
# p2 is 5-D: [N, H, W, K, C] (K entries per pooling patch)
[N, H, W, K, C] = p2.get_shape().as_list()
ppool2 = tf.reshape(pool2, [N, H, W, 1, C])
# 1.0 wherever a patch entry equals the pooled value of its patch, so the error
# is routed only to those entries (every tied entry receives it)
mark2 = tf.cast(tf.equal(p2, ppool2), dtype=tf.float32)
e4 = tf.multiply(mark2, tf.reshape(e3, [N, H, W, 1, C]))
e4 = tf.reshape(e4, conv2.get_shape().as_list())

# NOTE(review): reshape -> extract_patches -> reshape is used here to bring the
# patch-ordered error back to image layout. Whether this round trip is an exact
# inverse for these map sizes depends on func.extract_patches' patch ordering --
# verify. The outputs recorded further down in this notebook show grad_k_2 and
# grad_b_2 NOT matching the TensorFlow gradients for wc2 / bc2.
e4 = func.extract_patches(e4, 'SAME', 2, 2)
e4 = tf.reshape(e4, conv2.get_shape().as_list())
# gate by the positive entries of conv2
# (presumably func.conv2d ends in a ReLU -- TODO confirm)
e4 = tf.multiply(e4, tf.cast(tf.greater(conv2, 0), dtype=tf.float32))

[N, H, W, C] = conv2.get_shape().as_list()
print([N, H, W, C])
# Pad pool1 by 2 on each spatial side, then take H x H windows at stride 1:
# one window per kernel offset (5 x 5 of them).
ppool1 = tf.pad(pool1, tf.constant([[0,0],[2,2],[2,2],[0,0]]))
pc1c2 = func.extract_patches(ppool1, 'VALID', H, 1)

# Input-channel count of the second conv layer, read from its kernel shape
# rather than relying on it happening to equal C / 2.
c_in = weights['wc2'].get_shape().as_list()[2]

# nhwkc
pc1c2 = tf.reshape(pc1c2, [N, 5, 5, H * H, c_in, 1])
# Correlate every input window with the error map: sum over spatial positions
# (axis 3), then over the batch (axis 0).
grad_k_2 = tf.reduce_sum(tf.multiply(pc1c2, tf.reshape(e4, [N, 1, 1, H *H, 1, C])), axis=3)
grad_k_2 = tf.reduce_sum(grad_k_2, axis=0) / batch_size
grad_b_2 = tf.reduce_sum(e4, axis=[0,1,2]) / batch_size





# optimizer = opt.apply_gradients(grads_and_vars=gv)
# optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
# Diff1, Diff2, Out = test_conv_net(x, weights, biases, keep_prob)
# lost = []
# for temp in Out:
#     lost.append(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=temp, labels=y)))

# A sample counts as correct when the arg-max of the logits equals the arg-max
# of the one-hot label; accuracy is the mean of those hits over the batch.
predicted_class = tf.argmax(pred, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))


[64, 14, 14, 4, 32]
[64, 7, 7, 4, 64]
[64, 14, 14, 64]


In [80]:
# Initializing the variables
init = tf.global_variables_initializer()
# f = open('output.txt', 'w')
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Gradient check only: a single batch is evaluated and no optimizer op is
    # run, so the variables keep their random initial values.
    # Full training loop condition, kept for reference:
    #     while step * batch_size < training_iters:
    while step < 2:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        train_feed = {x: batch_x, y: batch_y, keep_prob: dropout}
        # Fetch TF's gradients and the hand-derived ones in ONE run so both
        # are computed from the same forward pass.
        grad, wo, bo, w3, b3, k2, b2 = sess.run(
            [gv, grad_w_out, grad_b_out, grad_w_3, grad_b_3, grad_k_2, grad_b_2],
            feed_dict=train_feed)
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            cos, acc = sess.run([cost, accuracy],
                                feed_dict={x: batch_x,
                                           y: batch_y,
                                           keep_prob: 1.})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(cos) +
                  "\nTraining Accuracy= " + "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Accuracy on the first batch_size test images: the graph reshapes its
    # input to a fixed batch dimension, so exactly batch_size rows must be fed.
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: mnist.test.images[:batch_size],
                                                             y: mnist.test.labels[:batch_size],
                                                             keep_prob: 1.}))

Optimization Finished!
Testing Accuracy: 0.03125


In [81]:
# Largest absolute deviation between the hand-derived gradient of weights['out']
# and TensorFlow's (grad[1] is the (gradient, variable) pair for weights['out']).
# max - min of the difference would also be 0 for any constant offset.
np.max(np.abs(wo - grad[1][0]))

0.0

In [82]:
# Largest absolute deviation between the hand-derived gradient of biases['out']
# and TensorFlow's (grad[3] is the (gradient, variable) pair for biases['out']).
# max - min of the difference would also be 0 for any constant offset.
np.max(np.abs(bo - grad[3][0]))

0.0

In [83]:
# Largest absolute deviation between the hand-derived gradient of weights['wd1']
# and TensorFlow's (grad[0] is the (gradient, variable) pair for weights['wd1']).
# max - min of the difference would also be 0 for any constant offset.
np.max(np.abs(w3 - grad[0][0]))

0.0

In [84]:
# Largest absolute deviation between the hand-derived gradient of biases['bd1']
# and TensorFlow's (grad[2] is the (gradient, variable) pair for biases['bd1']).
# max - min of the difference would also be 0 for any constant offset.
np.max(np.abs(b3 - grad[2][0]))

0.0

In [85]:
# TensorFlow's gradient for weights['wc2'] (entry 4 of varList): the 5x5 kernel
# slice for input channel 0 / output channel 0. Compare with k2[:,:,0,0].
grad[4][0][:,:,0,0]

array([[  7.113,   3.941,   1.01 ,   1.72 ,   1.329],
       [  3.869,   4.16 ,   6.256,   3.623,   2.559],
       [ -0.946,  -0.315,   7.511,  10.412,   5.467],
       [  2.677,  -0.286,   3.114,   4.576,  -3.163],
       [  1.466,  -4.366,  -6.594,  -1.089,  -0.11 ]], dtype=float32)

In [86]:
# Hand-derived gradient grad_k_2, same 5x5 slice as the TensorFlow one above.
# NOTE(review): the recorded values differ from grad[4][0][:,:,0,0], i.e. the
# manual conv-layer gradient does not yet reproduce TensorFlow's.
k2[:,:,0,0]

array([[-12.055,  -9.243,  -2.044,  -0.433,  -1.15 ],
       [ -3.359,  -8.579, -10.623,  -0.646,   8.339],
       [  0.018,  -0.573, -10.659,  -6.346,   2.788],
       [ -4.544,  -3.988, -10.051,  -9.829,  -5.183],
       [  2.72 ,   8.593,  12.997,   6.47 ,  -2.174]], dtype=float32)

In [87]:
# TensorFlow's gradient for biases['bc2'] (entry 5 of varList). Compare with b2.
grad[5][0]

array([   8.486,   32.147,   32.594,   18.176,   19.368,   37.689,
          3.461,    3.161,   36.679,   46.331,   35.181,  -17.983,
        -26.24 ,  -17.523,   22.676,   11.221,  -15.314,  -27.324,
         18.39 ,   -6.823, -112.775,  -39.195,   69.488,   29.929,
        -52.463,  -19.913,   38.214,    3.871, -108.608,   31.039,
        -11.308,   12.716,   -5.73 ,  142.202,   64.528,    9.899,
          4.524,   76.403,   17.972,    1.867,   52.849,  -16.054,
        145.009,  -36.019,  -51.141,   44.265,   89.041,   24.233,
         40.731,   24.423,   47.278,   49.88 ,   28.943,   31.006,
         19.901,   19.968,   -1.874,  -36.756,   37.426,   85.944,
        -13.443,   36.236,   -7.993,  121.819], dtype=float32)

In [89]:
# Hand-derived bias gradient grad_b_2.
# NOTE(review): the recorded values differ from grad[5][0] by a large margin,
# so the error term e4 feeding this gradient is suspect.
b2

array([-0.033,  0.13 ,  0.136,  0.29 , -0.019,  0.231,  0.074,  0.022,
        0.205,  0.094,  0.188, -0.427, -0.289, -0.115,  0.145, -0.141,
       -0.035, -0.067, -0.072, -0.23 , -0.203, -0.269,  0.299,  0.296,
       -0.4  ,  0.369, -0.033, -0.292, -0.125, -0.183, -0.156,  0.02 ,
       -0.125,  0.793,  0.4  , -0.108, -0.005,  0.189, -0.643,  0.012,
        0.222, -0.317,  0.744, -0.026, -0.279, -0.004,  0.386, -0.056,
        0.131, -0.234,  0.081,  0.156,  0.253,  0.409, -0.04 ,  0.154,
        0.055, -0.391,  0.101,  0.962, -0.03 , -0.251,  0.186,  0.539], dtype=float32)

In [89]:
# Toy check of the max-pool routing used in the manual backward pass, on a
# tensor small enough to inspect by eye: values 1..64 arranged as [2, 4, 4, 2].
tt = np.arange(1, 65).reshape([2, 2, 4, 4])
tt = np.transpose(tt, [0, 2, 3, 1])

# Overwrite some entries of sample 0 so that several 2x2 windows hold repeated
# values, including a window whose maximum is tied.
tt[0,0,3,0] = 3
tt[0,2,0,0] = 13
tt[0,2,1,0] = 13
tt[0,2,3,0] = 11
tt[0,3,2,0] = 11
tt[0,3,3,0] = 11
tt[0,0,0,1] = 18
tt[0,1,0,1] = 18
tt[0,1,1,1] = 18
tt[0,0,2,1] = 23
tt[0,0,3,1] = 23
tt[0,2,0,1] = 30

# Use a dedicated name: rebinding `x` here would clobber the graph's input
# placeholder and break a later re-run of the session cell.
toy_x = tf.constant(tt, dtype=tf.float32)
p = func.extract_patches(toy_x, "VALID", 2, 2)
maxx = func.max_pool(p)
maxx = tf.reshape(maxx, [2,2,2,1,2])
# 1.0 where a patch entry equals its pooled value, then scaled by that value
mark = tf.cast(tf.equal(p, maxx), dtype=tf.float32)
mark = tf.multiply(mark, maxx)

# Bring the patch-ordered result back to image layout with the same
# reshape -> extract_patches -> reshape round trip the backward pass uses.
p = tf.reshape(mark, toy_x.get_shape().as_list())
p = func.extract_patches(p, "VALID", 2, 2)
p = tf.reshape(p, toy_x.get_shape().as_list())

with tf.Session() as sess:
    retx, retp = sess.run([mark, p])

In [180]:
# Two identical copies of a 4x4 integer grid -> shape (2, 4, 4).
magic_square = [[16, 2, 3, 13], [5, 11, 10, 8], [9, 7, 6, 12], [4, 14, 15, 1]]
tt1 = np.array([magic_square] * 2)

# Two identical copies of a 3x3 kernel -> shape (2, 3, 3).
kernel = [[0.8, 0.1, -0.6], [0.3, 0.5, 0.7], [-0.4, 0, -0.2]]
kernel_pair = np.array([kernel] * 2)

# Stack the pair with a copy whose kernel rows are reversed (flip along axis 1),
# then move the copy axis to the end: (2, 2, 3, 3) -> (2, 3, 3, 2).
fk = np.stack([kernel_pair, kernel_pair[:, ::-1]]).transpose(0, 2, 3, 1)

In [181]:
# Shape of fk after the stack and the (0, 2, 3, 1) transpose.
fk.shape

(2, 3, 3, 2)

In [182]:
# NOTE(review): `tt2` is not defined anywhere in the visible notebook, so this
# line raises NameError on a fresh kernel. It also rebuilds `tt1` from its own
# previous value, so re-running the cell keeps adding a leading axis.
tt1 = np.array([tt1, tt2])

In [183]:
# Move axis 1 to the end (axis order 0, 2, 3, 1). This rebinds tt1 from itself,
# so running the cell twice permutes the axes twice.
tt1 = tt1.transpose([0,2,3,1])

In [184]:
# First entry along axis 0, index 0 of the last axis: the 4x4 grid.
tt1[0,:,:,0]

array([[16,  2,  3, 13],
       [ 5, 11, 10,  8],
       [ 9,  7,  6, 12],
       [ 4, 14, 15,  1]])

In [52]:
# Broadcasting demo: a [2, 1, 3, 1] tensor times a [2, 1, 1, 2] tensor gives,
# per sample, the outer product of its 3 values with its 2 values.
# Dedicated names are used instead of `x` / `y` so the graph's input
# placeholders are not overwritten by this experiment.
lhs = tf.constant(np.array([[[1],[2],[3]],[[3],[4],[5]]]))
rhs = tf.constant(np.array([[[2],[4]], [[3], [6]]]))
rhs = tf.reshape(rhs, [2,1,1,2])
lhs = tf.reshape(lhs, [2,1,3,1])
temp1 = tf.multiply(lhs, rhs)

with tf.Session() as sess:
    ret = sess.run(temp1)

In [54]:
# Broadcast product for the first sample: rows are [1, 2, 3] scaled by [2, 4].
ret[0]

array([[[ 2,  4],
        [ 4,  8],
        [ 6, 12]]])

In [91]:
# Routed values of the toy max-pool test laid out as a 4x4 image
# (sample 0, channel 0): non-zero only where an entry equals its window's pooled value.
retp[0,:,:,0]

array([[  0.,   0.,   0.,   0.],
       [  0.,   6.,   0.,   8.],
       [  0.,   0.,  11.,  11.],
       [  0.,  14.,  11.,  11.]], dtype=float32)

In [81]:
# The four entries of the first 2x2 window (sample 0, channel 0) of `mark`.
# NOTE(review): the recorded output is a 0/1 mask, while the cell above scales
# `mark` by the pooled value -- presumably recorded from an earlier version of
# that cell; confirm by re-running.
retx[0,0,0,:,0]

array([ 0.,  0.,  0.,  1.], dtype=float32)