In [1]:
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
np.set_printoptions(threshold=np.nan)
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding = 'SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

# extract patches from feature maps
# input shape N, H, W, C
# output shape N, H, W, K, C
def extract_patches(x, padding, ksize=2, stride=2):
    temp = tf.extract_image_patches(images=x, ksizes=[1, ksize, ksize, 1], strides=[1, stride, stride, 1], rates=[1,1,1,1], padding=padding)
    [N, H, W, C] = temp.get_shape().as_list()
    C = x.get_shape().as_list()[-1]
#     reshape to N,H,W,K,C
    temp = tf.reshape(temp, [-1, H, W, ksize*ksize, C])
    return temp
# MaxPool
def max_pool(p):
    return tf.reduce_max(p, axis=3)

In [4]:
# pcaPool
# if m == 1, then consider each window as an unique instances, and each window have their own pca encoder
# if m != 1, then all windows fetch from the same feature map share one pca encoder
def pca_pool(temp, m = 1):
    [N, H, W, K, C] = temp.get_shape().as_list()
    if m == 1:
        temp = tf.transpose(temp, [0,1,2,4,3])
        temp = tf.reshape(temp, [-1, K, 1])
    else:
        temp = tf.transpose(temp, [0,4,3,1,2])
        temp = tf.reshape(temp, [-1, K, H*W])
#     compute for svd
    [s, u, v] = tf.svd(tf.matmul(temp, tf.transpose(temp, [0,2,1])), compute_uv=True)
#     use mark to remove Eigenvector except for the first one, which is the main component
    temp_mark = np.zeros([K,K])
    temp_mark[:,0] = 1
    mark = tf.constant(temp_mark, dtype=tf.float32)
    
#     after reduce_sum actually it has been transposed automatically
    u = tf.reduce_sum(tf.multiply(u, mark), axis=2)
    u = tf.reshape(u, [-1, 1, K])
    
    temp = tf.matmul(u, temp)/np.sqrt(K)
    if m == 1: temp = tf.reshape(temp, [-1, H, W, C])
    else: 
        temp = tf.reshape(temp, [-1, C, H, W])
        temp = tf.transpose(temp, [0, 2, 3, 1])
    return temp

In [5]:
# compute the frequency of element in each patch
# input extracted patches tensor in shape N, H, W, K, C
# output frequency tensor in shape N, H, W, K, C
def majority_frequency(temp):
    [N, H, W, K, C] = temp.get_shape().as_list()
    print([N, H, W, K, C])
    temp = tf.to_int32(tf.round(temp))
#     build one hot vector
    temp = tf.transpose(temp, [0,1,2,4,3])
    one_hot = tf.one_hot(indices=temp, depth=tf.reduce_max(temp) + 1, dtype=tf.float32)
#     the dimension is bathch, row, col, lay, one hot
#     the order tensorflow takes, when doiong transpose, it will from the most right to most left
    one_hot = tf.reduce_sum(one_hot, axis=4)
    temp = tf.transpose(temp, [0, 3, 1, 2, 4])
    temp = tf.reshape(temp, [N*H*W*C*K,1])
    one_hot = tf.transpose(one_hot, [0,3,1,2,4])
    one_hot = tf.reshape(one_hot, [N*H*W*C, -1])
    
    index = tf.constant(np.array([range(temp.get_shape().as_list()[0])])/ K, dtype=tf.int32)
    temp = tf.concat((tf.transpose(index), temp), axis=1)
    
#     to get the percentage
    temp = tf.gather_nd(one_hot, temp)
    temp = tf.reshape(temp, [N, C, H, W, K])
#     finally we change it back to N,H,W,K,C
    temp = tf.transpose(temp, [0, 2, 3, 4, 1])
    return temp

def majority_pool(p, f):
    btemp = tf.reduce_max(f , axis=[3], keep_dims=True)
#     get the index of the majority element
    temp = tf.equal(f, btemp)
    temp = tf.to_float(temp)
#     use the largest frequency to represent each window
    btemp = tf.squeeze(btemp, squeeze_dims=3)
#     compute mean of the elements that have same round value in each window
    temp = tf.divide(tf.reduce_sum(tf.multiply(p, temp), axis=[3]), btemp)
#     when the largest frequency is 1, then we just the max value in p as the result, else use the mean of the of elements
#     having the same round value, as the result.
    temp = tf.where(tf.equal(btemp, 1), 
                    tf.reduce_max(p, axis=[3]), temp)
    return temp

In [6]:
# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 64
display_step = 10

In [7]:
# Network Parameters
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
dropout = 0.75 # Dropout, probability to keep units

In [8]:
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)


# Store layers weight & bias
weights = {
    # 5x5 conv, 1 input, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # fully connected, 7*7*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_classes]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

In [9]:
# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    
    x = tf.reshape(x, shape=[64, 28, 28, 1])

    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling)
    p = extract_patches(conv1, 'SAME', 2, 2)
    f = majority_frequency(p)
    temp = majority_pool(p, f)
#     temp = max_pool(p)
    
    # Convolution Layer
    conv2 = conv2d(temp, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling)
    p = extract_patches(conv2, 'SAME', 2, 2)
    f = majority_frequency(p)
    temp = majority_pool(p, f)
#     temp = max_pool(p)

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    fc1 = tf.reshape(temp, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return conv1, out

In [10]:
# Construct model
conv1, pred = conv_net(x, weights, biases, keep_prob)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

[64, 14, 14, 4, 32]
[64, 7, 7, 4, 64]


In [11]:
# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
#     while step < 2:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Run optimization op (backprop)
        ret = sess.run(conv1, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                              y: batch_y,
                                                              keep_prob: 1.})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) + 
                  "\nTraining Accuracy= " + "{:.5f}".format(acc))
        step += 1
        with open('output.txt', 'a') as f:
            f.write('\nmaxpooling value: \n')
        with open('output.txt', 'a') as f:
        #             np.savetxt(f, np.amax(ret), delimiter=', ',fmt="%.2f")
            f.write(str(np.amax(ret)))
    print("Optimization Finished!")

    # Calculate accuracy for 256 mnist test images
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: mnist.test.images[:64], y: mnist.test.labels[:64],
keep_prob: 1.}))

Iter 640, Minibatch Loss= 27102.486328
Training Accuracy= 0.21875
Iter 1280, Minibatch Loss= 16662.992188
Training Accuracy= 0.28125
Iter 1920, Minibatch Loss= 11796.603516
Training Accuracy= 0.37500
Iter 2560, Minibatch Loss= 8583.090820
Training Accuracy= 0.57812
Iter 3200, Minibatch Loss= 8185.166016
Training Accuracy= 0.59375
Iter 3840, Minibatch Loss= 6372.175781
Training Accuracy= 0.67188
Iter 4480, Minibatch Loss= 5220.750977
Training Accuracy= 0.64062
Iter 5120, Minibatch Loss= 3244.526611
Training Accuracy= 0.71875
Iter 5760, Minibatch Loss= 5117.715820
Training Accuracy= 0.67188
Iter 6400, Minibatch Loss= 2847.759277
Training Accuracy= 0.78125
Iter 7040, Minibatch Loss= 4150.369629
Training Accuracy= 0.76562
Iter 7680, Minibatch Loss= 3805.197754
Training Accuracy= 0.81250
Iter 8320, Minibatch Loss= 4788.616211
Training Accuracy= 0.70312
Iter 8960, Minibatch Loss= 2634.083740
Training Accuracy= 0.75000
Iter 9600, Minibatch Loss= 3271.806641
Training Accuracy= 0.70312
Iter 102

Iter 80000, Minibatch Loss= 226.807968
Training Accuracy= 0.90625
Iter 80640, Minibatch Loss= 318.375549
Training Accuracy= 0.87500
Iter 81280, Minibatch Loss= 503.556030
Training Accuracy= 0.89062
Iter 81920, Minibatch Loss= 547.062866
Training Accuracy= 0.79688
Iter 82560, Minibatch Loss= 278.093658
Training Accuracy= 0.92188
Iter 83200, Minibatch Loss= 219.269684
Training Accuracy= 0.93750
Iter 83840, Minibatch Loss= 204.761444
Training Accuracy= 0.90625
Iter 84480, Minibatch Loss= 398.532654
Training Accuracy= 0.87500
Iter 85120, Minibatch Loss= 112.931953
Training Accuracy= 0.93750
Iter 85760, Minibatch Loss= 191.436707
Training Accuracy= 0.90625
Iter 86400, Minibatch Loss= 397.462860
Training Accuracy= 0.87500
Iter 87040, Minibatch Loss= 482.415649
Training Accuracy= 0.89062
Iter 87680, Minibatch Loss= 274.105347
Training Accuracy= 0.90625
Iter 88320, Minibatch Loss= 255.661591
Training Accuracy= 0.89062
Iter 88960, Minibatch Loss= 364.209595
Training Accuracy= 0.90625
Iter 89600

Iter 158720, Minibatch Loss= 149.010178
Training Accuracy= 0.87500
Iter 159360, Minibatch Loss= 87.766090
Training Accuracy= 0.89062
Iter 160000, Minibatch Loss= 178.346268
Training Accuracy= 0.84375
Iter 160640, Minibatch Loss= 199.677261
Training Accuracy= 0.87500
Iter 161280, Minibatch Loss= 119.617638
Training Accuracy= 0.90625
Iter 161920, Minibatch Loss= 402.946472
Training Accuracy= 0.87500
Iter 162560, Minibatch Loss= 177.471771
Training Accuracy= 0.85938
Iter 163200, Minibatch Loss= 122.215240
Training Accuracy= 0.93750
Iter 163840, Minibatch Loss= 63.368340
Training Accuracy= 0.92188
Iter 164480, Minibatch Loss= 175.078430
Training Accuracy= 0.89062
Iter 165120, Minibatch Loss= 93.536964
Training Accuracy= 0.90625
Iter 165760, Minibatch Loss= 129.919617
Training Accuracy= 0.93750
Iter 166400, Minibatch Loss= 185.176208
Training Accuracy= 0.87500
Iter 167040, Minibatch Loss= 176.432266
Training Accuracy= 0.89062
Iter 167680, Minibatch Loss= 132.851242
Training Accuracy= 0.9375