In [1]:
import tensorflow as tf
import numpy as np

from tensorflow.examples.tutorials.mnist import input_data
LOCAL_FOLDER = "G:/notebook/MNIST_data/"
data = input_data.read_data_sets(LOCAL_FOLDER, one_hot=True, validation_size=0)

Extracting G:/notebook/MNIST_data/train-images-idx3-ubyte.gz
Extracting G:/notebook/MNIST_data/train-labels-idx1-ubyte.gz
Extracting G:/notebook/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting G:/notebook/MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
np.random.seed(10)
tf.set_random_seed(10)

class MLP(object):
    def __init__(self, input, in_dim, out_dim, act=tf.nn.relu, vn=('W', 'b'), keep_prob=1.0):
        self.input = input
        
        W = tf.Variable(
            tf.random_uniform(
                [in_dim, out_dim], 
                maxval = tf.sqrt(6.0) / tf.sqrt(float(out_dim+in_dim)),
                seed = 10
            ), name=vn[0]
        )
        
        b = tf.Variable(tf.zeros([out_dim]), name=vn[1])
        
        self.W = W
        self.b = b
        self.params = [self.W, self.b]
        self.act = act
        self.keep_prob = keep_prob
    
    def output(self):
        y = tf.matmul(self.input, self.W) + self.b
        y = tf.nn.dropout(y, self.keep_prob)
        self.output = self.act(y)
        
        return self.output

class conv2d(object):
    def __init__(self, input, in_height, in_width, in_channels, out_channels, 
                 k=1, act=tf.nn.relu, vn=('W', 'b'), keep_prob=1.0):
        self.input = input
        
        W = tf.Variable(
            tf.random_uniform(
                [in_height, in_width, in_channels, out_channels], 
                maxval = tf.sqrt(6.0) / tf.sqrt(float(in_height*in_width*in_channels + out_channels)),
                seed = 10
            ), name=vn[0]
        )
        
        b = tf.Variable(tf.zeros([out_channels]), name=vn[1])
        
        self.W = W
        self.b = b
        self.params = [self.W, self.b]
        self.act = act
        self.strides = k
        self.keep_prob = keep_prob
        
    def output(self): 
        y1 = tf.nn.conv2d(self.input, self.W, strides=[1, self.strides, self.strides, 1], padding='SAME')
        y2 = tf.nn.bias_add(y1, self.b)
        y2 = tf.nn.dropout(y2, self.keep_prob)
        return tf.nn.relu(y2)


class maxpool2d(object):
    def __init__(self, input, k=2):
        self.input = input
        self.strides = k
        
    def output(self): 
        return tf.nn.max_pool(self.input, ksize=[1, self.strides, self.strides, 1], 
                              strides=[1, self.strides, self.strides, 1],
                          padding='SAME')


In [3]:
dropout = tf.placeholder(tf.float32)
input = tf.placeholder(tf.float32, [None, 784], name="input")
targets = tf.placeholder(tf.float32, [None, 10], name="targets")

x = tf.reshape(input, shape=[-1, 28, 28, 1])

conv1 = conv2d(x, 5, 5, 1, 32, keep_prob=1.0 - dropout)
conv1pool = maxpool2d(conv1.output(), k=2)

#h1 = conv1pool.output()
#conv2 = conv2d(h1, 5, 5, 4, 8, keep_prob=1.0 - dropout)
#conv2pool = maxpool2d(conv2.output(), k=2)

h2 = tf.reshape(conv1pool.output(), [-1, 14*14*32])
mlp1 = MLP(h2, 14*14*32, 200, keep_prob = 1 - dropout)

h3 = mlp1.output()
mlp2 = MLP(h3, 200, 10, act=tf.identity, keep_prob = 1)

predict = mlp2.output()


cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        labels=targets, logits=predict
    )
)

loss = cross_entropy + 1e-6 * (tf.reduce_sum(tf.abs(conv1.W)) 
                               + tf.reduce_sum(tf.abs(mlp1.W))+ tf.reduce_sum(tf.abs(mlp2.W)) )

train_step = tf.train.AdamOptimizer(1e-4).minimize(loss) 
#tf.train.GradientDescentOptimizer(0.1).minimize(loss)

correct_prediction = tf.equal(tf.argmax(predict, 1), tf.argmax(targets, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
from matplotlib import pyplot

steps = []
accuracies = []

# creating session
sess = tf.InteractiveSession()

# initializing trainable variables
sess.run(tf.global_variables_initializer())

# training loop
for step in range(1000):
    # fetching next batch of training data
    batch_xs, batch_ys = data.train.next_batch(100)

    if step % 100 == 0:
        # reporting current accuracy of the model on every 100th batch
        batch_accuracy = sess.run(accuracy, feed_dict={input: batch_xs, targets: batch_ys, dropout: 0.0})
        print("{0}:\tbatch accuracy {1:.2f}".format(step, batch_accuracy))
        steps.append(step/100)
        accuracies.append(batch_accuracy)

    # running the training step with the fetched batch
    sess.run(train_step, feed_dict={input: batch_xs, targets: batch_ys, dropout: 0.25})

pyplot.plot(steps, accuracies, 'k-', linewidth=2.0, color='green')
pyplot.show()   

0:	batch accuracy 0.11
100:	batch accuracy 0.70
200:	batch accuracy 0.78
300:	batch accuracy 0.73


In [22]:
# evaluating model prediction accuracy of the model on the test set
test_accuracy = sess.run(accuracy, feed_dict={input: data.test.images, targets: data.test.labels, dropout: 0.0})


print("-------------------------------------------------")
print("Test set accuracy: {0:.4f}".format(test_accuracy))

-------------------------------------------------
Test set accuracy: 0.9479
