In [1]:
from utils import *

Leaky ReLUs address the "dying neuron" problem of conventional ReLUs. The leaky ReLU function has a small non-zero gradient (slope `alpha`) for inputs less than 0, so a neuron can keep learning even when its input is negative.

Here we define a custom leaky ReLU function and build a simple DNN that uses it to classify MNIST digits.

In [2]:
# Load the MNIST dataset through the TensorFlow tutorial helper.
# The dataset archives are looked up under the local "tmp/" directory.

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(train_dir="tmp/")

Extracting tmp/train-images-idx3-ubyte.gz
Extracting tmp/train-labels-idx1-ubyte.gz
Extracting tmp/t10k-images-idx3-ubyte.gz
Extracting tmp/t10k-labels-idx1-ubyte.gz


In [3]:
def leaky_relu(z, alpha=0.01, name=None):
    """Leaky ReLU activation: element-wise maximum of ``alpha * z`` and ``z``.

    For ``0 <= alpha <= 1`` this returns ``z`` where ``z >= 0`` and
    ``alpha * z`` where ``z < 0``, so negative inputs keep a slope of
    ``alpha`` instead of being clamped to zero.

    Args:
        z: Tensor of pre-activation values.
        alpha: Multiplier applied to ``z`` for the "leaky" branch.
        name: Optional name forwarded to the ``tf.maximum`` op.

    Returns:
        The tensor produced by ``tf.maximum(alpha * z, z)``.
    """
    leaky_branch = alpha * z
    return tf.maximum(leaky_branch, z, name=name)

In [4]:
# construction: build the graph for a fully connected classifier with two
# leaky-ReLU hidden layers, a cross-entropy loss, an SGD training op and an
# accuracy metric.

# NOTE(review): reset_graph comes from utils (star import); presumably it
# clears the default graph and fixes the random seeds -- confirm in utils.
reset_graph()

n_inputs = 28 * 28  # width of each row fed to X (a flattened 28x28 image)
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10      # number of logits produced per example

learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# `(None)` is just `None` (parentheses without a comma do not make a tuple),
# which would leave the rank of y completely unspecified. `(None,)` declares
# the intended 1-D vector of integer class labels with unknown batch size.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name="hidden2")
    # No activation here: the loss below applies softmax to the raw logits.
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

with tf.name_scope("loss"):
    # "sparse" variant: labels are class indices, not one-hot vectors.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # A prediction counts as correct when the true label is the top-1 logit.
    correct = tf.nn.in_top_k(logits, y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [5]:
# execution: train with mini-batch gradient descent, printing train/validation
# accuracy every 5th epoch (and on the final epoch), then the test accuracy.

n_epochs = 40
batch_size = 50

with tf.Session() as sess:
    sess.run(init)

    batches_per_epoch = mnist.train.num_examples // batch_size
    last_epoch = n_epochs - 1

    for epoch in range(n_epochs):
        for iteration in range(batches_per_epoch):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Only report on every 5th epoch and on the very last one.
        if epoch % 5 != 0 and epoch != last_epoch:
            continue

        acc_train = sess.run(
            accuracy,
            feed_dict={X: mnist.train.images, y: mnist.train.labels},
        )
        acc_val = sess.run(
            accuracy,
            feed_dict={X: mnist.validation.images, y: mnist.validation.labels},
        )
        print(epoch, "train acc:", acc_train, "val acc:", acc_val)

    # Final evaluation on the held-out test split, still inside the session.
    acc_test = sess.run(
        accuracy,
        feed_dict={X: mnist.test.images, y: mnist.test.labels},
    )
    print("Test acc:", acc_test)

0 train acc: 0.896636 val acc: 0.9044
5 train acc: 0.946309 val acc: 0.951
10 train acc: 0.963909 val acc: 0.9666
15 train acc: 0.973909 val acc: 0.972
20 train acc: 0.979655 val acc: 0.9748
25 train acc: 0.984873 val acc: 0.9764
30 train acc: 0.988218 val acc: 0.978
35 train acc: 0.9904 val acc: 0.9792
39 train acc: 0.992091 val acc: 0.98
Test acc: 0.9774
