In [1]:
## referenced from 
## https://blog.metaflow.fr/sparse-coding-a-simple-exploration-152a3c900a7c#.uo3kxjenw
import time, os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

dir = os.getcwd()
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Fully connected model
# Number of parameters: (784 * 784 + 784) + (784 * 10 + 10) = 615440 + 7850 = 623290
# Dimensionality: R^784 -> R^784 -> R^10

# Placeholder
x = tf.placeholder(tf.float32, shape=[None, 784])
y_true = tf.placeholder(tf.float32, shape=[None, 10])

sparsity_constraint = tf.placeholder(tf.float32)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
with tf.variable_scope('NeuralLayer'):
    W = tf.get_variable('W', shape=[784, 784], initializer=tf.random_normal_initializer(stddev=1e-1))
    b = tf.get_variable('b', shape=[784], initializer=tf.constant_initializer(0.1))

    z = tf.matmul(x, W) + b
    a = tf.nn.relu(z)

    # We graph the average density of neurons activation
    average_density = tf.reduce_mean(tf.reduce_sum(tf.cast((a > 0), tf.float32), axis=[1]))
    tf.summary.scalar('AverageDensity', average_density)

with tf.variable_scope('SoftmaxLayer'):
    W_s = tf.get_variable('W_s', shape=[784, 10], initializer=tf.random_normal_initializer(stddev=1e-1))
    b_s = tf.get_variable('b_s', shape=[10], initializer=tf.constant_initializer(0.1))

    out = tf.matmul(a, W_s) + b_s
    y = tf.nn.relu(out)

with tf.variable_scope('Loss'):
    epsilon = 1e-7 # After some training, y can be 0 on some classes which lead to NaN 
    diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y)
    # We add our sparsity constraint on the activations
    cross_entropy = tf.reduce_mean(diff)
    loss = cross_entropy + sparsity_constraint * tf.reduce_sum(a)

    tf.summary.scalar('loss', loss) # Graph the loss

summaries = tf.summary.merge_all() # This is convenient

with tf.variable_scope('Accuracy'):
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_true, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    acc_summary = tf.summary.scalar('accuracy', accuracy) 

In [3]:
# Training
adam = tf.train.AdamOptimizer(learning_rate=1e-3)
train_op = adam.minimize(loss)
sess = None
# We iterate over different sparsity constraint
for sc in [0, 1e-4, 5e-4, 1e-3, 2.7e-3]:
    result_folder = dir + '/results/' + str(int(time.time())) + '-fc-sc' + str(sc)
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sw = tf.summary.FileWriter(result_folder, sess.graph)
        
        for i in range(20000):
            batch = mnist.train.next_batch(100)
            current_loss, summary, _ = sess.run([loss, summaries, train_op], feed_dict={
                x: batch[0],
                y_true: batch[1],
                sparsity_constraint: sc
            })
            sw.add_summary(summary, i + 1)

            if (i + 1) % 100 == 0:
                acc, acc_sum = sess.run([accuracy, acc_summary], feed_dict={
                    x: mnist.test.images, 
                    y_true: mnist.test.labels
                })
                sw.add_summary(acc_sum, i + 1)
                print('batch: %d, loss: %f, accuracy: %f' % (i + 1, current_loss, acc))

batch: 100, loss: 0.422984, accuracy: 0.830300
batch: 200, loss: 0.475206, accuracy: 0.845500
batch: 300, loss: 0.555954, accuracy: 0.857900
batch: 400, loss: 0.439589, accuracy: 0.858900
batch: 500, loss: 0.450527, accuracy: 0.864400
batch: 600, loss: 0.381175, accuracy: 0.869300
batch: 700, loss: 0.451939, accuracy: 0.874800
batch: 800, loss: 0.393449, accuracy: 0.876000
batch: 900, loss: 0.361880, accuracy: 0.877400
batch: 1000, loss: 0.421486, accuracy: 0.877400
batch: 1100, loss: 0.312802, accuracy: 0.878200
batch: 1200, loss: 0.394193, accuracy: 0.881100
batch: 1300, loss: 0.073932, accuracy: 0.968900
batch: 1400, loss: 0.116551, accuracy: 0.969100
batch: 1500, loss: 0.129034, accuracy: 0.970600
batch: 1600, loss: 0.145322, accuracy: 0.973300
batch: 1700, loss: 0.027795, accuracy: 0.976600
batch: 1800, loss: 0.058118, accuracy: 0.975300
batch: 1900, loss: 0.096179, accuracy: 0.973400
batch: 2000, loss: 0.094231, accuracy: 0.977700
batch: 2100, loss: 0.016156, accuracy: 0.976100
b