# MNIST multilayer network

## Exercise - Load and preprocess data

> **Exercise**: Load the MNIST data. Split it into train, validation and test sets. Standardize the images. Define a `get_batches(X, y, batch_size)` function to generate random X/y batches of size `batch_size` using a Python generator.

In [1]:
# define which xk.npz file to load (give the number of k)
number_of_k=6
data_filename='mnist-{}k.npz'.format(number_of_k)
print('Using file:',data_filename)

Using file: mnist-6k.npz


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np


In [3]:
# Load data
with np.load(data_filename, allow_pickle=False) as npz_file:
    # Load items into a dictionary
    mnist = dict(npz_file.items())

In [4]:
# Create train, test and validation sets
X_train, X_test, y_train, y_test = train_test_split(
    # Convert uint8 pixel values to float
    mnist['data'].astype(np.float32),
    mnist['labels'],
    test_size=1000, random_state=0)

X_test, X_valid, y_test, y_valid = train_test_split(
    # Convert uint8 pixel values to float
    X_test,
    y_test,
    test_size=500, random_state=0)

print("Train:", X_train.shape, y_train.shape)
print("Test :", X_test.shape, y_test.shape)
print("Valid:", X_valid.shape, y_valid.shape)

# Rescale train and validation data
scaler = StandardScaler()
X_train_rescaled = scaler.fit_transform(X_train)
X_test_rescaled = scaler.fit_transform(X_test)
X_valid_rescaled = scaler.transform(X_valid)



Train: (5000, 784) (5000,)
Test : (500, 784) (500,)
Valid: (500, 784) (500,)


In [5]:
# Batch generator
def get_batches(X, y, batch_size):
    # Shuffle X,y
    shuffled_idx = np.arange(len(y)) # 1,2,...,n
    np.random.shuffle(shuffled_idx)

    # Enumerate indexes by steps of batch_size
    # i: 0, b, 2b, 3b, 4b, .. where b is the batch size
    for i in range(0, len(y), batch_size):
        # Batch indexes
        batch_idx = shuffled_idx[i:i+batch_size]
        yield X[batch_idx], y[batch_idx]

## Exercise - Create and train a multilayer network

> **Exercise:** Create a multilayer neural network and train it using your batch generator. Evaluate the accuracy on the validation set after each epoch. Test different architectures and parameters. Evaluate your best network on the test set. Save the trained weights of the first fully connected layer in a variable.

In [91]:
# Definition of the different networks to try
neural_networks={
    '2-layer-32': {
        'hidden': [32],
        'graph': None,
        'train_op': None,
    },
    '2-layer-64': {
        'hidden': [64],
        'graph': None,
        'train_op': None,
    }
}

In [95]:
import tensorflow as tf

for network_name in neural_networks.keys():
    print("Working with network:", network_name)
    
    # Redefine the graph
    neural_networks[network_name]['graph'] = tf.Graph()
    
    with neural_networks[network_name]['graph'].as_default():
        # Create placeholders
        X = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='{}-X'.format(network_name))
        y = tf.placeholder(dtype=tf.int32, shape=[None], name='{}-y'.format(network_name))

        i=1
        X_current=X
        for shape in neural_networks[network_name]['hidden']:
            print('  Adding hidden layer with {} neurons'.format(i))
            # Hidden layer with 16 units
            hidden = tf.layers.dense(
                X_current, shape, activation=tf.nn.relu, # ReLU
                kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
                bias_initializer=tf.zeros_initializer(),
                name='{}-hidden-{}'.format(network_name, i)
            )
            i+=1
            X_current=hidden

        # Output layer
        logits = tf.layers.dense(
            hidden, 10, activation=None, # No activation function
            kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
            bias_initializer=tf.zeros_initializer(),
            name='{}-output'.format(network_name)
        )

        # Loss fuction: mean cross-entropy
        mean_ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))

        # Gradient descent
        lr = tf.placeholder(dtype=tf.float32, name='{}-lr'.format(network_name))
        gd = tf.train.GradientDescentOptimizer(learning_rate=lr)

        # Minimize cross-entropy
        train_op = gd.minimize(mean_ce, name='{}-train_op'.format(network_name))

        # Compute predictions and accuracy
        predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
        is_correct = tf.equal(y, predictions)
        accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32), name='{}-accuracy'.format(network_name))
        neural_networks[network_name]['accuracy']=accuracy
        
        print("  Trainable Variables:")
        for v in tf.trainable_variables():
            print("   ",v)
        

Working with network: 2-layer-32
  Adding hidden layer with 1 neurons
  Trainable Variables:
    <tf.Variable '2-layer-32-hidden-1/kernel:0' shape=(784, 32) dtype=float32_ref>
    <tf.Variable '2-layer-32-hidden-1/bias:0' shape=(32,) dtype=float32_ref>
    <tf.Variable '2-layer-32-output/kernel:0' shape=(32, 10) dtype=float32_ref>
    <tf.Variable '2-layer-32-output/bias:0' shape=(10,) dtype=float32_ref>
Working with network: 2-layer-64
  Adding hidden layer with 1 neurons
  Trainable Variables:
    <tf.Variable '2-layer-64-hidden-1/kernel:0' shape=(784, 64) dtype=float32_ref>
    <tf.Variable '2-layer-64-hidden-1/bias:0' shape=(64,) dtype=float32_ref>
    <tf.Variable '2-layer-64-output/kernel:0' shape=(64, 10) dtype=float32_ref>
    <tf.Variable '2-layer-64-output/bias:0' shape=(10,) dtype=float32_ref>


In [108]:
# Validation accuracy
valid_acc_values = []

network_name='2-layer-64'

graph=neural_networks[network_name]['graph']

with tf.Session(graph=graph) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())

    # Set seed
    np.random.seed(0)

    # Get variables
    X=graph.get_tensor_by_name('{}-X:0'.format(network_name))
    y=graph.get_tensor_by_name('{}-y:0'.format(network_name))
    lr=graph.get_tensor_by_name('{}-lr:0'.format(network_name))
    
    train_op=graph.get_operation_by_name('{}-train_op'.format(network_name))
    accuracy=graph.get_operation_by_name('{}-accuracy'.format(network_name))

    print(train_op.values)
    
    # Train several epochs
    for epoch in range(50):
        # Accuracy values (train) after each batch
        batch_acc = []

        # Get batches of data
        for X_batch, y_batch in get_batches(X_train_rescaled, y_train, 32):
            # Run training and evaluate accuracy
            _, acc_value = sess.run([train_op, accuracy], feed_dict={
                X: X_batch,
                y: y_batch,
                lr: 0.001 # Learning rate
            })

            # Save accuracy (current batch)
            batch_acc.append(acc_value)

        # Evaluate validation accuracy
        valid_acc = sess.run(accuracy, feed_dict={
            X: X_valid_rescaled,
            y: y_valid
        })
        valid_acc_values.append(valid_acc)
        print(valid_acc)

        # Print progress
        print('Epoch {} - valid: {:.3f} train: {:.3f} (mean)'.format(
            epoch+1,
            2, 3
            #np.mean(batch_acc)
        ))


<bound method Operation.values of <tf.Operation '2-layer-64-train_op' type=NoOp>>
None
Epoch 1 - valid: 2.000 train: 3.000 (mean)
None
Epoch 2 - valid: 2.000 train: 3.000 (mean)
None
Epoch 3 - valid: 2.000 train: 3.000 (mean)
None
Epoch 4 - valid: 2.000 train: 3.000 (mean)
None
Epoch 5 - valid: 2.000 train: 3.000 (mean)
None
Epoch 6 - valid: 2.000 train: 3.000 (mean)
None
Epoch 7 - valid: 2.000 train: 3.000 (mean)
None
Epoch 8 - valid: 2.000 train: 3.000 (mean)
None
Epoch 9 - valid: 2.000 train: 3.000 (mean)
None
Epoch 10 - valid: 2.000 train: 3.000 (mean)
None
Epoch 11 - valid: 2.000 train: 3.000 (mean)
None
Epoch 12 - valid: 2.000 train: 3.000 (mean)
None
Epoch 13 - valid: 2.000 train: 3.000 (mean)
None
Epoch 14 - valid: 2.000 train: 3.000 (mean)
None
Epoch 15 - valid: 2.000 train: 3.000 (mean)
None
Epoch 16 - valid: 2.000 train: 3.000 (mean)
None
Epoch 17 - valid: 2.000 train: 3.000 (mean)
None
Epoch 18 - valid: 2.000 train: 3.000 (mean)
None
Epoch 19 - valid: 2.000 train: 3.000 (me

In [76]:
# Validation accuracy
valid_acc_values = []

graph=neural_networks['2-layer-32']['graph']

with tf.Session(graph=graph) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())

    # Set seed
    np.random.seed(0)

    
    # Get variables
    X=graph.get_tensor_by_name(  "productX:0" )
    
    # Train several epochs
    for epoch in range(50):
        # Accuracy values (train) after each batch
        batch_acc = []

        # Get batches of data
        for X_batch, y_batch in get_batches(X_train_rescaled, y_train, 64):
            # Run training and evaluate accuracy
            _, acc_value = sess.run([train_op, accuracy], feed_dict={
                X: X_batch,
                y: y_batch,
                lr: 0.01 # Learning rate
            })

            # Save accuracy (current batch)
            batch_acc.append(acc_value)

        # Evaluate validation accuracy
        valid_acc = sess.run(accuracy, feed_dict={
            X: X_valid_rescaled,
            y: y_valid
        })
        valid_acc_values.append(valid_acc)

        # Print progress
        print('Epoch {} - valid: {:.3f} train: {:.3f} (mean)'.format(
            epoch+1, valid_acc, np.mean(batch_acc)
        ))

    # Weights of the hidden and output layers
    #weights_hidden = W1.eval()
    #weights_output = W2.eval()



KeyError: "The name 'productX:0' refers to a Tensor which does not exist. The operation, 'productX', does not exist in the graph."

## Exercise - Visualize weights

> **Exercise**: Plot the weights from the first fully connected layer (the templates) with the `imshow()` function.

In [None]:
???