# Part 1: Deep Learning

In [1]:
import tensorflow as tf
from functools import partial
import time
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.metrics import accuracy_score
from DNNClassifier import DNNClassifier
from sklearn.model_selection import RandomizedSearchCV

def shuffle_split(X, y, n_batches):
    np.random.seed(seed=42)
    rnd_idx = np.random.permutation(len(X))
    for i_idx in np.array_split(rnd_idx, n_batches):
        X_batch = X[i_idx]
        y_batch = y[i_idx]
        yield X_batch, y_batch

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

### 1. Build a DNN with five hidden layers of 100 neurons each, He initialization, and the ELU activation function.

In [2]:
print('Task 1 start')
time.sleep(1)

n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_hidden3 = 100
n_hidden4 = 100
n_hidden5 = 100
n_outputs = 5

he_init = tf.contrib.layers.variance_scaling_initializer()
dense_layer = partial(tf.layers.dense, activation=tf.nn.elu, kernel_initializer=he_init)
reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.int64, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = dense_layer(X, n_hidden1, name='hidden1')
    hidden2 = dense_layer(hidden1, n_hidden2, name='hidden2')
    hidden3 = dense_layer(hidden2, n_hidden3, name='hidden3')
    hidden4 = dense_layer(hidden3, n_hidden4, name='hidden4')
    hidden5 = dense_layer(hidden4, n_hidden5, name='hidden5')
    logits = dense_layer(hidden5, n_outputs, activation=None, name='outputs')
    
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

Task 1 start


### 2. Using Adam optimization and early stopping, try training it on MNIST but only on digits 0 to 4, as we will use transfer learning for digits 5 to 9 in the next exercise. You will need a softmax output layer with five neurons.

In [3]:
print('Task 2 start')
time.sleep(1)

learning_rate = 0.001

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
mnist = input_data.read_data_sets('/tmp/data/')
X_train = mnist.train.images[mnist.train.labels < 5]
y_train = mnist.train.labels[mnist.train.labels < 5]
X_test = mnist.test.images[mnist.test.labels < 5]
y_test = mnist.test.labels[mnist.test.labels < 5]
X_valid = mnist.validation.images[mnist.validation.labels < 5]
y_valid = mnist.validation.labels[mnist.validation.labels < 5]


n_epochs = 50
batch_size = 50
n_batches = len(X_train) // batch_size
best_loss = float('inf')
patience = 2
cnt_patience = 0
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_split(X_train, y_train, n_batches):
            sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})
        accuracy_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        accuracy_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        loss_test = loss.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, 'train_acc:', accuracy_train, 'test_acc:', accuracy_test, 'loss', loss_test,)
        if loss_test < best_loss:
            best_loss = loss_test
        else:
            cnt_patience += 1
            if cnt_patience > patience:
                'Early stopping!'
                break

Task 2 start
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
0 train_acc: 1.0 test_acc: 0.9820977 loss 0.04894622
1 train_acc: 0.98 test_acc: 0.98715705 loss 0.04120996
2 train_acc: 1.0 test_acc: 0.98813 loss 0.03728239
3 train_acc: 1.0 test_acc: 0.9910488 loss 0.030111544
4 train_acc: 1.0 test_acc: 0.9918272 loss 0.026404124
5 train_acc: 1.0 test_acc: 0.99065965 loss 0.0332601
6 train_acc: 1.0 test_acc: 0.9918

### 3. Tune the hyperparameters using cross-validation and see what precision you can achieve.

In [4]:
print('Task 3 start')
time.sleep(1)

param_distribs = {
    "n_neurons": [10],
    "batch_size": [10],
    "learning_rate": [0.01],
    "activation": [tf.nn.relu, tf.nn.elu],
}

random_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,fit_params={"X_valid": X_valid, "y_valid": y_valid, "n_epochs": 10},
                                random_state=42, verbose=2)

random_search.fit(X_train, y_train)
y_pred = random_search.predict(X_test)
accuracy_score(y_test, y_pred)

Task 3 start


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 2 candidates, totalling 6 fits
[CV] n_neurons=10, learning_rate=0.01, batch_size=10, activation=<function relu at 0x114093d08> 
0	Validation loss: 0.163162	Best loss: 0.163162	Accuracy: 96.05%
1	Validation loss: 0.142110	Best loss: 0.142110	Accuracy: 95.86%
2	Validation loss: 0.151269	Best loss: 0.142110	Accuracy: 96.09%
3	Validation loss: 0.142484	Best loss: 0.142110	Accuracy: 96.21%
4	Validation loss: 0.128904	Best loss: 0.128904	Accuracy: 96.44%
5	Validation loss: 0.138315	Best loss: 0.128904	Accuracy: 95.97%
6	Validation loss: 0.140073	Best loss: 0.128904	Accuracy: 96.56%
7	Validation loss: 0.126383	Best loss: 0.126383	Accuracy: 96.79%
8	Validation loss: 0.145166	Best loss: 0.126383	Accuracy: 95.90%
9	Validation loss: 0.138134	Best loss: 0.126383	Accuracy: 96.56%
[CV]  n_neurons=10, learning_rate=0.01, batch_size=10, activation=<function relu at 0x114093d08>, total=  22.7s
[CV] n_neurons=10, learning_rate=0.01, batch_size=10, activation=<function relu at

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   22.9s remaining:    0.0s


0	Validation loss: 0.195183	Best loss: 0.195183	Accuracy: 94.57%
1	Validation loss: 0.133971	Best loss: 0.133971	Accuracy: 95.97%
2	Validation loss: 0.139042	Best loss: 0.133971	Accuracy: 96.09%
3	Validation loss: 0.099624	Best loss: 0.099624	Accuracy: 97.22%
4	Validation loss: 0.122260	Best loss: 0.099624	Accuracy: 96.72%
5	Validation loss: 0.119203	Best loss: 0.099624	Accuracy: 96.95%
6	Validation loss: 0.122047	Best loss: 0.099624	Accuracy: 96.72%
7	Validation loss: 0.115580	Best loss: 0.099624	Accuracy: 97.07%
8	Validation loss: 0.105392	Best loss: 0.099624	Accuracy: 97.19%
9	Validation loss: 0.112131	Best loss: 0.099624	Accuracy: 96.68%
[CV]  n_neurons=10, learning_rate=0.01, batch_size=10, activation=<function relu at 0x114093d08>, total=  24.5s
[CV] n_neurons=10, learning_rate=0.01, batch_size=10, activation=<function relu at 0x114093d08> 
0	Validation loss: 0.186138	Best loss: 0.186138	Accuracy: 94.64%
1	Validation loss: 0.126624	Best loss: 0.126624	Accuracy: 96.76%
2	Validatio

[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  2.0min finished


0	Validation loss: 0.103769	Best loss: 0.103769	Accuracy: 96.95%
1	Validation loss: 0.097480	Best loss: 0.097480	Accuracy: 97.54%
2	Validation loss: 0.088346	Best loss: 0.088346	Accuracy: 97.73%
3	Validation loss: 0.085567	Best loss: 0.085567	Accuracy: 97.81%
4	Validation loss: 0.097083	Best loss: 0.085567	Accuracy: 97.38%
5	Validation loss: 0.095767	Best loss: 0.085567	Accuracy: 97.34%
6	Validation loss: 0.115644	Best loss: 0.085567	Accuracy: 97.38%
7	Validation loss: 0.104943	Best loss: 0.085567	Accuracy: 97.03%
8	Validation loss: 0.089925	Best loss: 0.085567	Accuracy: 97.89%
9	Validation loss: 0.085950	Best loss: 0.085567	Accuracy: 97.93%


0.9824868651488616

### 4. Now try adding Batch Normalization and compare the learning curves: is it converging faster than before? Does it produce a better model?

In [5]:
print('Task 4 start')
time.sleep(1)

def leaky_relu(alpha=0.01):
    def parametrized_leaky_relu(z, name=None):
        return tf.maximum(alpha * z, z, name=name)
    return parametrized_leaky_relu

dnn_clf_bn = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,n_neurons=90, random_state=42,
                           batch_norm_momentum=0.95)
dnn_clf_bn.fit(X_train, y_train, n_epochs=10, X_valid=X_valid, y_valid=y_valid)

Task 4 start
0	Validation loss: 0.038478	Best loss: 0.038478	Accuracy: 98.87%
1	Validation loss: 0.036767	Best loss: 0.036767	Accuracy: 98.71%
2	Validation loss: 0.039822	Best loss: 0.036767	Accuracy: 98.83%
3	Validation loss: 0.036627	Best loss: 0.036627	Accuracy: 98.79%
4	Validation loss: 0.047019	Best loss: 0.036627	Accuracy: 98.63%
5	Validation loss: 0.031215	Best loss: 0.031215	Accuracy: 99.10%
6	Validation loss: 0.044797	Best loss: 0.031215	Accuracy: 98.55%
7	Validation loss: 0.028140	Best loss: 0.028140	Accuracy: 99.10%
8	Validation loss: 0.033399	Best loss: 0.028140	Accuracy: 98.98%
9	Validation loss: 0.033288	Best loss: 0.028140	Accuracy: 99.18%


DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x1c28931ae8>,
       batch_norm_momentum=0.95, batch_size=500, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0xb24b059d8>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=90,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

### It is converging faster, but overall accuracy is a bit worse.

### 5. Is the model overfitting the training set? Try adding dropout to every layer and try again. Does it help?

In [6]:
param_distribs = {
    "n_neurons": [10],
    "batch_size": [10],
    "learning_rate": [0.01],
    "activation": [tf.nn.relu, tf.nn.elu],
    "dropout_rate": [0.1],
    "batch_norm_momentum": [0.9]
}


random_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50, fit_params={"X_valid": X_valid, "y_valid": y_valid, "n_epochs": 10},
                                random_state=42, verbose=2)
random_search.fit(X_train, y_train)
y_pred = random_search.predict(X_test)
accuracy_score(y_test, y_pred)
random_search.best_estimator_.save("./model")

Fitting 3 folds for each of 2 candidates, totalling 6 fits
[CV] n_neurons=10, learning_rate=0.01, dropout_rate=0.1, batch_size=10, batch_norm_momentum=0.9, activation=<function relu at 0x114093d08> 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


0	Validation loss: 0.204085	Best loss: 0.204085	Accuracy: 94.49%
1	Validation loss: 0.169103	Best loss: 0.169103	Accuracy: 94.80%
2	Validation loss: 0.175184	Best loss: 0.169103	Accuracy: 94.84%
3	Validation loss: 0.163380	Best loss: 0.163380	Accuracy: 95.27%
4	Validation loss: 0.164530	Best loss: 0.163380	Accuracy: 95.58%
5	Validation loss: 0.137814	Best loss: 0.137814	Accuracy: 96.29%
6	Validation loss: 0.173640	Best loss: 0.137814	Accuracy: 95.50%
7	Validation loss: 0.147342	Best loss: 0.137814	Accuracy: 95.93%
8	Validation loss: 0.140294	Best loss: 0.137814	Accuracy: 96.44%
9	Validation loss: 0.131124	Best loss: 0.131124	Accuracy: 96.76%
[CV]  n_neurons=10, learning_rate=0.01, dropout_rate=0.1, batch_size=10, batch_norm_momentum=0.9, activation=<function relu at 0x114093d08>, total= 1.5min
[CV] n_neurons=10, learning_rate=0.01, dropout_rate=0.1, batch_size=10, batch_norm_momentum=0.9, activation=<function relu at 0x114093d08> 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.5min remaining:    0.0s


0	Validation loss: 0.187497	Best loss: 0.187497	Accuracy: 94.64%
1	Validation loss: 0.175127	Best loss: 0.175127	Accuracy: 95.62%
2	Validation loss: 0.155351	Best loss: 0.155351	Accuracy: 95.90%
3	Validation loss: 0.148643	Best loss: 0.148643	Accuracy: 95.82%
4	Validation loss: 0.139371	Best loss: 0.139371	Accuracy: 96.29%
5	Validation loss: 0.156797	Best loss: 0.139371	Accuracy: 95.47%
6	Validation loss: 0.150020	Best loss: 0.139371	Accuracy: 96.64%
7	Validation loss: 0.147593	Best loss: 0.139371	Accuracy: 96.01%
8	Validation loss: 0.125479	Best loss: 0.125479	Accuracy: 96.36%
9	Validation loss: 0.127403	Best loss: 0.125479	Accuracy: 96.36%
[CV]  n_neurons=10, learning_rate=0.01, dropout_rate=0.1, batch_size=10, batch_norm_momentum=0.9, activation=<function relu at 0x114093d08>, total= 1.6min
[CV] n_neurons=10, learning_rate=0.01, dropout_rate=0.1, batch_size=10, batch_norm_momentum=0.9, activation=<function relu at 0x114093d08> 
0	Validation loss: 0.207432	Best loss: 0.207432	Accurac

[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  9.6min finished


0	Validation loss: 0.145685	Best loss: 0.145685	Accuracy: 95.35%
1	Validation loss: 0.126576	Best loss: 0.126576	Accuracy: 96.01%
2	Validation loss: 0.113865	Best loss: 0.113865	Accuracy: 96.21%
3	Validation loss: 0.119362	Best loss: 0.113865	Accuracy: 96.40%
4	Validation loss: 0.103655	Best loss: 0.103655	Accuracy: 96.79%
5	Validation loss: 0.097415	Best loss: 0.097415	Accuracy: 97.07%
6	Validation loss: 0.095078	Best loss: 0.095078	Accuracy: 97.07%
7	Validation loss: 0.103109	Best loss: 0.095078	Accuracy: 96.79%
8	Validation loss: 0.102628	Best loss: 0.095078	Accuracy: 96.76%
9	Validation loss: 0.104654	Best loss: 0.095078	Accuracy: 96.79%


### The model doesn't overfit. So dropout of every layer doesn't really help in accuracy improvement. 

# Part 2: Transfer learning

In [7]:
import tensorflow as tf
import numpy as np
import os, sys, time
sys.path.append(os.getcwd())
from DNNClassifier import DNNClassifier
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.metrics import accuracy_score

he_init = tf.contrib.layers.variance_scaling_initializer()
mnist = input_data.read_data_sets("/tmp/data/")

X_train_full = mnist.train.images[mnist.train.labels >= 5]
y_train_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test = mnist.test.images[mnist.test.labels >= 5]
y_test = mnist.test.labels[mnist.test.labels >= 5] - 5

def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train, y_train = sample_n_instances_per_class(X_train_full, y_train_full, n=100)
X_valid, y_valid = sample_n_instances_per_class(X_valid_full, y_valid_full, n=30)

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

n_epochs = 1000
batch_size = 20

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


### 1. Create a new DNN that reuses all the pretrained hidden layers of the previous model, freezes them, and replaces the softmax output layer with a fresh new one.

In [8]:
print('Task 1 start')
time.sleep(1)

reset_graph()

restore_saver = tf.train.import_meta_graph("./model.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
logits = Y_proba.op.inputs[0]
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")
learning_rate = 0.01


output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

Task 1 start


### 2. Train this new DNN on digits 5 to 9, using only 100 images per digit, and time how long it takes. Despite this small number of examples, can you achieve high precision?

In [9]:
print('Task 2 start')
time.sleep(1)

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model")
    for var in output_layer_vars:
        var.initializer.run()
    t0 = time.time()
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

Task 2 start
INFO:tensorflow:Restoring parameters from ./model
0	Validation loss: 1.613087	Best loss: 1.613087	Accuracy: 29.33%
1	Validation loss: 1.381432	Best loss: 1.381432	Accuracy: 46.67%
2	Validation loss: 1.263373	Best loss: 1.263373	Accuracy: 49.33%
3	Validation loss: 1.193123	Best loss: 1.193123	Accuracy: 55.33%
4	Validation loss: 1.147694	Best loss: 1.147694	Accuracy: 59.33%
5	Validation loss: 1.115880	Best loss: 1.115880	Accuracy: 59.33%
6	Validation loss: 1.102723	Best loss: 1.102723	Accuracy: 61.33%
7	Validation loss: 1.088225	Best loss: 1.088225	Accuracy: 61.33%
8	Validation loss: 1.087017	Best loss: 1.087017	Accuracy: 61.33%
9	Validation loss: 1.079110	Best loss: 1.079110	Accuracy: 62.67%
10	Validation loss: 1.066826	Best loss: 1.066826	Accuracy: 62.00%
11	Validation loss: 1.067650	Best loss: 1.066826	Accuracy: 60.00%
12	Validation loss: 1.077122	Best loss: 1.066826	Accuracy: 61.33%
13	Validation loss: 1.064024	Best loss: 1.064024	Accuracy: 61.33%
14	Validation loss: 1.0

125	Validation loss: 0.968067	Best loss: 0.964836	Accuracy: 65.33%
126	Validation loss: 0.966442	Best loss: 0.964836	Accuracy: 65.33%
127	Validation loss: 0.974345	Best loss: 0.964836	Accuracy: 64.67%
128	Validation loss: 0.968021	Best loss: 0.964836	Accuracy: 65.33%
129	Validation loss: 0.970689	Best loss: 0.964836	Accuracy: 65.33%
130	Validation loss: 0.968800	Best loss: 0.964836	Accuracy: 65.33%
131	Validation loss: 0.970321	Best loss: 0.964836	Accuracy: 65.33%
132	Validation loss: 0.963780	Best loss: 0.963780	Accuracy: 65.33%
133	Validation loss: 0.969368	Best loss: 0.963780	Accuracy: 65.33%
134	Validation loss: 0.962952	Best loss: 0.962952	Accuracy: 65.33%
135	Validation loss: 0.971286	Best loss: 0.962952	Accuracy: 65.33%
136	Validation loss: 0.973394	Best loss: 0.962952	Accuracy: 65.33%
137	Validation loss: 0.975058	Best loss: 0.962952	Accuracy: 64.67%
138	Validation loss: 0.964954	Best loss: 0.962952	Accuracy: 66.00%
139	Validation loss: 0.966920	Best loss: 0.962952	Accuracy: 65

### Despite this small number of examples, it is difficult to get high precision.

### 3. Try caching the frozen layers, and train the model again: how much faster is it now?

In [10]:
print('Task 3 start')
time.sleep(1)

hidden5_out = tf.get_default_graph().get_tensor_by_name("hidden5_out:0")

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model")
    for var in output_layer_vars:
        var.initializer.run()
    t0 = time.time()
    hidden5_train = hidden5_out.eval(feed_dict={X: X_train, y: y_train})
    hidden5_valid = hidden5_out.eval(feed_dict={X: X_valid, y: y_valid})
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            h5_batch, y_batch = hidden5_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={hidden5_out: h5_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={hidden5_out: hidden5_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

Task 3 start
INFO:tensorflow:Restoring parameters from ./model
0	Validation loss: 1.605368	Best loss: 1.605368	Accuracy: 30.00%
1	Validation loss: 1.385463	Best loss: 1.385463	Accuracy: 46.67%
2	Validation loss: 1.256262	Best loss: 1.256262	Accuracy: 50.00%
3	Validation loss: 1.188582	Best loss: 1.188582	Accuracy: 56.00%
4	Validation loss: 1.150881	Best loss: 1.150881	Accuracy: 58.00%
5	Validation loss: 1.116827	Best loss: 1.116827	Accuracy: 60.00%
6	Validation loss: 1.104238	Best loss: 1.104238	Accuracy: 63.33%
7	Validation loss: 1.092081	Best loss: 1.092081	Accuracy: 61.33%
8	Validation loss: 1.081479	Best loss: 1.081479	Accuracy: 62.67%
9	Validation loss: 1.074789	Best loss: 1.074789	Accuracy: 60.67%
10	Validation loss: 1.069879	Best loss: 1.069879	Accuracy: 64.00%
11	Validation loss: 1.073716	Best loss: 1.069879	Accuracy: 60.67%
12	Validation loss: 1.062565	Best loss: 1.062565	Accuracy: 62.00%
13	Validation loss: 1.066350	Best loss: 1.062565	Accuracy: 60.00%
14	Validation loss: 1.0

128	Validation loss: 0.962128	Best loss: 0.962128	Accuracy: 65.33%
129	Validation loss: 0.966995	Best loss: 0.962128	Accuracy: 65.33%
130	Validation loss: 0.972174	Best loss: 0.962128	Accuracy: 64.67%
131	Validation loss: 0.973569	Best loss: 0.962128	Accuracy: 65.33%
132	Validation loss: 0.968293	Best loss: 0.962128	Accuracy: 65.33%
133	Validation loss: 0.972234	Best loss: 0.962128	Accuracy: 64.67%
134	Validation loss: 0.970420	Best loss: 0.962128	Accuracy: 65.33%
135	Validation loss: 0.974617	Best loss: 0.962128	Accuracy: 64.67%
136	Validation loss: 0.967816	Best loss: 0.962128	Accuracy: 65.33%
137	Validation loss: 0.963773	Best loss: 0.962128	Accuracy: 65.33%
138	Validation loss: 0.968723	Best loss: 0.962128	Accuracy: 65.33%
139	Validation loss: 0.974694	Best loss: 0.962128	Accuracy: 64.67%
140	Validation loss: 0.964783	Best loss: 0.962128	Accuracy: 66.00%
141	Validation loss: 0.967297	Best loss: 0.962128	Accuracy: 65.33%
142	Validation loss: 0.971275	Best loss: 0.962128	Accuracy: 64

It is very fast. It took about 10 seconds to finish the job.

### 4. Try again reusing just four hidden layers instead of five. Can you achieve a higher precision?

In [12]:
print('Task 4 start')
time.sleep(1)

reset_graph()

n_outputs = 5
learning_rate = 0.01

restore_saver = tf.train.import_meta_graph("./model.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden4_out:0")
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model")
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = four_frozen_saver.save(sess, "./mnistModel_5_to_9_four_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./mnistModel_5_to_9_four_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

Task 4 start
INFO:tensorflow:Restoring parameters from ./model
0	Validation loss: 1.367993	Best loss: 1.367993	Accuracy: 50.67%
1	Validation loss: 1.242862	Best loss: 1.242862	Accuracy: 57.33%
2	Validation loss: 1.187196	Best loss: 1.187196	Accuracy: 58.00%
3	Validation loss: 1.154578	Best loss: 1.154578	Accuracy: 56.67%
4	Validation loss: 1.127822	Best loss: 1.127822	Accuracy: 57.33%
5	Validation loss: 1.110844	Best loss: 1.110844	Accuracy: 56.67%
6	Validation loss: 1.100803	Best loss: 1.100803	Accuracy: 60.00%
7	Validation loss: 1.089174	Best loss: 1.089174	Accuracy: 61.33%
8	Validation loss: 1.088616	Best loss: 1.088616	Accuracy: 61.33%
9	Validation loss: 1.081465	Best loss: 1.081465	Accuracy: 60.00%
10	Validation loss: 1.069525	Best loss: 1.069525	Accuracy: 60.00%
11	Validation loss: 1.068703	Best loss: 1.068703	Accuracy: 61.33%
12	Validation loss: 1.074007	Best loss: 1.068703	Accuracy: 61.33%
13	Validation loss: 1.063972	Best loss: 1.063972	Accuracy: 60.67%
14	Validation loss: 1.0

124	Validation loss: 0.993649	Best loss: 0.982751	Accuracy: 63.33%
125	Validation loss: 0.985647	Best loss: 0.982751	Accuracy: 63.33%
126	Validation loss: 0.983785	Best loss: 0.982751	Accuracy: 63.33%
127	Validation loss: 0.988552	Best loss: 0.982751	Accuracy: 63.33%
128	Validation loss: 0.984974	Best loss: 0.982751	Accuracy: 63.33%
129	Validation loss: 0.988550	Best loss: 0.982751	Accuracy: 63.33%
130	Validation loss: 0.987274	Best loss: 0.982751	Accuracy: 64.00%
131	Validation loss: 0.988010	Best loss: 0.982751	Accuracy: 62.00%
132	Validation loss: 0.983320	Best loss: 0.982751	Accuracy: 63.33%
133	Validation loss: 0.985608	Best loss: 0.982751	Accuracy: 63.33%
134	Validation loss: 0.980552	Best loss: 0.980552	Accuracy: 64.00%
135	Validation loss: 0.986996	Best loss: 0.980552	Accuracy: 63.33%
136	Validation loss: 0.991633	Best loss: 0.980552	Accuracy: 62.00%
137	Validation loss: 0.991004	Best loss: 0.980552	Accuracy: 64.00%
138	Validation loss: 0.984321	Best loss: 0.980552	Accuracy: 62

You can't achieve higher precision.

### 5. Now unfreeze the top two hidden layers and continue training: can you get the model to perform even better?

In [13]:
print('Task 5 start')
time.sleep(1)

learning_rate = 0.01
unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam23")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    four_frozen_saver.restore(sess, "./mnistModel_5_to_9_four_frozen")

    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = two_frozen_saver.save(sess, "./mnistModel_5_to_9_two_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./mnistModel_5_to_9_two_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

learning_rate = 0.01

optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam24")
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
no_frozen_saver = tf.train.Saver()

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    two_frozen_saver.restore(sess, "./mnistModel_5_to_9_two_frozen")
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = no_frozen_saver.save(sess, "./mnistModel_5_to_9_no_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    no_frozen_saver.restore(sess, "./mnistModel_5_to_9_no_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

dnn_clf_5_to_9 = DNNClassifier(n_hidden_layers=4, random_state=42)
dnn_clf_5_to_9.fit(X_train, y_train, n_epochs=1000, X_valid=X_valid, y_valid=y_valid)
y_pred = dnn_clf_5_to_9.predict(X_test)
accuracy_score(y_test, y_pred)

Task 5 start
INFO:tensorflow:Restoring parameters from ./mnistModel_5_to_9_four_frozen
0	Validation loss: 0.984967	Best loss: 0.984967	Accuracy: 62.67%
1	Validation loss: 0.984831	Best loss: 0.984831	Accuracy: 64.00%
2	Validation loss: 0.972830	Best loss: 0.972830	Accuracy: 64.67%
3	Validation loss: 0.971631	Best loss: 0.971631	Accuracy: 64.67%
4	Validation loss: 0.973418	Best loss: 0.971631	Accuracy: 64.67%
5	Validation loss: 0.979836	Best loss: 0.971631	Accuracy: 63.33%
6	Validation loss: 0.967368	Best loss: 0.967368	Accuracy: 66.00%
7	Validation loss: 0.963179	Best loss: 0.963179	Accuracy: 66.67%
8	Validation loss: 0.966520	Best loss: 0.963179	Accuracy: 66.67%
9	Validation loss: 0.962242	Best loss: 0.962242	Accuracy: 66.00%
10	Validation loss: 0.959144	Best loss: 0.959144	Accuracy: 66.00%
11	Validation loss: 0.961056	Best loss: 0.959144	Accuracy: 66.67%
12	Validation loss: 0.957902	Best loss: 0.957902	Accuracy: 67.33%
13	Validation loss: 0.961002	Best loss: 0.957902	Accuracy: 65.33%

0.8996091339230611

So unfreezing the top two hidden layers dramastically improves the model.