# Part 1: Deep Learning

In [1]:
import tensorflow as tf
from functools import partial
import time
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.metrics import accuracy_score
from DNNClassifier import DNNClassifier
from sklearn.model_selection import RandomizedSearchCV

def shuffle_split(X, y, n_batches):
    np.random.seed(seed=42)
    rnd_idx = np.random.permutation(len(X))
    for i_idx in np.array_split(rnd_idx, n_batches):
        X_batch = X[i_idx]
        y_batch = y[i_idx]
        yield X_batch, y_batch

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

### 1. Build a DNN with five hidden layers of 100 neurons each, He initialization, and the ELU activation function.

In [2]:
print('Task 1 start')
time.sleep(1)

n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_hidden3 = 100
n_hidden4 = 100
n_hidden5 = 100
n_outputs = 5

he_init = tf.contrib.layers.variance_scaling_initializer()
dense_layer = partial(tf.layers.dense, activation=tf.nn.elu, kernel_initializer=he_init)
reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.int64, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = dense_layer(X, n_hidden1, name='hidden1')
    hidden2 = dense_layer(hidden1, n_hidden2, name='hidden2')
    hidden3 = dense_layer(hidden2, n_hidden3, name='hidden3')
    hidden4 = dense_layer(hidden3, n_hidden4, name='hidden4')
    hidden5 = dense_layer(hidden4, n_hidden5, name='hidden5')
    logits = dense_layer(hidden5, n_outputs, activation=None, name='outputs')
    
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

Task 1 start


### 2. Using Adam optimization and early stopping, try training it on MNIST but only on digits 0 to 4, as we will use transfer learning for digits 5 to 9 in the next exercise. You will need a softmax output layer with five neurons.

In [3]:
print('Task 2 start')
time.sleep(1)

learning_rate = 0.001

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
mnist = input_data.read_data_sets('/tmp/data/')
X_train = mnist.train.images[mnist.train.labels < 5]
y_train = mnist.train.labels[mnist.train.labels < 5]
X_test = mnist.test.images[mnist.test.labels < 5]
y_test = mnist.test.labels[mnist.test.labels < 5]
X_valid = mnist.validation.images[mnist.validation.labels < 5]
y_valid = mnist.validation.labels[mnist.validation.labels < 5]


n_epochs = 50
batch_size = 50
n_batches = len(X_train) // batch_size
best_loss = float('inf')
patience = 2
cnt_patience = 0
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_split(X_train, y_train, n_batches):
            sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})
        accuracy_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        accuracy_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        loss_test = loss.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, 'train_acc:', accuracy_train, 'test_acc:', accuracy_test, 'loss', loss_test,)
        if loss_test < best_loss:
            best_loss = loss_test
        else:
            cnt_patience += 1
            if cnt_patience > patience:
                'Early stopping!'
                break

Task 2 start
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
0 train_acc: 1.0 test_acc: 0.9

### 3. Tune the hyperparameters using cross-validation and see what precision you can achieve.

In [5]:
print('Task 3 start')
time.sleep(1)

param_distribs = {
    "n_neurons": [10, 100, 150],
    "batch_size": [10, 50],
    "learning_rate": [0.01, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu],
    "n_hidden_layers": [0, 1, 3],
    "optimizer_class": [tf.train.AdamOptimizer, tf.train.AdagradOptimizer]
}

random_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,fit_params={"X_valid": X_valid, "y_valid": y_valid, "n_epochs": 10},
                                random_state=42, verbose=2)

random_search.fit(X_train, y_train)
y_pred = random_search.predict(X_test)
accuracy_score(y_test, y_pred)

Task 3 start


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.01, batch_size=50, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.152497	Best loss: 0.152497	Accuracy: 95.58%
1	Validation loss: 0.127988	Best loss: 0.127988	Accuracy: 96.25%
2	Validation loss: 0.118213	Best loss: 0.118213	Accuracy: 96.44%
3	Validation loss: 0.113110	Best loss: 0.113110	Accuracy: 96.56%
4	Validation loss: 0.107176	Best loss: 0.107176	Accuracy: 96.72%
5	Validation loss: 0.105178	Best loss: 0.105178	Accuracy: 96.79%
6	Validation loss: 0.101452	Best loss: 0.101452	Accuracy: 96.91%
7	Validation loss: 0.098651	Best loss: 0.098651	Accuracy: 97.07%
8	Validation loss: 0.098709	Best loss: 0.098651	Accuracy: 96.99%
9	Validation loss: 0.094841	Best loss: 0.094841	Accuracy: 97.15%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neuron

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.6s remaining:    0.0s


0	Validation loss: 0.151717	Best loss: 0.151717	Accuracy: 95.78%
1	Validation loss: 0.126819	Best loss: 0.126819	Accuracy: 96.33%
2	Validation loss: 0.117319	Best loss: 0.117319	Accuracy: 96.44%
3	Validation loss: 0.112161	Best loss: 0.112161	Accuracy: 96.68%
4	Validation loss: 0.107701	Best loss: 0.107701	Accuracy: 96.87%
5	Validation loss: 0.106203	Best loss: 0.106203	Accuracy: 96.83%
6	Validation loss: 0.103154	Best loss: 0.103154	Accuracy: 97.11%
7	Validation loss: 0.101419	Best loss: 0.101419	Accuracy: 96.99%
8	Validation loss: 0.099047	Best loss: 0.099047	Accuracy: 97.19%
9	Validation loss: 0.095380	Best loss: 0.095380	Accuracy: 97.19%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.01, batch_size=50, activation=<function elu at 0x1228d39d8>, total=   8.0s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.01, ba

[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=1, learning_rate=0.01, batch_size=10, activation=<function elu at 0x1228d39d8>, total=  26.4s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, batch_size=10, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.094712	Best loss: 0.094712	Accuracy: 96.99%
1	Validation loss: 0.081546	Best loss: 0.081546	Accuracy: 97.62%
2	Validation loss: 0.076062	Best loss: 0.076062	Accuracy: 97.50%
3	Validation loss: 0.081023	Best loss: 0.076062	Accuracy: 97.50%
4	Validation loss: 0.070513	Best loss: 0.070513	Accuracy: 97.77%
5	Validation loss: 0.070253	Best loss: 0.070253	Accuracy: 97.85%
6	Validation loss: 0.070301	Best loss: 0.070253	Accuracy: 97.85%
7	Validation loss: 0.071902	Best loss: 0.070253	Accuracy: 97.81%
8	Validation loss: 0.071157	Best loss: 0.070253	Accuracy: 97.85%
9	Validation loss:

8	Validation loss: 0.103539	Best loss: 0.096761	Accuracy: 97.19%
9	Validation loss: 0.110813	Best loss: 0.096761	Accuracy: 96.76%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=3, learning_rate=0.01, batch_size=10, activation=<function relu at 0x1228d9d08>, total=  16.3s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=3, learning_rate=0.01, batch_size=10, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.137910	Best loss: 0.137910	Accuracy: 96.48%
1	Validation loss: 0.138925	Best loss: 0.137910	Accuracy: 96.52%
2	Validation loss: 0.150167	Best loss: 0.137910	Accuracy: 95.50%
3	Validation loss: 0.158665	Best loss: 0.137910	Accuracy: 96.01%
4	Validation loss: 0.125279	Best loss: 0.125279	Accuracy: 96.64%
5	Validation loss: 0.146355	Best loss: 0.125279	Accuracy: 96.33%
6	Validation loss: 0.134619	Best loss: 0.125279	Accuracy: 96.29%
7	Validation loss: 0.1

6	Validation loss: 1.189452	Best loss: 0.585426	Accuracy: 39.87%
7	Validation loss: 1.160004	Best loss: 0.585426	Accuracy: 40.03%
8	Validation loss: 1.166771	Best loss: 0.585426	Accuracy: 40.42%
9	Validation loss: 1.158666	Best loss: 0.585426	Accuracy: 40.62%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=3, learning_rate=0.1, batch_size=50, activation=<function relu at 0x1228d9d08>, total=   5.2s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=3, learning_rate=0.1, batch_size=50, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.309828	Best loss: 0.309828	Accuracy: 92.96%
1	Validation loss: 1.373788	Best loss: 0.309828	Accuracy: 33.03%
2	Validation loss: 1.608681	Best loss: 0.309828	Accuracy: 22.01%
3	Validation loss: 1.620388	Best loss: 0.309828	Accuracy: 18.73%
4	Validation loss: 1.611311	Best loss: 0.309828	Accuracy: 22.01%
5	Validation loss: 1.612

4	Validation loss: 0.058066	Best loss: 0.058066	Accuracy: 98.08%
5	Validation loss: 0.057442	Best loss: 0.057442	Accuracy: 98.20%
6	Validation loss: 0.057903	Best loss: 0.057442	Accuracy: 98.20%
7	Validation loss: 0.049626	Best loss: 0.049626	Accuracy: 98.40%
8	Validation loss: 0.050754	Best loss: 0.049626	Accuracy: 98.48%
9	Validation loss: 0.046137	Best loss: 0.046137	Accuracy: 98.51%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=3, learning_rate=0.01, batch_size=50, activation=<function relu at 0x1228d9d08>, total=   8.2s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.01, batch_size=10, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.096830	Best loss: 0.096830	Accuracy: 96.60%
1	Validation loss: 0.077338	Best loss: 0.077338	Accuracy: 97.54%
2	Validation loss: 0.076574	Best loss: 0.076574	Accuracy: 97.85%
3	Validation los

2	Validation loss: 0.090670	Best loss: 0.090670	Accuracy: 97.38%
3	Validation loss: 0.082176	Best loss: 0.082176	Accuracy: 97.73%
4	Validation loss: 0.078183	Best loss: 0.078183	Accuracy: 97.85%
5	Validation loss: 0.076956	Best loss: 0.076956	Accuracy: 97.77%
6	Validation loss: 0.077938	Best loss: 0.076956	Accuracy: 97.58%
7	Validation loss: 0.074401	Best loss: 0.074401	Accuracy: 97.65%
8	Validation loss: 0.073934	Best loss: 0.073934	Accuracy: 97.81%
9	Validation loss: 0.072668	Best loss: 0.072668	Accuracy: 97.85%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, batch_size=50, activation=<function elu at 0x1228d39d8>, total=   3.2s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, batch_size=50, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.111301	Best loss: 0.111301	Accuracy: 96.72%
1	Validation l

0	Validation loss: 0.053715	Best loss: 0.053715	Accuracy: 98.12%
1	Validation loss: 0.046101	Best loss: 0.046101	Accuracy: 98.20%
2	Validation loss: 0.038654	Best loss: 0.038654	Accuracy: 98.75%
3	Validation loss: 0.037729	Best loss: 0.037729	Accuracy: 98.59%
4	Validation loss: 0.031675	Best loss: 0.031675	Accuracy: 98.87%
5	Validation loss: 0.031553	Best loss: 0.031553	Accuracy: 98.83%
6	Validation loss: 0.028024	Best loss: 0.028024	Accuracy: 99.02%
7	Validation loss: 0.031185	Best loss: 0.028024	Accuracy: 98.83%
8	Validation loss: 0.029295	Best loss: 0.028024	Accuracy: 99.02%
9	Validation loss: 0.029807	Best loss: 0.028024	Accuracy: 99.14%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.1, batch_size=10, activation=<function relu at 0x1228d9d08>, total= 9.5min
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.1, bat

0	Validation loss: 0.108705	Best loss: 0.108705	Accuracy: 96.72%
1	Validation loss: 0.092199	Best loss: 0.092199	Accuracy: 97.38%
2	Validation loss: 0.085479	Best loss: 0.085479	Accuracy: 97.46%
3	Validation loss: 0.077294	Best loss: 0.077294	Accuracy: 97.54%
4	Validation loss: 0.068569	Best loss: 0.068569	Accuracy: 98.12%
5	Validation loss: 0.065981	Best loss: 0.065981	Accuracy: 97.89%
6	Validation loss: 0.069251	Best loss: 0.065981	Accuracy: 97.85%
7	Validation loss: 0.057132	Best loss: 0.057132	Accuracy: 98.32%
8	Validation loss: 0.055720	Best loss: 0.055720	Accuracy: 98.28%
9	Validation loss: 0.053864	Best loss: 0.053864	Accuracy: 98.01%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=150, n_hidden_layers=3, learning_rate=0.01, batch_size=50, activation=<function elu at 0x1228d39d8>, total=   9.8s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.01, bat

0	Validation loss: 0.147570	Best loss: 0.147570	Accuracy: 95.39%
1	Validation loss: 0.123874	Best loss: 0.123874	Accuracy: 96.25%
2	Validation loss: 0.115476	Best loss: 0.115476	Accuracy: 96.36%
3	Validation loss: 0.110199	Best loss: 0.110199	Accuracy: 96.83%
4	Validation loss: 0.106357	Best loss: 0.106357	Accuracy: 96.91%
5	Validation loss: 0.105481	Best loss: 0.105481	Accuracy: 96.91%
6	Validation loss: 0.101520	Best loss: 0.101520	Accuracy: 97.03%
7	Validation loss: 0.100816	Best loss: 0.100816	Accuracy: 96.95%
8	Validation loss: 0.098615	Best loss: 0.098615	Accuracy: 97.26%
9	Validation loss: 0.095016	Best loss: 0.095016	Accuracy: 97.15%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=150, n_hidden_layers=1, learning_rate=0.01, batch_size=50, activation=<function elu at 0x1228d39d8>, total=   8.9s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=150, n_hidden_layers=1, learning_rate=0.01, ba

0	Validation loss: 0.124046	Best loss: 0.124046	Accuracy: 96.01%
1	Validation loss: 0.112526	Best loss: 0.112526	Accuracy: 96.64%
2	Validation loss: 0.107567	Best loss: 0.107567	Accuracy: 96.87%
3	Validation loss: 0.108835	Best loss: 0.107567	Accuracy: 96.52%
4	Validation loss: 0.102895	Best loss: 0.102895	Accuracy: 96.95%
5	Validation loss: 0.103922	Best loss: 0.102895	Accuracy: 96.91%
6	Validation loss: 0.102176	Best loss: 0.102176	Accuracy: 96.76%
7	Validation loss: 0.100972	Best loss: 0.100972	Accuracy: 96.91%
8	Validation loss: 0.102577	Best loss: 0.100972	Accuracy: 96.99%
9	Validation loss: 0.100378	Best loss: 0.100378	Accuracy: 97.15%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.1, batch_size=50, activation=<function relu at 0x1228d9d08>, total=   3.2s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.1, batch

[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.01, batch_size=10, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.102291	Best loss: 0.102291	Accuracy: 96.99%
1	Validation loss: 0.086031	Best loss: 0.086031	Accuracy: 97.50%
2	Validation loss: 0.075631	Best loss: 0.075631	Accuracy: 97.73%
3	Validation loss: 0.068474	Best loss: 0.068474	Accuracy: 98.12%
4	Validation loss: 0.061888	Best loss: 0.061888	Accuracy: 98.28%
5	Validation loss: 0.059189	Best loss: 0.059189	Accuracy: 98.36%
6	Validation loss: 0.057141	Best loss: 0.057141	Accuracy: 98.48%
7	Validation loss: 0.052584	Best loss: 0.052584	Accuracy: 98.36%
8	Validation loss: 0.051086	Best loss: 0.051086	Accuracy: 98.63%
9	Validation loss: 0.048340	Best loss: 0.048340	Accuracy: 98.59%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.01, batch_size=10, 

0	Validation loss: 0.109486	Best loss: 0.109486	Accuracy: 96.52%
1	Validation loss: 0.080025	Best loss: 0.080025	Accuracy: 98.20%
2	Validation loss: 0.072296	Best loss: 0.072296	Accuracy: 98.16%
3	Validation loss: 0.155453	Best loss: 0.072296	Accuracy: 97.54%
4	Validation loss: 0.120988	Best loss: 0.072296	Accuracy: 97.54%
5	Validation loss: 0.138843	Best loss: 0.072296	Accuracy: 97.54%
6	Validation loss: 0.073367	Best loss: 0.072296	Accuracy: 98.51%
7	Validation loss: 0.062935	Best loss: 0.062935	Accuracy: 98.63%
8	Validation loss: 0.103789	Best loss: 0.062935	Accuracy: 98.44%
9	Validation loss: 0.100997	Best loss: 0.062935	Accuracy: 98.40%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=3, learning_rate=0.01, batch_size=50, activation=<function elu at 0x1228d39d8>, total=  18.5s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=3, learning_rate=0.01, batch_size=50,

0	Validation loss: 0.113187	Best loss: 0.113187	Accuracy: 96.52%
1	Validation loss: 0.109183	Best loss: 0.109183	Accuracy: 96.60%
2	Validation loss: 0.119215	Best loss: 0.109183	Accuracy: 96.52%
3	Validation loss: 0.119139	Best loss: 0.109183	Accuracy: 96.36%
4	Validation loss: 0.118164	Best loss: 0.109183	Accuracy: 96.17%
5	Validation loss: 0.115740	Best loss: 0.109183	Accuracy: 96.44%
6	Validation loss: 0.127270	Best loss: 0.109183	Accuracy: 96.33%
7	Validation loss: 0.127239	Best loss: 0.109183	Accuracy: 96.17%
8	Validation loss: 0.119858	Best loss: 0.109183	Accuracy: 96.40%
9	Validation loss: 0.133943	Best loss: 0.109183	Accuracy: 96.33%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.01, batch_size=50, activation=<function relu at 0x1228d9d08>, total=   7.4s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.01, batch_size=50, 

0	Validation loss: 0.105815	Best loss: 0.105815	Accuracy: 97.07%
1	Validation loss: 0.085874	Best loss: 0.085874	Accuracy: 97.85%
2	Validation loss: 0.088266	Best loss: 0.085874	Accuracy: 97.89%
3	Validation loss: 0.101349	Best loss: 0.085874	Accuracy: 98.40%
4	Validation loss: 0.089159	Best loss: 0.085874	Accuracy: 98.36%
5	Validation loss: 0.116587	Best loss: 0.085874	Accuracy: 97.97%
6	Validation loss: 0.143260	Best loss: 0.085874	Accuracy: 97.54%
7	Validation loss: 0.133344	Best loss: 0.085874	Accuracy: 98.44%
8	Validation loss: 0.100112	Best loss: 0.085874	Accuracy: 98.67%
9	Validation loss: 0.157156	Best loss: 0.085874	Accuracy: 98.16%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=1, learning_rate=0.01, batch_size=10, activation=<function relu at 0x1228d9d08>, total= 1.2min
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.1, batch_si

0	Validation loss: 0.106216	Best loss: 0.106216	Accuracy: 96.91%
1	Validation loss: 0.087920	Best loss: 0.087920	Accuracy: 97.69%
2	Validation loss: 0.078681	Best loss: 0.078681	Accuracy: 97.73%
3	Validation loss: 0.070438	Best loss: 0.070438	Accuracy: 97.85%
4	Validation loss: 0.076943	Best loss: 0.070438	Accuracy: 97.54%
5	Validation loss: 0.078181	Best loss: 0.070438	Accuracy: 97.58%
6	Validation loss: 0.079455	Best loss: 0.070438	Accuracy: 98.01%
7	Validation loss: 0.086619	Best loss: 0.070438	Accuracy: 97.65%
8	Validation loss: 0.100265	Best loss: 0.070438	Accuracy: 97.58%
9	Validation loss: 0.086238	Best loss: 0.070438	Accuracy: 97.73%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.01, batch_size=50, activation=<function relu at 0x1228d9d08>, total=   9.7s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.01, batch_size=50, 

0	Validation loss: 0.863259	Best loss: 0.863259	Accuracy: 70.72%
1	Validation loss: 0.854538	Best loss: 0.854538	Accuracy: 80.65%
2	Validation loss: 0.793923	Best loss: 0.793923	Accuracy: 70.68%
3	Validation loss: 0.710195	Best loss: 0.710195	Accuracy: 69.66%
4	Validation loss: 1.081547	Best loss: 0.710195	Accuracy: 86.16%
5	Validation loss: 0.632932	Best loss: 0.632932	Accuracy: 85.69%
6	Validation loss: 1.131793	Best loss: 0.632932	Accuracy: 89.68%
7	Validation loss: 0.715394	Best loss: 0.632932	Accuracy: 88.51%
8	Validation loss: 1.162859	Best loss: 0.632932	Accuracy: 73.96%
9	Validation loss: 1.227432	Best loss: 0.632932	Accuracy: 85.57%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, batch_size=10, activation=<function elu at 0x1228d39d8>, total=  36.7s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, batch_size=10, act

0	Validation loss: 0.143070	Best loss: 0.143070	Accuracy: 96.48%
1	Validation loss: 0.151245	Best loss: 0.143070	Accuracy: 96.13%
2	Validation loss: 0.197296	Best loss: 0.143070	Accuracy: 95.50%
3	Validation loss: 0.185180	Best loss: 0.143070	Accuracy: 95.97%
4	Validation loss: 0.194400	Best loss: 0.143070	Accuracy: 96.21%
5	Validation loss: 0.195893	Best loss: 0.143070	Accuracy: 95.04%
6	Validation loss: 0.186197	Best loss: 0.143070	Accuracy: 96.01%
7	Validation loss: 0.213957	Best loss: 0.143070	Accuracy: 95.31%
8	Validation loss: 0.245719	Best loss: 0.143070	Accuracy: 95.54%
9	Validation loss: 0.171481	Best loss: 0.143070	Accuracy: 96.36%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.01, batch_size=10, activation=<function relu at 0x1228d9d08>, total=  32.2s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=3, learning_rate=0.01, batch_size=10, 

0	Validation loss: 0.118726	Best loss: 0.118726	Accuracy: 96.40%
1	Validation loss: 0.108067	Best loss: 0.108067	Accuracy: 96.99%
2	Validation loss: 0.109147	Best loss: 0.108067	Accuracy: 96.68%
3	Validation loss: 0.109682	Best loss: 0.108067	Accuracy: 96.99%
4	Validation loss: 0.119659	Best loss: 0.108067	Accuracy: 96.48%
5	Validation loss: 0.199471	Best loss: 0.108067	Accuracy: 94.92%
6	Validation loss: 0.121200	Best loss: 0.108067	Accuracy: 97.03%
7	Validation loss: 0.134618	Best loss: 0.108067	Accuracy: 96.05%
8	Validation loss: 0.130439	Best loss: 0.108067	Accuracy: 96.52%
9	Validation loss: 0.125034	Best loss: 0.108067	Accuracy: 96.48%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=0, learning_rate=0.01, batch_size=50, activation=<function relu at 0x1228d9d08>, total=   9.1s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=0, learning_rate=0.01, batch_size=50

0	Validation loss: 0.103907	Best loss: 0.103907	Accuracy: 96.79%
1	Validation loss: 0.093195	Best loss: 0.093195	Accuracy: 97.26%
2	Validation loss: 0.089627	Best loss: 0.089627	Accuracy: 97.38%
3	Validation loss: 0.096253	Best loss: 0.089627	Accuracy: 96.99%
4	Validation loss: 0.083713	Best loss: 0.083713	Accuracy: 97.46%
5	Validation loss: 0.086308	Best loss: 0.083713	Accuracy: 97.26%
6	Validation loss: 0.083707	Best loss: 0.083707	Accuracy: 97.46%
7	Validation loss: 0.081745	Best loss: 0.081745	Accuracy: 97.42%
8	Validation loss: 0.083186	Best loss: 0.081745	Accuracy: 97.46%
9	Validation loss: 0.081148	Best loss: 0.081148	Accuracy: 97.62%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, batch_size=10, activation=<function relu at 0x1228d9d08>, total=  20.1s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, batch

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 90.1min finished


0	Validation loss: 0.040328	Best loss: 0.040328	Accuracy: 98.75%
1	Validation loss: 0.028891	Best loss: 0.028891	Accuracy: 99.02%
2	Validation loss: 0.025929	Best loss: 0.025929	Accuracy: 99.18%
3	Validation loss: 0.024223	Best loss: 0.024223	Accuracy: 99.30%
4	Validation loss: 0.026495	Best loss: 0.024223	Accuracy: 99.06%
5	Validation loss: 0.021683	Best loss: 0.021683	Accuracy: 99.26%
6	Validation loss: 0.023073	Best loss: 0.021683	Accuracy: 99.34%
7	Validation loss: 0.022365	Best loss: 0.021683	Accuracy: 99.26%
8	Validation loss: 0.022718	Best loss: 0.021683	Accuracy: 99.30%
9	Validation loss: 0.022327	Best loss: 0.021683	Accuracy: 99.37%


0.9953298307063632

### 4. Now try adding Batch Normalization and compare the learning curves: is it converging faster than before? Does it produce a better model?

In [9]:
print('Task 4 start')
time.sleep(1)

def leaky_relu(alpha=0.01):
    def parametrized_leaky_relu(z, name=None):
        return tf.maximum(alpha * z, z, name=name)
    return parametrized_leaky_relu

dnn_clf_bn = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,n_neurons=90, random_state=42,
                           batch_norm_momentum=0.95)
dnn_clf_bn.fit(X_train, y_train, n_epochs=10, X_valid=X_valid, y_valid=y_valid)

Task 4 start
0	Validation loss: 0.038478	Best loss: 0.038478	Accuracy: 98.87%
1	Validation loss: 0.036767	Best loss: 0.036767	Accuracy: 98.71%
2	Validation loss: 0.039822	Best loss: 0.036767	Accuracy: 98.83%
3	Validation loss: 0.036627	Best loss: 0.036627	Accuracy: 98.79%
4	Validation loss: 0.047019	Best loss: 0.036627	Accuracy: 98.63%
5	Validation loss: 0.031215	Best loss: 0.031215	Accuracy: 99.10%
6	Validation loss: 0.044797	Best loss: 0.031215	Accuracy: 98.55%
7	Validation loss: 0.028140	Best loss: 0.028140	Accuracy: 99.10%
8	Validation loss: 0.033399	Best loss: 0.028140	Accuracy: 98.98%
9	Validation loss: 0.033288	Best loss: 0.028140	Accuracy: 99.18%


DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x1c2c77c378>,
       batch_norm_momentum=0.95, batch_size=500, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0xb289919d8>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=90,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

### 5. Is the model overfitting the training set? Try adding dropout to every layer and try again. Does it help?

In [11]:
param_distribs = {
    "n_neurons": [10, 100, 150],
    "batch_size": [10, 50],
    "learning_rate": [0.01, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu],
    "n_hidden_layers": [0, 1, 3],
    "optimizer_class": [tf.train.AdamOptimizer, tf.train.AdagradOptimizer],
    "dropout_rate": [0.2, 0.4],
    "batch_norm_momentum": [0.9, 0.95, 0.98]
}


random_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50, fit_params={"X_valid": X_valid, "y_valid": y_valid, "n_epochs": 10},
                                random_state=42, verbose=2)
random_search.fit(X_train, y_train)
y_pred = random_search.predict(X_test)
accuracy_score(y_test, y_pred)
random_search.best_estimator_.save("./model")

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.95, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 1.434567	Best loss: 1.434567	Accuracy: 94.53%
1	Validation loss: 1.329875	Best loss: 1.329875	Accuracy: 95.07%
2	Validation loss: 1.402850	Best loss: 1.329875	Accuracy: 94.88%
3	Validation loss: 1.751204	Best loss: 1.329875	Accuracy: 93.94%
4	Validation loss: 1.693469	Best loss: 1.329875	Accuracy: 94.68%
5	Validation loss: 1.638291	Best loss: 1.329875	Accuracy: 94.80%
6	Validation loss: 1.858445	Best loss: 1.329875	Accuracy: 94.41%
7	Validation loss: 1.912539	Best loss: 1.329875	Accuracy: 95.11%
8	Validation loss: 1.520367	Best loss: 1.329875	Accuracy: 96.29%
9	Validation loss: 1.728908	Best loss: 1.329875	Accuracy: 95.58%
[CV]  optimizer_class=<class 'tensorflow.python.training

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   22.3s remaining:    0.0s


0	Validation loss: 1.247212	Best loss: 1.247212	Accuracy: 95.15%
1	Validation loss: 1.175410	Best loss: 1.175410	Accuracy: 96.09%
2	Validation loss: 1.352797	Best loss: 1.175410	Accuracy: 95.54%
3	Validation loss: 1.333857	Best loss: 1.175410	Accuracy: 95.86%
4	Validation loss: 1.677570	Best loss: 1.175410	Accuracy: 95.19%
5	Validation loss: 1.817450	Best loss: 1.175410	Accuracy: 95.62%
6	Validation loss: 1.959067	Best loss: 1.175410	Accuracy: 94.41%
7	Validation loss: 1.700133	Best loss: 1.175410	Accuracy: 96.05%
8	Validation loss: 1.648744	Best loss: 1.175410	Accuracy: 95.43%
9	Validation loss: 1.946867	Best loss: 1.175410	Accuracy: 95.47%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.95, activation=<function relu at 0x1228d9d08>, total=  14.9s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_la

4	Validation loss: 0.102652	Best loss: 0.102652	Accuracy: 96.91%
5	Validation loss: 0.106362	Best loss: 0.102652	Accuracy: 96.52%
6	Validation loss: 0.106135	Best loss: 0.102652	Accuracy: 96.83%
7	Validation loss: 0.101282	Best loss: 0.101282	Accuracy: 96.72%
8	Validation loss: 0.103064	Best loss: 0.101282	Accuracy: 96.91%
9	Validation loss: 0.100247	Best loss: 0.100247	Accuracy: 96.91%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=150, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.95, activation=<function elu at 0x1228d39d8>, total=  16.1s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.2, batch_size=10, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.113263	Best loss: 0.113263	Accuracy: 96.60%
1	Validation loss: 0.103679	Best loss: 0.103679	Accur

8	Validation loss: 0.043564	Best loss: 0.043564	Accuracy: 98.75%
9	Validation loss: 0.043027	Best loss: 0.043027	Accuracy: 98.63%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=150, n_hidden_layers=1, learning_rate=0.01, dropout_rate=0.2, batch_size=10, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08>, total= 1.0min
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=150, n_hidden_layers=1, learning_rate=0.01, dropout_rate=0.2, batch_size=10, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.080946	Best loss: 0.080946	Accuracy: 97.69%
1	Validation loss: 0.069602	Best loss: 0.069602	Accuracy: 97.89%
2	Validation loss: 0.060380	Best loss: 0.060380	Accuracy: 98.12%
3	Validation loss: 0.057877	Best loss: 0.057877	Accuracy: 98.24%
4	Validation loss: 0.052966	Best loss: 0.052966	Accuracy: 98.36%
5	Validation loss: 0.051086	Best loss: 0.051086	Ac

[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=1, learning_rate=0.01, dropout_rate=0.2, batch_size=10, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.061996	Best loss: 0.061996	Accuracy: 98.12%
1	Validation loss: 0.048006	Best loss: 0.048006	Accuracy: 98.48%
2	Validation loss: 0.049498	Best loss: 0.048006	Accuracy: 98.36%
3	Validation loss: 0.048279	Best loss: 0.048006	Accuracy: 98.59%
4	Validation loss: 0.039946	Best loss: 0.039946	Accuracy: 98.87%
5	Validation loss: 0.049686	Best loss: 0.039946	Accuracy: 98.40%
6	Validation loss: 0.049911	Best loss: 0.039946	Accuracy: 98.44%
7	Validation loss: 0.041971	Best loss: 0.039946	Accuracy: 98.59%
8	Validation loss: 0.036820	Best loss: 0.036820	Accuracy: 98.98%
9	Validation loss: 0.035243	Best loss: 0.035243	Accuracy: 98.87%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=1, lea

[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=3, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.064211	Best loss: 0.064211	Accuracy: 97.97%
1	Validation loss: 0.050466	Best loss: 0.050466	Accuracy: 98.59%
2	Validation loss: 0.044464	Best loss: 0.044464	Accuracy: 98.67%
3	Validation loss: 0.039611	Best loss: 0.039611	Accuracy: 98.59%
4	Validation loss: 0.031079	Best loss: 0.031079	Accuracy: 99.02%
5	Validation loss: 0.033543	Best loss: 0.031079	Accuracy: 98.87%
6	Validation loss: 0.029691	Best loss: 0.029691	Accuracy: 98.91%
7	Validation loss: 0.030823	Best loss: 0.029691	Accuracy: 98.98%
8	Validation loss: 0.028411	Best loss: 0.028411	Accuracy: 99.10%
9	Validation loss: 0.028043	Best loss: 0.028043	Accuracy: 99.18%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_la

1	Validation loss: 0.103522	Best loss: 0.103522	Accuracy: 97.03%
2	Validation loss: 0.101254	Best loss: 0.101254	Accuracy: 96.72%
3	Validation loss: 0.100307	Best loss: 0.100307	Accuracy: 97.03%
4	Validation loss: 0.101647	Best loss: 0.100307	Accuracy: 96.83%
5	Validation loss: 0.105102	Best loss: 0.100307	Accuracy: 96.72%
6	Validation loss: 0.101155	Best loss: 0.100307	Accuracy: 96.91%
7	Validation loss: 0.104440	Best loss: 0.100307	Accuracy: 96.64%
8	Validation loss: 0.103228	Best loss: 0.100307	Accuracy: 96.72%
9	Validation loss: 0.100753	Best loss: 0.100307	Accuracy: 96.87%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.95, activation=<function elu at 0x1228d39d8>, total=  11.5s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.4, batch_siz

6	Validation loss: 0.136807	Best loss: 0.039590	Accuracy: 97.62%
7	Validation loss: 0.201431	Best loss: 0.039590	Accuracy: 96.99%
8	Validation loss: 0.058676	Best loss: 0.039590	Accuracy: 98.67%
9	Validation loss: 0.040105	Best loss: 0.039590	Accuracy: 98.79%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=3, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.95, activation=<function elu at 0x1228d39d8>, total=  29.0s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, dropout_rate=0.2, batch_size=10, batch_norm_momentum=0.98, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.120033	Best loss: 0.120033	Accuracy: 96.13%
1	Validation loss: 0.105347	Best loss: 0.105347	Accuracy: 96.95%
2	Validation loss: 0.097364	Best loss: 0.097364	Accuracy: 96.83%
3	Validation loss: 0.089846	Best loss: 0.089846	Accuracy: 97.

[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.4, batch_size=50, batch_norm_momentum=0.98, activation=<function elu at 0x1228d39d8>, total=   3.0s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.4, batch_size=50, batch_norm_momentum=0.98, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.124929	Best loss: 0.124929	Accuracy: 96.36%
1	Validation loss: 0.110629	Best loss: 0.110629	Accuracy: 97.03%
2	Validation loss: 0.107540	Best loss: 0.107540	Accuracy: 96.83%
3	Validation loss: 0.104847	Best loss: 0.104847	Accuracy: 96.87%
4	Validation loss: 0.104066	Best loss: 0.104066	Accuracy: 96.95%
5	Validation loss: 0.107351	Best loss: 0.104066	Accuracy: 96.87%
6	Validation loss: 0.102842	Best loss: 0.102842	Accuracy: 97.03%
7	Validation loss: 0.105074	Best loss: 0.102842	Accura

0	Validation loss: 0.105917	Best loss: 0.105917	Accuracy: 96.68%
1	Validation loss: 0.089928	Best loss: 0.089928	Accuracy: 97.46%
2	Validation loss: 0.069752	Best loss: 0.069752	Accuracy: 98.08%
3	Validation loss: 0.076799	Best loss: 0.069752	Accuracy: 97.69%
4	Validation loss: 0.077566	Best loss: 0.069752	Accuracy: 97.65%
5	Validation loss: 0.076448	Best loss: 0.069752	Accuracy: 98.05%
6	Validation loss: 0.078169	Best loss: 0.069752	Accuracy: 97.73%
7	Validation loss: 0.083675	Best loss: 0.069752	Accuracy: 97.54%
8	Validation loss: 0.065602	Best loss: 0.065602	Accuracy: 98.08%
9	Validation loss: 0.078000	Best loss: 0.065602	Accuracy: 97.77%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=1, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.9, activation=<function relu at 0x1228d9d08>, total=   9.8s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_la

4	Validation loss: 0.283822	Best loss: 0.128033	Accuracy: 96.21%
5	Validation loss: 0.123620	Best loss: 0.123620	Accuracy: 97.69%
6	Validation loss: 0.168342	Best loss: 0.123620	Accuracy: 95.74%
7	Validation loss: 0.117872	Best loss: 0.117872	Accuracy: 96.29%
8	Validation loss: 0.141771	Best loss: 0.117872	Accuracy: 96.09%
9	Validation loss: 0.353828	Best loss: 0.117872	Accuracy: 94.72%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=100, n_hidden_layers=3, learning_rate=0.1, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.9, activation=<function elu at 0x1228d39d8>, total= 1.2min
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=100, n_hidden_layers=3, learning_rate=0.1, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.9, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.236389	Best loss: 0.236389	Accuracy: 95.27%
1	Validation loss: 0.132186	Best loss: 0.132186	Accuracy: 96.40%
2	V

8	Validation loss: 0.024862	Best loss: 0.024862	Accuracy: 99.14%
9	Validation loss: 0.032298	Best loss: 0.024862	Accuracy: 98.94%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=100, n_hidden_layers=3, learning_rate=0.01, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.9, activation=<function relu at 0x1228d9d08>, total=  23.7s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=100, n_hidden_layers=3, learning_rate=0.01, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.9, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.056144	Best loss: 0.056144	Accuracy: 98.36%
1	Validation loss: 0.049918	Best loss: 0.049918	Accuracy: 98.32%
2	Validation loss: 0.046539	Best loss: 0.046539	Accuracy: 98.59%
3	Validation loss: 0.038293	Best loss: 0.038293	Accuracy: 98.67%
4	Validation loss: 0.040930	Best loss: 0.038293	Accuracy: 98.75%
5	Validation loss: 0.031021	Best loss: 0.031021	Accuracy: 98.87%

0	Validation loss: 0.220946	Best loss: 0.220946	Accuracy: 93.35%
1	Validation loss: 0.160136	Best loss: 0.160136	Accuracy: 95.31%
2	Validation loss: 0.138511	Best loss: 0.138511	Accuracy: 95.86%
3	Validation loss: 0.124081	Best loss: 0.124081	Accuracy: 96.36%
4	Validation loss: 0.114401	Best loss: 0.114401	Accuracy: 96.56%
5	Validation loss: 0.111544	Best loss: 0.111544	Accuracy: 96.40%
6	Validation loss: 0.103299	Best loss: 0.103299	Accuracy: 96.72%
7	Validation loss: 0.098290	Best loss: 0.098290	Accuracy: 96.95%
8	Validation loss: 0.098263	Best loss: 0.098263	Accuracy: 96.95%
9	Validation loss: 0.091742	Best loss: 0.091742	Accuracy: 97.30%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=3, learning_rate=0.01, dropout_rate=0.4, batch_size=50, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08>, total=  22.2s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=1

4	Validation loss: 0.143681	Best loss: 0.143681	Accuracy: 95.90%
5	Validation loss: 0.139493	Best loss: 0.139493	Accuracy: 96.05%
6	Validation loss: 0.135089	Best loss: 0.135089	Accuracy: 96.13%
7	Validation loss: 0.132446	Best loss: 0.132446	Accuracy: 96.25%
8	Validation loss: 0.129994	Best loss: 0.129994	Accuracy: 96.29%
9	Validation loss: 0.127400	Best loss: 0.127400	Accuracy: 96.25%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.01, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08>, total=   2.9s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.01, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.223154	Best loss: 0.223154	Accuracy: 94.92%
1	Validation loss: 0.178968	Best loss: 0.178968	Accu

8	Validation loss: 2.054601	Best loss: 0.975078	Accuracy: 95.58%
9	Validation loss: 1.647246	Best loss: 0.975078	Accuracy: 95.23%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=150, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08>, total=  11.9s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.98, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.108014	Best loss: 0.108014	Accuracy: 96.56%
1	Validation loss: 0.161934	Best loss: 0.108014	Accuracy: 96.29%
2	Validation loss: 0.115985	Best loss: 0.108014	Accuracy: 97.65%
3	Validation loss: 0.156131	Best loss: 0.108014	Accuracy: 96.87%
4	Validation loss: 0.156245	Best loss: 0.108014	Accuracy: 97.97%
5	Validation loss: 0.192077	Best loss: 0.108014	Accuracy: 97.73%


0	Validation loss: 0.391181	Best loss: 0.391181	Accuracy: 95.58%
1	Validation loss: 0.457517	Best loss: 0.391181	Accuracy: 96.36%
2	Validation loss: 0.505746	Best loss: 0.391181	Accuracy: 95.82%
3	Validation loss: 0.567634	Best loss: 0.391181	Accuracy: 96.09%
4	Validation loss: 0.615589	Best loss: 0.391181	Accuracy: 95.70%
5	Validation loss: 0.914987	Best loss: 0.391181	Accuracy: 94.18%
6	Validation loss: 0.874886	Best loss: 0.391181	Accuracy: 91.44%
7	Validation loss: 0.798926	Best loss: 0.391181	Accuracy: 95.23%
8	Validation loss: 0.785767	Best loss: 0.391181	Accuracy: 95.70%
9	Validation loss: 1.005811	Best loss: 0.391181	Accuracy: 94.64%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.9, activation=<function elu at 0x1228d39d8>, total=   3.2s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=10, n_hidden_laye

5	Validation loss: 0.107917	Best loss: 0.103697	Accuracy: 96.83%
6	Validation loss: 0.106083	Best loss: 0.103697	Accuracy: 96.99%
7	Validation loss: 0.101997	Best loss: 0.101997	Accuracy: 96.68%
8	Validation loss: 0.103524	Best loss: 0.101997	Accuracy: 96.99%
9	Validation loss: 0.100838	Best loss: 0.100838	Accuracy: 97.15%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=10, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.98, activation=<function elu at 0x1228d39d8>, total=   2.9s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=150, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.95, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.123285	Best loss: 0.123285	Accuracy: 96.36%
1	Validation loss: 0.112454	Best loss: 0.112454	Accuracy: 96.64%
2	Validation loss: 0.107300	Best loss: 0.107300	Accura

9	Validation loss: 0.100223	Best loss: 0.100223	Accuracy: 97.07%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.9, activation=<function elu at 0x1228d39d8>, total=   2.8s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.1, dropout_rate=0.2, batch_size=50, batch_norm_momentum=0.9, activation=<function elu at 0x1228d39d8> 
0	Validation loss: 0.124929	Best loss: 0.124929	Accuracy: 96.36%
1	Validation loss: 0.110629	Best loss: 0.110629	Accuracy: 97.03%
2	Validation loss: 0.107540	Best loss: 0.107540	Accuracy: 96.83%
3	Validation loss: 0.104847	Best loss: 0.104847	Accuracy: 96.87%
4	Validation loss: 0.104066	Best loss: 0.104066	Accuracy: 96.95%
5	Validation loss: 0.107351	Best loss: 0.104066	Accuracy: 96.87%
6	Validation loss: 0.102842	Best loss: 0.102842	Accuracy

0	Validation loss: 0.065350	Best loss: 0.065350	Accuracy: 97.93%
1	Validation loss: 0.048040	Best loss: 0.048040	Accuracy: 98.44%
2	Validation loss: 0.044301	Best loss: 0.044301	Accuracy: 98.40%
3	Validation loss: 0.042056	Best loss: 0.042056	Accuracy: 98.63%
4	Validation loss: 0.036435	Best loss: 0.036435	Accuracy: 98.79%
5	Validation loss: 0.035136	Best loss: 0.035136	Accuracy: 98.75%
6	Validation loss: 0.038678	Best loss: 0.035136	Accuracy: 98.55%
7	Validation loss: 0.030883	Best loss: 0.030883	Accuracy: 99.02%
8	Validation loss: 0.031332	Best loss: 0.030883	Accuracy: 98.87%
9	Validation loss: 0.030861	Best loss: 0.030861	Accuracy: 99.02%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=150, n_hidden_layers=1, learning_rate=0.1, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.95, activation=<function relu at 0x1228d9d08>, total= 1.1min
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=15

4	Validation loss: 0.121539	Best loss: 0.121539	Accuracy: 96.52%
5	Validation loss: 0.119241	Best loss: 0.119241	Accuracy: 96.56%
6	Validation loss: 0.116923	Best loss: 0.116923	Accuracy: 96.60%
7	Validation loss: 0.115272	Best loss: 0.115272	Accuracy: 96.72%
8	Validation loss: 0.114154	Best loss: 0.114154	Accuracy: 96.79%
9	Validation loss: 0.112894	Best loss: 0.112894	Accuracy: 96.76%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.01, dropout_rate=0.2, batch_size=10, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08>, total=  15.2s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.01, dropout_rate=0.2, batch_size=10, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.158951	Best loss: 0.158951	Accuracy: 95.66%
1	Validation loss: 0.139017	Best loss: 0.139017	Ac

8	Validation loss: 0.115563	Best loss: 0.115563	Accuracy: 96.79%
9	Validation loss: 0.113833	Best loss: 0.113833	Accuracy: 96.87%
[CV]  optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.01, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.9, activation=<function relu at 0x1228d9d08>, total=  13.1s
[CV] optimizer_class=<class 'tensorflow.python.training.adagrad.AdagradOptimizer'>, n_neurons=100, n_hidden_layers=0, learning_rate=0.01, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.9, activation=<function relu at 0x1228d9d08> 
0	Validation loss: 0.157899	Best loss: 0.157899	Accuracy: 95.90%
1	Validation loss: 0.139176	Best loss: 0.139176	Accuracy: 96.29%
2	Validation loss: 0.130126	Best loss: 0.130126	Accuracy: 96.40%
3	Validation loss: 0.124841	Best loss: 0.124841	Accuracy: 96.64%
4	Validation loss: 0.121461	Best loss: 0.121461	Accuracy: 96.76%
5	Validation loss: 0.119507	Best loss: 0.119507	Accu

0	Validation loss: 0.084233	Best loss: 0.084233	Accuracy: 97.30%
1	Validation loss: 0.060185	Best loss: 0.060185	Accuracy: 98.01%
2	Validation loss: 0.058899	Best loss: 0.058899	Accuracy: 97.97%
3	Validation loss: 0.052112	Best loss: 0.052112	Accuracy: 98.24%
4	Validation loss: 0.043808	Best loss: 0.043808	Accuracy: 98.67%
5	Validation loss: 0.039777	Best loss: 0.039777	Accuracy: 98.67%
6	Validation loss: 0.053049	Best loss: 0.039777	Accuracy: 98.12%
7	Validation loss: 0.039689	Best loss: 0.039689	Accuracy: 98.79%
8	Validation loss: 0.044658	Best loss: 0.039689	Accuracy: 98.48%
9	Validation loss: 0.038589	Best loss: 0.038589	Accuracy: 98.79%
[CV]  optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=100, n_hidden_layers=1, learning_rate=0.01, dropout_rate=0.4, batch_size=10, batch_norm_momentum=0.98, activation=<function relu at 0x1228d9d08>, total=  48.1s
[CV] optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>, n_neurons=100, n_hidden

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 80.4min finished


0	Validation loss: 0.056500	Best loss: 0.056500	Accuracy: 98.12%
1	Validation loss: 0.039173	Best loss: 0.039173	Accuracy: 98.59%
2	Validation loss: 0.038444	Best loss: 0.038444	Accuracy: 98.75%
3	Validation loss: 0.032878	Best loss: 0.032878	Accuracy: 98.91%
4	Validation loss: 0.029713	Best loss: 0.029713	Accuracy: 98.87%
5	Validation loss: 0.027749	Best loss: 0.027749	Accuracy: 98.87%
6	Validation loss: 0.024890	Best loss: 0.024890	Accuracy: 99.06%
7	Validation loss: 0.025736	Best loss: 0.024890	Accuracy: 98.98%
8	Validation loss: 0.021595	Best loss: 0.021595	Accuracy: 99.30%
9	Validation loss: 0.020510	Best loss: 0.020510	Accuracy: 99.30%


# Part 2: Transfer learning

In [12]:
import tensorflow as tf
import numpy as np
import os, sys, time
sys.path.append(os.getcwd())
from DNNClassifier import DNNClassifier
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.metrics import accuracy_score

he_init = tf.contrib.layers.variance_scaling_initializer()
mnist = input_data.read_data_sets("/tmp/data/")

X_train_full = mnist.train.images[mnist.train.labels >= 5]
y_train_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test = mnist.test.images[mnist.test.labels >= 5]
y_test = mnist.test.labels[mnist.test.labels >= 5] - 5

def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train, y_train = sample_n_instances_per_class(X_train_full, y_train_full, n=100)
X_valid, y_valid = sample_n_instances_per_class(X_valid_full, y_valid_full, n=30)

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

n_epochs = 1000
batch_size = 20

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


### 1. Create a new DNN that reuses all the pretrained hidden layers of the previous model, freezes them, and replaces the softmax output layer with a fresh new one.

In [13]:
print('Task 1 start')
time.sleep(1)

reset_graph()

restore_saver = tf.train.import_meta_graph("./model.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
logits = Y_proba.op.inputs[0]
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")
learning_rate = 0.01


output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

Task 1 start


### 2. Train this new DNN on digits 5 to 9, using only 100 images per digit, and time how long it takes. Despite this small number of examples, can you achieve high precision?

In [17]:
print('Task 2 start')
time.sleep(1)

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model")
    for var in output_layer_vars:
        var.initializer.run()
    t0 = time.time()
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

Task 2 start
INFO:tensorflow:Restoring parameters from ./model
0	Validation loss: 0.804879	Best loss: 0.804879	Accuracy: 76.67%
1	Validation loss: 0.656432	Best loss: 0.656432	Accuracy: 79.33%
2	Validation loss: 0.599913	Best loss: 0.599913	Accuracy: 82.00%
3	Validation loss: 0.563547	Best loss: 0.563547	Accuracy: 82.67%
4	Validation loss: 0.545680	Best loss: 0.545680	Accuracy: 86.00%
5	Validation loss: 0.529378	Best loss: 0.529378	Accuracy: 81.33%
6	Validation loss: 0.515505	Best loss: 0.515505	Accuracy: 84.67%
7	Validation loss: 0.497837	Best loss: 0.497837	Accuracy: 83.33%
8	Validation loss: 0.486211	Best loss: 0.486211	Accuracy: 84.67%
9	Validation loss: 0.484087	Best loss: 0.484087	Accuracy: 85.33%
10	Validation loss: 0.466256	Best loss: 0.466256	Accuracy: 88.00%
11	Validation loss: 0.470066	Best loss: 0.466256	Accuracy: 86.00%
12	Validation loss: 0.464126	Best loss: 0.464126	Accuracy: 87.33%
13	Validation loss: 0.440333	Best loss: 0.440333	Accuracy: 86.67%
14	Validation loss: 0.4

### 3. Try caching the frozen layers, and train the model again: how much faster is it now?

In [15]:
print('Task 3 start')
time.sleep(1)

hidden5_out = tf.get_default_graph().get_tensor_by_name("hidden5_out:0")

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model")
    for var in output_layer_vars:
        var.initializer.run()
    t0 = time.time()
    hidden5_train = hidden5_out.eval(feed_dict={X: X_train, y: y_train})
    hidden5_valid = hidden5_out.eval(feed_dict={X: X_valid, y: y_valid})
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            h5_batch, y_batch = hidden5_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={hidden5_out: h5_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={hidden5_out: hidden5_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

Task 3 start


KeyError: "The name 'hidden5_out:0' refers to a Tensor which does not exist. The operation, 'hidden5_out', does not exist in the graph."

### 4. Try again reusing just four hidden layers instead of five. Can you achieve a higher precision?

In [None]:
print('Task 4 start')
time.sleep(1)

reset_graph()

n_outputs = 5
learning_rate = 0.01

restore_saver = tf.train.import_meta_graph("./bestDNNDropout_batch_model.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden4_out:0")
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model")
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = four_frozen_saver.save(sess, "./mnistModel_5_to_9_four_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./mnistModel_5_to_9_four_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

### 5. Now unfreeze the top two hidden layers and continue training: can you get the model to perform even better?

In [None]:
print('Task 5 start')
time.sleep(1)

learning_rate = 0.01
unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam23")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    four_frozen_saver.restore(sess, "./mnistModel_5_to_9_four_frozen")

    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = two_frozen_saver.save(sess, "./mnistModel_5_to_9_two_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./mnistModel_5_to_9_two_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

learning_rate = 0.01

optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam24")
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
no_frozen_saver = tf.train.Saver()

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    two_frozen_saver.restore(sess, "./mnistModel_5_to_9_two_frozen")
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = no_frozen_saver.save(sess, "./mnistModel_5_to_9_no_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    no_frozen_saver.restore(sess, "./mnistModel_5_to_9_no_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

dnn_clf_5_to_9 = DNNClassifier(n_hidden_layers=4, random_state=42)
dnn_clf_5_to_9.fit(X_train, y_train, n_epochs=1000, X_valid=X_valid, y_valid=y_valid)
y_pred = dnn_clf_5_to_9.predict(X_test)
accuracy_score(y_test, y_pred)