In [1]:
import sys
import tensorflow as tf
from tensorflow.python.client import device_lib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from time import time
import pickle

def generate_initializers_and_data():
    """**Note**: this function must be called to generate weights and data for 
    both jupyter files!!!"""
    conv1_kernel = np.float32(np.random.normal(scale=0.5, size=[6, 3, 5, 5]))
    conv2_kernel = np.float32(np.random.normal(scale=0.5, size=[12, 6, 3, 3]))
    logits_kernel = np.float32(np.random.normal(scale=0.5, size=[432, 10]))
    with open('init.pkl', 'wb') as fo:
        init = {'conv1': conv1_kernel,
                'conv2': conv2_kernel,
                'logits': logits_kernel}
        pickle.dump(init, fo, protocol=pickle.HIGHEST_PROTOCOL)
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
    x_train, x_test = np.float32(x_train/255.0), np.float32(x_test/255.0)
    y_train, y_test = np.int32(y_train), np.int32(y_test)
    x_train, y_train = shuffle(x_train, y_train)
    x_train, y_train = x_train[:train_data_size], y_train[:train_data_size]
    y_train, y_test = y_train.flatten(), y_test.flatten()
    data = {
        'x_train':x_train,
        'y_train':y_train,
        'x_test':x_test,
        'y_test':y_test
    }
    with open('data.pkl', 'wb') as fo:
        pickle.dump(data, fo, protocol=pickle.HIGHEST_PROTOCOL)

learning_rate = 0.01
n_epochs = 300
batch_size = 50
train_data_size = 4000
dropout_rate = 0.4


def print_log(dict_):
    buff = '|'.join(['['+str(k)+ ':' +"{0:.3f}".format(v)+']' for k, v in sorted(dict_.items())])
    sys.stdout.write('\r' + buff)
    sys.stdout.flush()

def _test_loss_err(sess, loss_op, err_op, next_batch, iterator, x, y):
    batch_loss = []
    batch_err = []

    while True:
        try:
            batch = sess.run(next_batch)
            loss, err = sess.run([loss_op, err_op], feed_dict={x:batch['x'], y:batch['y']})
            batch_loss.append(loss)
            batch_err.append(err)
        except tf.errors.OutOfRangeError:
            loss = np.mean(batch_loss)
            err = np.mean(batch_err)
            sess.run(iterator.initializer)
            break
    return loss, err

def tf_model(graph, init=None):
    with graph.as_default():
        if init:
            conv1_init = init['conv1']
            conv2_init = init['conv2']
            logits_init = init['logits']
            conv1_init = tf.constant_initializer(conv1_init)
            conv2_init = tf.constant_initializer(conv2_init)
            logits_init = tf.constant_initializer(logits_init)
        else:
            conv1_init = tf.contrib.layers.xavier_initializer()
            conv2_init = tf.contrib.layers.xavier_initializer()
            logits_init = tf.contrib.layers.xavier_initializer()
        with tf.name_scope('Input'):
            
            x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3], name='x')
            y = tf.placeholder(tf.int32, shape=[None], name='y')
            keep_prob = tf.placeholder_with_default(1.0 - dropout_rate, shape=())
        with tf.device('/device:GPU:0'):
            with tf.name_scope('conv1'):
                conv1 = tf.layers.conv2d(x,
                                         filters=6,
                                         kernel_size=5,
                                         strides=1,
                                         padding='valid',
                                         kernel_initializer=conv1_init,
                                         bias_initializer=tf.initializers.zeros,
                                         activation=tf.nn.relu,
                                         name='conv1'
                                         )

                
                max_pool1 = tf.nn.max_pool(value=conv1,
                                           ksize=(1, 2, 2, 1),
                                           strides=(1, 2, 2, 1),
                                           padding='SAME',
                                           name='max_pool1')

                dropout1 = tf.nn.dropout(max_pool1, keep_prob=keep_prob)

            with tf.name_scope('conv2'):
                conv2 = tf.layers.conv2d(dropout1,
                                         filters=12,
                                         kernel_size=3,
                                         strides=1,
                                         padding='valid',
                                         bias_initializer=tf.initializers.zeros,
                                         activation=tf.nn.relu,
                                         kernel_initializer=conv2_init,
                                         name='conv2')

                max_pool2 = tf.nn.max_pool(value=conv2,
                                           ksize=(1, 2, 2, 1),
                                           strides=(1, 2, 2, 1),
                                           padding='VALID',
                                           name='max_pool2')

                dropout2 = tf.nn.dropout(max_pool2, keep_prob=keep_prob)

            with tf.name_scope('logits'):
                flatten = tf.layers.Flatten()(max_pool2)
                logits = tf.layers.dense(flatten,
                                         units=10,
                                         kernel_initializer=logits_init,
                                         bias_initializer=tf.initializers.zeros,
                                         name='logits')

    return x, y, keep_prob, logits
# call `generate_initializers_and_data()` only once to generate data
generate_initializers_and_data()
with open('init.pkl', 'rb') as fo:
    init = pickle.load(fo)

with open('data.pkl', 'rb') as fo:
    data = pickle.load(fo)


In [2]:
x_train = data['x_train']
x_test = data['x_test']
y_train = data['y_train']
y_test = data['y_test']
min_errs = []
time_took = []
for i in range(1):
    tf.reset_default_graph()
    
    start_time = time()
    x, y, keep_prob, logits = tf_model(tf.get_default_graph())
    with tf.device('cpu'):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logits)
        loss_op = tf.reduce_mean(xentropy)
        y_pred = tf.nn.in_top_k(predictions=tf.cast(logits, tf.float32),
                              targets=y,
                              k=1)
        err_op = 1.0 - tf.reduce_mean(tf.cast(x=y_pred, dtype=tf.float32),
                                      name='error')

    with tf.device('/device:GPU:0'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss_op)
        train_op = optimizer.apply_gradients(grads_and_vars)

    data = tf.data.Dataset.from_tensor_slices({'x':x_train, 'y':y_train}).batch(batch_size)
    iter_train = data.make_initializable_iterator()
    data = tf.data.Dataset.from_tensor_slices({'x':x_test, 'y':y_test}).batch(batch_size)
    iter_test = data.make_initializable_iterator()

    tf_log = {'train_loss':[],
              'test_loss':[],
              'train_error':[],
              'test_error':[]
             }

    with tf.Session() as sess:
        _ = sess.run([tf.global_variables_initializer(),
                      iter_train.initializer,
                      iter_test.initializer])
        next_train = iter_train.get_next()
        next_test = iter_test.get_next()

        loss_val, err_val = _test_loss_err(sess, loss_op, err_op, next_test, iter_test, x, y)
        tf_log['test_loss'].append(loss_val)
        tf_log['test_error'].append(err_val)
        loss_val, err_val = _test_loss_err(sess, loss_op, err_op, next_train, iter_train, x, y)
        tf_log['train_loss'].append(loss_val)
        tf_log['train_error'].append(err_val)

        for epoch in range(n_epochs):
            while True:
                try:
                    batch = sess.run(next_train)
                    _ = sess.run([train_op],
                                 feed_dict={x:batch['x'], y:batch['y']})

                except tf.errors.OutOfRangeError:
                    sess.run(iter_train.initializer)
                    loss_val, err_val = _test_loss_err(sess, loss_op, err_op, next_test, iter_test, x, y)
                    tf_log['test_loss'].append(loss_val)
                    tf_log['test_error'].append(err_val)
                    print_log({'epoch':epoch, 'error':err_val, 'loss':loss_val})
                    loss_val, err_val = _test_loss_err(sess, loss_op, err_op, next_train, iter_train, x, y)
                    tf_log['train_loss'].append(loss_val)
                    tf_log['train_error'].append(err_val)

                    break
    with open('tf_log.pkl', 'wb') as fo:
        tf_log['n_epochs'] = n_epochs
        pickle.dump(tf_log, fo, protocol=pickle.HIGHEST_PROTOCOL)
    time_took.append(time() - start_time)
    min_errs.append(min(tf_log['test_error']))
print()
print('time_took', np.mean(time_took))
print('min_error', np.mean(min_errs))

[epoch:299.000]|[error:0.536]|[loss:1.572]
time_took 290.59439420700073
min_error 0.53139997
