# Wide Residual network using tensorflow

paper: https://arxiv.org/pdf/1605.07146.pdf
________________________________
> Classification of Cifar 10 dataset. Here learnnig rate with cosine annealing and warm restarts are applied.

> **The model that will be attempted is wide-dropout :**

<img src='w-resnet.png'>

In [2]:
# load the necessary
import matplotlib.pylab as plt
%matplotlib inline
import os
import numpy as np
import math as m
import sys
print('Python version:',sys.version)
import tensorflow as tf
print('TF version:',tf.__version__)
from data_utils import input_pipeline
slim = tf.contrib.slim
from tensorflow.contrib.layers.python.layers import initializers
from colorama import Fore, Style
from datetime import datetime
import time
import glob

Python version: 3.5.2 (default, Nov 23 2017, 16:37:01) 
[GCC 5.4.0 20160609]
TF version: 1.8.0


## Model definition

In [2]:
# Define the conv function
KEEP_PROB = 1.
WEIGHT_DECAY = 0.0005
def resnet_block(input_layer, out_features, training=True, scope=None):
    
    with tf.variable_scope(scope):
        
        with slim.arg_scope([slim.batch_norm],
                            is_training = training, scale=True,
                            decay=0.9, epsilon=1e-5,updates_collections=None,
                            param_initializers={
                                "beta": tf.constant_initializer(value=0),
                                "gamma": tf.random_normal_initializer(mean=1., stddev=0.01)}):

            # 1st Conv 3x3

            # batch norm        
            net = slim.batch_norm(input_layer)
            # Relu
            net = tf.nn.relu(net)
            #convolution
            net = slim.conv2d(net, out_features)
            #dropout
            net = slim.dropout(net,keep_prob=KEEP_PROB,is_training=training)

            # 2nd Conv 3x3
            # batch norm
            net = slim.batch_norm(net)
            # Relu
            net = tf.nn.relu(net)
            #convolution
            net = slim.conv2d(net, out_features)

            #concat with the input                    
            net += input_layer
    
    return net

def wide_resnet(inputs,
                num_classes=10,
                is_training=True,
                k = 6,
                depth = 22,
                scope='wide_resnet'):
    
    # number of filters at various stages
    filters = [16, 16*k, 32*k, 64*k]
    # from their code https://github.com/szagoruyko/wide-residual-networks/blob/master/models/wide-resnet.lua
    assert (depth - 4) % 6 == 0, 'depth should be 6n+4'
    n = int((depth - 4) / 6)
    
    # Set the weights to a small value initially and check whether the output is ln(num_of_classes) = ln(10) ~2.3
    #weights_initializer = tf.random_uniform_initializer(0,0.001)
    
    # He et all uniform weights
    #weights_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2.0, 
    #                                       mode='FAN_IN', uniform=True)):
            
    with tf.variable_scope(scope, reuse=None):
        with tf.contrib.framework.arg_scope([slim.conv2d], 
                                             normalizer_fn=None,
                                             activation_fn=None,
                                             padding='SAME',
                                             kernel_size=(3,3),
                                             weights_initializer=initializers.variance_scaling_initializer(),
                                             weights_regularizer=slim.l2_regularizer(WEIGHT_DECAY),
                                             biases_initializer=None):


            #one conv at the beginning (spatial size: 32x32)
            net = slim.conv2d(inputs, filters[0], stride=(1,1), scope='first_conv' )
            # batch norm        
            net = slim.batch_norm(net, is_training =is_training, scale=True, updates_collections=None)
            # Relu
            net = tf.nn.relu(net)

            #block 1 -- Stage 1 (spatial size: 32x32)
            net = slim.conv2d(net,filters[1], stride=1)
            net = slim.repeat(net,n-1, resnet_block, filters[1], training=is_training, scope='block_1')
            print('block1:',net.get_shape().as_list())

            #block 2 -- Stage 2 (spatial size: 16x16)
            net = slim.conv2d(net,filters[2], stride=2)
            net = slim.repeat(net,n-1, resnet_block, filters[2], training=is_training, scope='block_2')
            print('block2:',net.get_shape().as_list())

            #block 3 -- Stage 3 (spatial size: 8x8)
            net = slim.conv2d(net,filters[3], stride=2)
            net = slim.repeat(net,n-1, resnet_block, filters[3], training=is_training, scope='block_3')
            print('block3:',net.get_shape().as_list())

            #average pooling 8x8
            net = slim.avg_pool2d(net, kernel_size=[8,8],padding='SAME',scope='avg_pool')

            # batch norm        
            net = slim.batch_norm(net, is_training =is_training, scale=True, updates_collections=None)
            # Relu
            net = tf.nn.relu(net)
            
            #flatten
            net = slim.flatten(net, scope='flatten')

            #logits
            net = slim.fully_connected(net, num_classes, activation_fn=None, scope='logits', 
                                       biases_regularizer=slim.l2_regularizer(WEIGHT_DECAY))

            return net


In [3]:
#checking the above graph
tf.reset_default_graph()
net = wide_resnet(tf.placeholder(tf.float32,(None, 32, 32, 3)),num_classes=10, is_training=True)
print('Variables in graph:')
for var in tf.global_variables():
    print(var.name)

block1: [None, 32, 32, 96]
block2: [None, 16, 16, 192]
block3: [None, 8, 8, 384]
Variables in graph:
wide_resnet/first_conv/weights:0
wide_resnet/BatchNorm/beta:0
wide_resnet/BatchNorm/gamma:0
wide_resnet/BatchNorm/moving_mean:0
wide_resnet/BatchNorm/moving_variance:0
wide_resnet/Conv/weights:0
wide_resnet/block_1/block_1_1/BatchNorm/beta:0
wide_resnet/block_1/block_1_1/BatchNorm/gamma:0
wide_resnet/block_1/block_1_1/BatchNorm/moving_mean:0
wide_resnet/block_1/block_1_1/BatchNorm/moving_variance:0
wide_resnet/block_1/block_1_1/Conv/weights:0
wide_resnet/block_1/block_1_1/BatchNorm_1/beta:0
wide_resnet/block_1/block_1_1/BatchNorm_1/gamma:0
wide_resnet/block_1/block_1_1/BatchNorm_1/moving_mean:0
wide_resnet/block_1/block_1_1/BatchNorm_1/moving_variance:0
wide_resnet/block_1/block_1_1/Conv_1/weights:0
wide_resnet/block_1/block_1_2/BatchNorm/beta:0
wide_resnet/block_1/block_1_2/BatchNorm/gamma:0
wide_resnet/block_1/block_1_2/BatchNorm/moving_mean:0
wide_resnet/block_1/block_1_2/BatchNorm/m

## Create the graph and run it in a session
> The input pipelines for both train and validation can be dynamically switched. 

In [4]:
#Create the graph

#Hyperparameters
N_EPOCH = 200
NUM_CLASSES = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.1 #3e-4 
WIDTH = 32
HEIGHT = 32
LOG_FREQ = 250
N_TRAIN_SAMPLES = 50000
EARLY_STOP = False
CIFAR_LOCAL_FOLDER = '/home/vijay/datasets/image/cifar-10/cifar-10-batches-py'
MODEL_DIR = './slim_model/'

tf.reset_default_graph()

train_graph = tf.Graph()
with train_graph.as_default():
    global_step = tf.train.get_or_create_global_step()
    
    #-------------------------------------------
    #1. Create valid and train iterators
    #-------------------------------------------

    # placeholder for passing testing dataset
    test_filenames = tf.placeholder(tf.string, shape=[None], name='input')
    train_filenames = sorted(glob.glob(os.path.join(CIFAR_LOCAL_FOLDER, '%s*' %'train*.tfrecords')))
    
    # Create valid and train iterators
    train_dataset = input_pipeline(train_filenames, BATCH_SIZE)
    test_dataset = input_pipeline(test_filenames, BATCH_SIZE*3,  validation=True)
    
    # A feedable iterator is defined by a handle placeholder and its structure.
    handle = tf.placeholder(tf.string, shape=[], name='handle')

    iterator = tf.data.Iterator.from_string_handle(handle, 
                                                   train_dataset.output_types, 
                                                   train_dataset.output_shapes)
    
    # Returns a batch of image and mask at every call
    image_batch, label_batch = iterator.get_next()
    
    image_batch = tf.identity(image_batch, 'images')
    label_batch = tf.identity(label_batch, 'labels')
    
    # Create a initializable iterator for valid dataset, 
    # so that the dataset is same for every valid loop.
    testing_iterator = test_dataset.make_initializable_iterator()
    training_iterator = train_dataset.make_initializable_iterator() #make_one_shot_iterator()
    
    
    # EDIT : the below not working for inference -https://github.com/tensorflow/tensorflow/issues/11679
    # Build the iterator SaveableObject.
    #saveable = tf.contrib.data.make_saveable_from_iterator(testing_iterator)

    # Save the iterator state by adding it to the saveable objects collection.
    #tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable)
    
    #------------------------------------------------
    # 2. load Wide-resnet to the graph
    #------------------------------------------------
    
    # Placeholder definitions to handle incase of testing individual images
    X = tf.placeholder_with_default(image_batch, shape=[None,HEIGHT,WIDTH,3], name='X')
    y = tf.placeholder_with_default(label_batch, shape=[None], name='y')
    
    # A bool to switch between training loop and testing loop
    is_train = tf.placeholder(dtype=tf.bool, name='is_train')
    
    #wide-resnet logits
    logits = wide_resnet(X, num_classes=10, is_training=is_train)
    
    #------------------------------------------------
    # 3. Loss and accuracy
    #------------------------------------------------
        
    loss = tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(y,NUM_CLASSES), logits=logits)
    
    #add regularization loss
    regularization_loss = tf.add_n(tf.losses.get_regularization_losses())
    loss += regularization_loss
        
    #learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')
    
    # cosine dacay learning rate
    learning_rate = tf.train.cosine_decay_restarts(LEARNING_RATE, global_step,
                                          first_decay_steps=30000, t_mul=2.0,
                                          m_mul=1.0, alpha=0.0)
    
    # use Momentum optimizer    
    solver = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)

    # create the train op
    train_op = slim.learning.create_train_op(loss, solver, global_step=global_step)
    
    # Probabilities of the outputs
    prob = tf.nn.softmax(logits, name='prob')
    
    # Prediction 
    pred = tf.argmax(prob, 1, name='pred')

    #accuracy create two different ops
    with tf.name_scope('train_metrics'):
        train_acc_op, train_accuracy = tf.metrics.accuracy(labels=tf.cast(y, tf.int32), predictions=tf.cast(pred, tf.int32))
    
    with tf.name_scope('test_metrics'):
        test_acc_op, test_accuracy = tf.metrics.accuracy(labels=tf.cast(y, tf.int32), predictions=tf.cast(pred, tf.int32))
    
    # Select accuracy for saving
    accuracy = tf.cond(is_train, lambda:train_accuracy, lambda:test_accuracy)
    accuracy = tf.identity(accuracy,name='accuracy')
    
    # Create summaries
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)    
    tf.summary.scalar('learning_rate', learning_rate)
    my_summary_op = tf.summary.merge_all()


block1: [None, 32, 32, 96]
block2: [None, 16, 16, 192]
block3: [None, 8, 8, 384]


In [5]:
# Run the graph in the session

def testing_loop(_handle, file_names, writer, test=False):
    """ Testing/validation loop between trainings"""
    # do validation at each epoch
    _loss = 0.
    count = 0
    # Reset the iterator at every valid call, to go through the validation set
    sess.run(testing_iterator.initializer, {test_filenames: file_names})

    while True:
        try:
            _summaries, _l, _acc, _,_pred, _label = sess.run([my_summary_op, loss,test_accuracy, test_acc_op, pred, y], 
                                               {is_train:False, handle:_handle})  
            count += len(_label)
            _loss += _l*len(_label)
        except tf.errors.OutOfRangeError:
            break

    _loss = _loss/count
    
    if test:
        tf.logging.info(Fore.CYAN + Style.BRIGHT +'Step:{} Test Loss:{:.5f} Test Accuracy:{:.5f}'  
                .format(step,_loss, _acc)+Fore.RESET+Style.RESET_ALL)  
    else:
        tf.logging.info(Fore.GREEN + Style.BRIGHT +'Step:{} Valid Loss:{:.5f} Valid Accuracy:{:.5f}'  
                .format(step,_loss, _acc)+Fore.RESET+Style.RESET_ALL)  

    #write validation summaries
    writer.add_summary(_summaries, step)
    return _loss

with tf.Session(graph=train_graph) as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    # The `Iterator.string_handle()` method returns a tensor that can be evaluated
    # and used to feed the `handle` placeholder.
    valid_files = sorted(glob.glob(os.path.join(CIFAR_LOCAL_FOLDER, '%s*' %'valid*.tfrecords')))
    test_files = sorted(glob.glob(os.path.join(CIFAR_LOCAL_FOLDER, '%s*' %'eval*.tfrecords')))
    train_handle = sess.run(training_iterator.string_handle())
    valid_handle = sess.run(testing_iterator.string_handle(), {test_filenames: valid_files})
    test_handle = sess.run(testing_iterator.string_handle(), {test_filenames: test_files})
    
    # variables for early exit
    best_v_loss = 10e10
    early_stopping = 0
    saver = tf.train.Saver()
    start_time = time.time()

    # Get the test/valid metrics variables to reset at each call
    stream_vars_test = [v for v in tf.local_variables() if 'test_metrics/' in v.name]
    # Get the train metrics variables to reset at each epoch
    stream_vars_train = [v for v in tf.local_variables() if 'train_metrics/' in v.name]

    #Summaries for validation and training
    summaries_dir = os.path.join(MODEL_DIR,'summaries')
    train_writer = tf.summary.FileWriter(summaries_dir + '/train', sess.graph)
    train_writer.flush()
    valid_writer = tf.summary.FileWriter(summaries_dir + '/valid', sess.graph)
    valid_writer.flush()
    test_writer = tf.summary.FileWriter(summaries_dir + '/test', sess.graph)
    test_writer.flush()

    
    for e in range(N_EPOCH):
        
        tf.logging.info(Fore.RED+ Style.BRIGHT +'Epoch:{} Time taken:{:.2f}s'.format(e,time.time()-start_time)+Fore.RESET+Style.RESET_ALL)
        start_time = time.time()
        # initialize the training iterator at every epoch
        sess.run(training_iterator.initializer)              
        while True:

            try:
                # The train images are passed
                _, l, _, step = sess.run([train_op, loss, train_acc_op, global_step], {is_train:True,handle:train_handle})  

                #print train logs
                if (step %LOG_FREQ) == 0:
                    train_summaries,acc, l, reg_loss = sess.run([my_summary_op, train_accuracy, loss,regularization_loss], 
                                                                {is_train:True,handle:train_handle})  
                    #Print train logs
                    tf.logging.info(Fore.BLUE + Style.BRIGHT +'Step:{} Reg Loss:{:.5f} Train Loss:{:.5f}: Accuracy:{:.5f}' 
                                    .format(step,reg_loss,l,acc)+Fore.RESET+Style.RESET_ALL)

                    # write train summaries
                    train_writer.add_summary(train_summaries, step)

            # catch end of dataset
            except tf.errors.OutOfRangeError:
                break
        
        # do validation at each epoch
        valid_loss = testing_loop(valid_handle, valid_files, valid_writer)
                
        # reset train and validation statistics every epoch
        sess.run(tf.variables_initializer(stream_vars_train))
        sess.run(tf.variables_initializer(stream_vars_test))

        # Save the best model based on valid loss and test the model on the testing set
        if (best_v_loss > valid_loss) and (step > 0):
            tf.logging.info(Fore.RED+ Style.BRIGHT +'Saving the model...'+Fore.RESET+Style.RESET_ALL)
            saver.save(sess, os.path.join(MODEL_DIR, 'wide_resnet'))
            best_v_loss = valid_loss
            early_stopping = 0
            
            # test when the validation set has a new minimum loss 
            
            testing_loop(test_handle, test_files, test_writer, test=True)
       
            #reset test statistics
            sess.run(tf.variables_initializer(stream_vars_test))

        
        else:
            early_stopping += 1

        # early stopping
        if early_stopping > 3 and EARLY_STOP:
            tf.logging.info(Fore.RED+ Style.BRIGHT +'Stopping the training...'+Fore.RESET+Style.RESET_ALL)
            break
    #One final testing    
    testing_loop(test_handle, test_files, test_writer, test=True)


INFO:tensorflow:[31m[1mEpoch:0 Time taken:1.14s[39m[0m
INFO:tensorflow:[34m[1mStep:0 Reg Loss:1.69695 Train Loss:17.73261: Accuracy:0.14844[39m[0m
INFO:tensorflow:[34m[1mStep:250 Reg Loss:2.77488 Train Loss:4.81545: Accuracy:0.18750[39m[0m
INFO:tensorflow:[34m[1mStep:500 Reg Loss:2.16083 Train Loss:4.30770: Accuracy:0.20573[39m[0m
INFO:tensorflow:[32m[1mStep:621 Valid Loss:4.16712 Valid Accuracy:0.21081[39m[0m
INFO:tensorflow:[31m[1mSaving the model...[39m[0m
INFO:tensorflow:[36m[1mStep:621 Test Loss:4.16727 Test Accuracy:0.20610[39m[0m
INFO:tensorflow:[31m[1mEpoch:1 Time taken:64.28s[39m[0m
INFO:tensorflow:[34m[1mStep:750 Reg Loss:1.68491 Train Loss:3.59711: Accuracy:0.20735[39m[0m
INFO:tensorflow:[34m[1mStep:1000 Reg Loss:1.31556 Train Loss:3.16624: Accuracy:0.20817[39m[0m
INFO:tensorflow:[32m[1mStep:1244 Valid Loss:3.26014 Valid Accuracy:0.20427[39m[0m
INFO:tensorflow:[31m[1mSaving the model...[39m[0m
INFO:tensorflow:[36m[1mStep:1244

INFO:tensorflow:[34m[1mStep:10000 Reg Loss:0.20360 Train Loss:0.68167: Accuracy:0.62934[39m[0m
INFO:tensorflow:[34m[1mStep:10250 Reg Loss:0.20770 Train Loss:0.96257: Accuracy:0.63007[39m[0m
INFO:tensorflow:[34m[1mStep:10500 Reg Loss:0.21229 Train Loss:1.15572: Accuracy:0.63059[39m[0m
INFO:tensorflow:[32m[1mStep:10581 Valid Loss:1.12026 Valid Accuracy:0.70917[39m[0m
INFO:tensorflow:[31m[1mSaving the model...[39m[0m
INFO:tensorflow:[36m[1mStep:10581 Test Loss:1.13957 Test Accuracy:0.70100[39m[0m
INFO:tensorflow:[31m[1mEpoch:17 Time taken:60.52s[39m[0m
INFO:tensorflow:[34m[1mStep:10750 Reg Loss:0.21494 Train Loss:0.77727: Accuracy:0.70251[39m[0m
INFO:tensorflow:[34m[1mStep:11000 Reg Loss:0.21933 Train Loss:1.11082: Accuracy:0.70222[39m[0m
INFO:tensorflow:[32m[1mStep:11204 Valid Loss:1.12709 Valid Accuracy:0.67736[39m[0m
INFO:tensorflow:[31m[1mEpoch:18 Time taken:58.41s[39m[0m
INFO:tensorflow:[34m[1mStep:11250 Reg Loss:0.22304 Train Loss:0.6427

INFO:tensorflow:[32m[1mStep:21164 Valid Loss:0.72544 Valid Accuracy:0.83619[39m[0m
INFO:tensorflow:[31m[1mEpoch:34 Time taken:58.27s[39m[0m
INFO:tensorflow:[34m[1mStep:21250 Reg Loss:0.23189 Train Loss:0.30792: Accuracy:0.96875[39m[0m
INFO:tensorflow:[34m[1mStep:21500 Reg Loss:0.22891 Train Loss:0.77092: Accuracy:0.89844[39m[0m
INFO:tensorflow:[34m[1mStep:21750 Reg Loss:0.22852 Train Loss:0.62157: Accuracy:0.89323[39m[0m
INFO:tensorflow:[32m[1mStep:21786 Valid Loss:0.61480 Valid Accuracy:0.86893[39m[0m
INFO:tensorflow:[31m[1mSaving the model...[39m[0m
INFO:tensorflow:[36m[1mStep:21786 Test Loss:0.62425 Test Accuracy:0.86610[39m[0m
INFO:tensorflow:[31m[1mEpoch:35 Time taken:61.68s[39m[0m
INFO:tensorflow:[34m[1mStep:22000 Reg Loss:0.22283 Train Loss:0.40689: Accuracy:0.86710[39m[0m
INFO:tensorflow:[34m[1mStep:22250 Reg Loss:0.22244 Train Loss:0.69963: Accuracy:0.86661[39m[0m
INFO:tensorflow:[32m[1mStep:22409 Valid Loss:0.59677 Valid Accuracy:

INFO:tensorflow:[34m[1mStep:32250 Reg Loss:0.37917 Train Loss:1.14542: Accuracy:0.78906[39m[0m
INFO:tensorflow:[32m[1mStep:32369 Valid Loss:1.85918 Valid Accuracy:0.61527[39m[0m
INFO:tensorflow:[31m[1mEpoch:52 Time taken:61.45s[39m[0m
INFO:tensorflow:[34m[1mStep:32500 Reg Loss:0.37819 Train Loss:0.65562: Accuracy:0.91406[39m[0m
INFO:tensorflow:[34m[1mStep:32750 Reg Loss:0.38112 Train Loss:1.00946: Accuracy:0.84766[39m[0m
INFO:tensorflow:[32m[1mStep:32992 Valid Loss:1.02458 Valid Accuracy:0.78822[39m[0m
INFO:tensorflow:[31m[1mEpoch:53 Time taken:60.53s[39m[0m
INFO:tensorflow:[34m[1mStep:33000 Reg Loss:0.38358 Train Loss:0.74974: Accuracy:0.89062[39m[0m
INFO:tensorflow:[34m[1mStep:33250 Reg Loss:0.38416 Train Loss:0.94438: Accuracy:0.85156[39m[0m
INFO:tensorflow:[34m[1mStep:33500 Reg Loss:0.38864 Train Loss:1.10852: Accuracy:0.81771[39m[0m
INFO:tensorflow:[32m[1mStep:33614 Valid Loss:0.96057 Valid Accuracy:0.80518[39m[0m
INFO:tensorflow:[31m[

INFO:tensorflow:[34m[1mStep:45250 Reg Loss:0.42369 Train Loss:0.97753: Accuracy:0.88281[39m[0m
INFO:tensorflow:[32m[1mStep:45442 Valid Loss:0.92247 Valid Accuracy:0.83785[39m[0m
INFO:tensorflow:[31m[1mEpoch:73 Time taken:58.78s[39m[0m
INFO:tensorflow:[34m[1mStep:45500 Reg Loss:0.42242 Train Loss:0.68746: Accuracy:0.91406[39m[0m
INFO:tensorflow:[34m[1mStep:45750 Reg Loss:0.42072 Train Loss:0.90391: Accuracy:0.87891[39m[0m
INFO:tensorflow:[34m[1mStep:46000 Reg Loss:0.42518 Train Loss:1.02770: Accuracy:0.84896[39m[0m
INFO:tensorflow:[32m[1mStep:46064 Valid Loss:0.94703 Valid Accuracy:0.82030[39m[0m
INFO:tensorflow:[31m[1mEpoch:74 Time taken:58.41s[39m[0m
INFO:tensorflow:[34m[1mStep:46250 Reg Loss:0.41896 Train Loss:0.79641: Accuracy:0.88281[39m[0m
INFO:tensorflow:[34m[1mStep:46500 Reg Loss:0.42433 Train Loss:1.04866: Accuracy:0.83203[39m[0m
INFO:tensorflow:[32m[1mStep:46687 Valid Loss:0.95205 Valid Accuracy:0.82332[39m[0m
INFO:tensorflow:[31m[

INFO:tensorflow:[34m[1mStep:58250 Reg Loss:0.39359 Train Loss:0.88074: Accuracy:0.89453[39m[0m
INFO:tensorflow:[34m[1mStep:58500 Reg Loss:0.39645 Train Loss:0.87216: Accuracy:0.86719[39m[0m
INFO:tensorflow:[32m[1mStep:58514 Valid Loss:0.84566 Valid Accuracy:0.84707[39m[0m
INFO:tensorflow:[31m[1mEpoch:94 Time taken:58.10s[39m[0m
INFO:tensorflow:[34m[1mStep:58750 Reg Loss:0.38905 Train Loss:0.71888: Accuracy:0.89844[39m[0m
INFO:tensorflow:[34m[1mStep:59000 Reg Loss:0.39113 Train Loss:0.88134: Accuracy:0.87109[39m[0m
INFO:tensorflow:[32m[1mStep:59137 Valid Loss:0.76890 Valid Accuracy:0.87363[39m[0m
INFO:tensorflow:[31m[1mEpoch:95 Time taken:58.68s[39m[0m
INFO:tensorflow:[34m[1mStep:59250 Reg Loss:0.38409 Train Loss:0.45598: Accuracy:0.98438[39m[0m
INFO:tensorflow:[34m[1mStep:59500 Reg Loss:0.38717 Train Loss:0.79856: Accuracy:0.91016[39m[0m
INFO:tensorflow:[34m[1mStep:59750 Reg Loss:0.38930 Train Loss:1.08760: Accuracy:0.86458[39m[0m
INFO:tensor

INFO:tensorflow:[34m[1mStep:71500 Reg Loss:0.28374 Train Loss:0.59234: Accuracy:0.93490[39m[0m
INFO:tensorflow:[32m[1mStep:71588 Valid Loss:0.58313 Valid Accuracy:0.90293[39m[0m
INFO:tensorflow:[31m[1mEpoch:115 Time taken:61.00s[39m[0m
INFO:tensorflow:[34m[1mStep:71750 Reg Loss:0.27705 Train Loss:0.28862: Accuracy:1.00000[39m[0m
INFO:tensorflow:[34m[1mStep:72000 Reg Loss:0.27624 Train Loss:0.62346: Accuracy:0.94922[39m[0m
INFO:tensorflow:[32m[1mStep:72211 Valid Loss:0.57802 Valid Accuracy:0.90386[39m[0m
INFO:tensorflow:[31m[1mEpoch:116 Time taken:59.82s[39m[0m
INFO:tensorflow:[34m[1mStep:72250 Reg Loss:0.27620 Train Loss:0.29054: Accuracy:1.00000[39m[0m
INFO:tensorflow:[34m[1mStep:72500 Reg Loss:0.26874 Train Loss:0.65323: Accuracy:0.94141[39m[0m
INFO:tensorflow:[34m[1mStep:72750 Reg Loss:0.27035 Train Loss:0.63411: Accuracy:0.91146[39m[0m
INFO:tensorflow:[32m[1mStep:72833 Valid Loss:0.58549 Valid Accuracy:0.90235[39m[0m
INFO:tensorflow:[31m

INFO:tensorflow:[31m[1mEpoch:134 Time taken:65.58s[39m[0m
INFO:tensorflow:[34m[1mStep:83500 Reg Loss:0.18343 Train Loss:0.18541: Accuracy:0.92713[39m[0m
INFO:tensorflow:[34m[1mStep:83750 Reg Loss:0.18230 Train Loss:0.23377: Accuracy:0.92794[39m[0m
INFO:tensorflow:[34m[1mStep:84000 Reg Loss:0.18151 Train Loss:0.28664: Accuracy:0.92835[39m[0m
INFO:tensorflow:[32m[1mStep:84038 Valid Loss:0.42753 Valid Accuracy:0.93442[39m[0m
INFO:tensorflow:[31m[1mEpoch:135 Time taken:60.96s[39m[0m
INFO:tensorflow:[34m[1mStep:84250 Reg Loss:0.18053 Train Loss:0.21460: Accuracy:0.98438[39m[0m
INFO:tensorflow:[34m[1mStep:84500 Reg Loss:0.17975 Train Loss:0.32469: Accuracy:0.96875[39m[0m
INFO:tensorflow:[32m[1mStep:84661 Valid Loss:0.41809 Valid Accuracy:0.93399[39m[0m
INFO:tensorflow:[31m[1mSaving the model...[39m[0m
INFO:tensorflow:[36m[1mStep:84661 Test Loss:0.42628 Test Accuracy:0.92650[39m[0m
INFO:tensorflow:[31m[1mEpoch:136 Time taken:61.98s[39m[0m
INFO:t

INFO:tensorflow:[32m[1mStep:95244 Valid Loss:1.14462 Valid Accuracy:0.77847[39m[0m
INFO:tensorflow:[31m[1mEpoch:153 Time taken:58.24s[39m[0m
INFO:tensorflow:[34m[1mStep:95250 Reg Loss:0.46849 Train Loss:0.74139: Accuracy:0.90625[39m[0m
INFO:tensorflow:[34m[1mStep:95500 Reg Loss:0.46462 Train Loss:1.04119: Accuracy:0.84766[39m[0m
INFO:tensorflow:[34m[1mStep:95750 Reg Loss:0.47113 Train Loss:1.13412: Accuracy:0.82292[39m[0m
INFO:tensorflow:[32m[1mStep:95866 Valid Loss:0.95946 Valid Accuracy:0.83388[39m[0m
INFO:tensorflow:[31m[1mEpoch:154 Time taken:58.14s[39m[0m
INFO:tensorflow:[34m[1mStep:96000 Reg Loss:0.46767 Train Loss:0.76259: Accuracy:0.90625[39m[0m
INFO:tensorflow:[34m[1mStep:96250 Reg Loss:0.46931 Train Loss:1.09470: Accuracy:0.85547[39m[0m
INFO:tensorflow:[32m[1mStep:96489 Valid Loss:1.08338 Valid Accuracy:0.79622[39m[0m
INFO:tensorflow:[31m[1mEpoch:155 Time taken:58.58s[39m[0m
INFO:tensorflow:[34m[1mStep:96500 Reg Loss:0.47507 Train

INFO:tensorflow:[34m[1mStep:108000 Reg Loss:0.49356 Train Loss:1.08057: Accuracy:0.89844[39m[0m
INFO:tensorflow:[34m[1mStep:108250 Reg Loss:0.49821 Train Loss:1.08678: Accuracy:0.85677[39m[0m
INFO:tensorflow:[32m[1mStep:108316 Valid Loss:1.01471 Valid Accuracy:0.82810[39m[0m
INFO:tensorflow:[31m[1mEpoch:174 Time taken:59.50s[39m[0m
INFO:tensorflow:[34m[1mStep:108500 Reg Loss:0.48825 Train Loss:0.81464: Accuracy:0.88281[39m[0m
INFO:tensorflow:[34m[1mStep:108750 Reg Loss:0.49582 Train Loss:1.18812: Accuracy:0.83203[39m[0m
INFO:tensorflow:[32m[1mStep:108939 Valid Loss:1.13175 Valid Accuracy:0.79924[39m[0m
INFO:tensorflow:[31m[1mEpoch:175 Time taken:58.45s[39m[0m
INFO:tensorflow:[34m[1mStep:109000 Reg Loss:0.49592 Train Loss:0.71756: Accuracy:0.92969[39m[0m
INFO:tensorflow:[34m[1mStep:109250 Reg Loss:0.49642 Train Loss:1.02749: Accuracy:0.87109[39m[0m
INFO:tensorflow:[34m[1mStep:109500 Reg Loss:0.50087 Train Loss:1.17547: Accuracy:0.82812[39m[0m


INFO:tensorflow:[32m[1mStep:120766 Valid Loss:0.98061 Valid Accuracy:0.83937[39m[0m
INFO:tensorflow:[31m[1mEpoch:194 Time taken:58.36s[39m[0m
INFO:tensorflow:[34m[1mStep:121000 Reg Loss:0.48477 Train Loss:0.81795: Accuracy:0.88281[39m[0m
INFO:tensorflow:[34m[1mStep:121250 Reg Loss:0.49175 Train Loss:1.18987: Accuracy:0.83984[39m[0m
INFO:tensorflow:[32m[1mStep:121389 Valid Loss:0.96111 Valid Accuracy:0.84263[39m[0m
INFO:tensorflow:[31m[1mEpoch:195 Time taken:58.44s[39m[0m
INFO:tensorflow:[34m[1mStep:121500 Reg Loss:0.48770 Train Loss:0.59345: Accuracy:0.97656[39m[0m
INFO:tensorflow:[34m[1mStep:121750 Reg Loss:0.48997 Train Loss:1.02228: Accuracy:0.90625[39m[0m
INFO:tensorflow:[34m[1mStep:122000 Reg Loss:0.49314 Train Loss:0.98761: Accuracy:0.86979[39m[0m
INFO:tensorflow:[32m[1mStep:122011 Valid Loss:1.08603 Valid Accuracy:0.81587[39m[0m
INFO:tensorflow:[31m[1mEpoch:196 Time taken:58.35s[39m[0m
INFO:tensorflow:[34m[1mStep:122250 Reg Loss:0.48

The best model was saved at *epoch 143* and the corresponding **Test Accuracy: 0.93230**

> INFO:tensorflow:Epoch:143 Time taken:58.23s  
INFO:tensorflow:Step:89250 Reg Loss:0.17370 Train Loss:0.21408: Accuracy:0.99219  
INFO:tensorflow:Step:89500 Reg Loss:0.17369 Train Loss:0.25064: Accuracy:0.98438  
INFO:tensorflow:Step:89641 Valid Loss:0.40154 Valid Accuracy:0.93857  
INFO:tensorflow:Saving the model...  
INFO:tensorflow:Step:89641 Test Loss:0.40572 Test Accuracy:0.93230  