In [1]:
# DL framework
import tensorflow as tf

from datetime import datetime

# common packages
import numpy as np
import os # handling file i/o
import sys
import math
import time # timing epochs
import random

# for ordered dict when building layer components
import collections

# plotting pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib import colors # making colors consistent
from mpl_toolkits.axes_grid1 import make_axes_locatable # colorbar helper


# from imageio import imread # read image from disk
# + data augmentation
from scipy import ndimage
from scipy import misc


import pickle # manually saving best params
from sklearn.utils import shuffle # shuffling data batches
from tqdm import tqdm # display training progress bar

# const
SEED = 42

# Helper to make the output consistent
def reset_graph(seed=SEED):
    """Clear the default TensorFlow graph and re-seed the notebook's RNGs.

    Seeds TensorFlow's graph-level RNG (on the new default graph), NumPy's
    global RNG and Python's built-in ``random`` module, so every source of
    randomness the notebook imports starts from the same state.

    Args:
        seed: integer seed applied to all three generators (defaults to SEED).
    """
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    # `random` is imported by the notebook as well; seed it so stdlib-based
    # shuffling/sampling is reproducible too.
    random.seed(seed)

# helper to create dirs if they don't already exist
def maybe_create_dir(dir_path):
    """Ensure `dir_path` exists and print which of the two cases applied.

    Missing directories (including intermediate ones) are created with
    ``os.makedirs``; an existing path is left untouched.
    """
    if os.path.exists(dir_path):
        status = "{} already exists"
    else:
        os.makedirs(dir_path)
        status = "{} created"
    print(status.format(dir_path))
    
def make_standard_dirs(root="trial", saver=True, best_params=True, tf_logs=True):
    """Create the standard output directories under `root`.

    Args:
        root: parent directory for all outputs.
        saver: create `<root>/saver`, which holds the tf saver files.
        best_params: create `<root>/best_params`, which holds a serialized
            copy of the best params (kept as a backup in case of issues with
            the saver files).
        tf_logs: create `<root>/tf_logs`, which holds the logs that will be
            visible in tensorboard.

    Each flag is now honoured; previously all three directories were created
    regardless of the flag values.
    """
    requested = (
        ("saver", saver),
        ("best_params", best_params),
        ("tf_logs", tf_logs),
    )
    for sub_dir, enabled in requested:
        if enabled:
            maybe_create_dir(root + "/" + sub_dir)

    
# set tf log level to suppress messages, unless an error
# (left disabled; uncomment to silence TensorFlow's C++ logging)
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Important Version information
print("Python: {}".format(sys.version_info[:]))
print('TensorFlow: {}'.format(tf.__version__))

# Check if using GPU: a falsy device name is reported as "No GPU"
if not tf.test.gpu_device_name():
    print('No GPU')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
    
# start the notebook from a cleared default graph with seeded RNGs
reset_graph()

Python: (3, 6, 5, 'final', 0)
TensorFlow: 1.8.0
Default GPU Device: /device:GPU:0


In [2]:
# create the default output dirs under trial/ (prints, per dir, whether it was created or already existed)
make_standard_dirs()

trial/saver already exists
trial/best_params already exists
trial/tf_logs already exists


In [3]:
# despite the name this is a file stem, not a path: save_obj/load_obj
# prepend 'trial/best_params/' and append '.pkl'
BEST_PARAMS_PATH = "best_params"
# directory that return_batched_iter reads '<setType>.tfrecords' from
TFR_DIR = "./data/record_holder/150"
# sanity check: print every file name found under TFR_DIR (os.walk recurses),
# sorted within each directory; prints nothing if the directory is missing
for _, _, files in os.walk(TFR_DIR):
    files = sorted(files)
    for filename in files:
        print(filename)

test.tfrecords
train.tfrecords
val.tfrecords


In [4]:
# these two functions (get_model_params and restore_model_params) are 
# ad[a|o]pted from; 
# https://github.com/ageron/handson-ml/blob/master/11_deep_learning.ipynb
def get_model_params():
    """Snapshot the current value of every global variable.

    Reads the GLOBAL_VARIABLES collection and evaluates it with the default
    session, so it must be called while a session is installed as default.

    Returns:
        dict mapping each variable's op name to its evaluated value.
    """
    variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    values = tf.get_default_session().run(variables)
    snapshot = {}
    for variable, value in zip(variables, values):
        snapshot[variable.op.name] = value
    return snapshot

def restore_model_params(model_params, g, sess):
    """Write saved values back into the graph's variables.

    For every name in `model_params` the op `<name>/Assign` is looked up in
    `g`; the saved value is fed in place of that op's second input and all
    assign ops are executed in a single `sess.run`.

    Args:
        model_params: dict of variable op name -> value (as produced by
            get_model_params).
        g: graph that owns the `<name>/Assign` ops.
        sess: session used to run the assign ops.
    """
    assign_ops = {}
    feed_dict = {}
    for var_name, saved_value in model_params.items():
        assign_op = g.get_operation_by_name(var_name + "/Assign")
        assign_ops[var_name] = assign_op
        # inputs[1] of the Assign op is the value being assigned; override it
        feed_dict[assign_op.inputs[1]] = saved_value
    sess.run(assign_ops, feed_dict=feed_dict)

# these two functions are used to manually save the best
# model params to disk
def save_obj(obj, name):
    """Pickle `obj` to trial/best_params/<name>.pkl with the highest protocol.

    The target directory must already exist; an existing file is overwritten.
    """
    target = 'trial/best_params/' + name + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Load and return the object pickled at trial/best_params/<name>.pkl."""
    source = 'trial/best_params/' + name + '.pkl'
    with open(source, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

In [5]:
GLOBAL_SET_TYPE = None

def _parse_function(example_proto):
    global GLOBAL_SET_TYPE
    labelName = str(GLOBAL_SET_TYPE) + '/label'
    featureName = str(GLOBAL_SET_TYPE) + '/image'
    feature = {featureName: tf.FixedLenFeature([], tf.string),
               labelName: tf.FixedLenFeature([], tf.int64)}
    
    # decode
    parsed_features = tf.parse_single_example(example_proto, features=feature)
    
    # convert image data from string to number
    image = tf.decode_raw(parsed_features[featureName], tf.float32)
    image = tf.reshape(image, [150, 150, 3])
    label = tf.cast(parsed_features[labelName], tf.int64)
    
    # [do any preprocessing here]
    
    return image, label

In [6]:
def return_batched_iter(setType, data_params, sess):
    """Build and initialize a one-pass batched iterator over a TFRecord split.

    Args:
        setType: split name; selects '<setType>.tfrecords' inside TFR_DIR and
            the feature keys used by _parse_function. The 'test' split is not
            shuffled.
        data_params: dict providing 'batch_size' and, for shuffled splits,
            'buffer_size'.
        sess: session in which the iterator's initializer is run.

    Returns:
        The initialized iterator (call `.get_next()` on it to get batches).

    Side effect: sets the module-level GLOBAL_SET_TYPE to `setType`.
    """
    global GLOBAL_SET_TYPE
    # _parse_function reads this global, so set it before .map() is built
    GLOBAL_SET_TYPE = setType

    filenames_ph = tf.placeholder(tf.string, shape=[None])

    # parse each record into tensors, then (optionally) shuffle and batch
    pipeline = tf.data.TFRecordDataset(filenames_ph).map(_parse_function)
    if setType != 'test':
        pipeline = pipeline.shuffle(buffer_size=data_params['buffer_size'])
    pipeline = pipeline.batch(data_params['batch_size']).repeat(1)

    iterator = pipeline.make_initializable_iterator()

    record_path = os.path.join(TFR_DIR, str(setType) + '.tfrecords')

    # initialize
    sess.run(iterator.initializer, feed_dict={filenames_ph: [record_path]})

    return iterator

In [7]:
def create_hyper_params():
    """Return a fresh dict of the data-pipeline and optimizer hyper-parameters."""
    return {
        'n_epochs': 5,
        'batch_size': 32,
        'buffer_size': 128,  # for shuffling
        'init_lr': 1e-5,
    }

In [8]:
def build_graph(data_params):
    """Build the binary-classification CNN graph and register its ops in collections.

    Args:
        data_params: dict of hyper-parameters. Reads 'init_lr' (Adam learning
            rate) and, optionally, 'seed' (graph-level random seed; falls back
            to the notebook-wide SEED).

    Returns:
        A new tf.Graph whose ops are retrievable through these collections
        (each in the listed order):
            "save_init":     saver, init_global, init_local
            "main_ops":      X, y_raw, training_op
            "preds":         preds, y_true_cls, y_pred_cls, correct_prediction
            "train_metrics": auc, auc_update, acc, acc_update, reset_op
            "val_metrics":   auc, auc_update, acc, acc_update, reset_op
            "test_metrics":  auc, auc_update, acc, acc_update, reset_op
            "train_loss":    mean_loss, mean_loss_update, reset_op
            "val_loss":      mean_loss, mean_loss_update, reset_op
            "test_loss":     mean_loss, mean_loss_update, reset_op
            "logits":        logits
            "tensorboard":   epoch_train_write_op, epoch_validation_write_op
    """
    g = tf.Graph()
    n_outputs = 1
    IMG_HEIGHT = 150
    IMG_WIDTH = 150
    CHANNELS = 3
    with g.as_default():
        # reset_graph() seeds whichever graph is the default when it is called;
        # `g` is a brand-new graph, so it has to be seeded here for the weight
        # initializers to be reproducible.
        tf.set_random_seed(data_params.get('seed', SEED))

        with tf.name_scope("inputs"):
            X = tf.placeholder(tf.float32, shape=[None, IMG_HEIGHT, IMG_WIDTH, CHANNELS], name="X") # Input
            #y = tf.placeholder(tf.float32, shape=(None, n_outputs), name="labels") # Target
            y_raw = tf.placeholder(tf.int64, shape=[None, n_outputs], name="y_input")
            # labels arrive as int64 but the sigmoid loss needs floats
            y = tf.cast(y_raw, tf.float32, name="label")

        with tf.name_scope("cnn"):
            # Shape notes assume the 150x150x3 input declared above; with
            # 'SAME' padding a stride-2 conv yields ceil(n / 2) per side.
            # 150x150x3
            h_1 = tf.layers.conv2d(X, filters=32, kernel_size=3, activation=tf.nn.elu,
                                   padding='SAME', strides=2, name="conv_1") # 75x75x32

            # 75x75x32
            h_2 = tf.layers.conv2d(h_1, filters=64, kernel_size=3, activation=tf.nn.elu,
                                   padding='SAME', strides=2, name="conv_2") # 38x38x64

            # 38x38x64
            h_3 = tf.layers.conv2d(h_2, filters=96, kernel_size=3, activation=tf.nn.elu,
                                   padding='SAME', strides=2, name="conv_3") # 19x19x96

            # 19x19x96 (pooling uses the default 'valid' padding, so 19 -> 9)
            h_4 = tf.layers.max_pooling2d(h_3, pool_size=[2,2],
                                          strides=2, name="max_pool_01") # 9x9x96

            # 9x9x96
            h_5 = tf.layers.conv2d(h_4, filters=128, kernel_size=3, activation=tf.nn.elu,
                                   padding='SAME', strides=1, name="conv_4") # 9x9x128

            # 9x9x128
            h_6 = tf.layers.conv2d(h_5, filters=192, kernel_size=3, activation=tf.nn.elu,
                                   padding='SAME', strides=1, name="conv_5") # 9x9x192

            # Disabled experiment, kept for reference. NOTE(review): the shape
            # notes inside it were not updated for the 150x150 input.
#             h_7 = tf.layers.max_pooling2d(h_6, pool_size=[2,2],
#                                           strides=2, name="max_pool_02") # 8x8x192

#             # 8x8x192
#             h_8 = tf.layers.conv2d(h_7, filters=256, kernel_size=3, activation=tf.nn.elu,
#                                    padding='SAME', strides=2, name="conv_6") # 4x4x256

#             # 4x4x256
#             h_9 = tf.layers.conv2d(h_8, filters=1024, kernel_size=4, activation=tf.nn.elu,
#                                    padding='SAME', strides=1, name="conv_7") # 1x1x1024
            # flatten everything except the batch dimension
            last_shape = int(np.prod(h_6.get_shape()[1:]))
            h_out_flat = tf.reshape(h_6, shape=[-1, last_shape]) # 9*9*192 = 15552

            # 15552 -> 256 -> 64 -> 16 -> n_outputs
            h_10 = tf.layers.dense(h_out_flat, 256, name="layer_01", activation=tf.nn.elu)
            h_11 = tf.layers.dense(h_10, 64, name="layer_02", activation=tf.nn.elu)
            h_12 = tf.layers.dense(h_11, 16, name="layer_03", activation=tf.nn.elu)

            logits = tf.layers.dense(h_12, n_outputs, name="logits")
            preds = tf.sigmoid(logits, name="preds")

        with tf.name_scope("loss"):
            xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
            batch_loss = tf.reduce_mean(xentropy, name="loss")

        with tf.name_scope("train"):
            optimizer = tf.train.AdamOptimizer(learning_rate=data_params['init_lr'],
                                               beta1=0.9,
                                               beta2=0.999,
                                               epsilon=1e-08,
                                               use_locking=False,
                                               name='Adam')
            training_op = optimizer.minimize(batch_loss, name="training_op")

        # Ops: training metrics
        with tf.name_scope("metrics"):
            # ================================== performance
            with tf.name_scope("common"):
                #preds = tf.nn.softmax(logits, name="prediction")
                #y_true_cls = tf.argmax(y,1)
                #y_pred_cls = tf.argmax(preds,1)
                # threshold both labels and sigmoid outputs at 0.5 to get classes
                y_true_cls = tf.greater_equal(y, 0.5)
                y_pred_cls = tf.greater_equal(preds, 0.5)

                correct_prediction = tf.equal(y_pred_cls, y_true_cls, name="correct_predictions")
                batch_acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            # Each split gets its own streaming metrics plus a reset op that
            # re-initializes only the local variables created in its scope.
            with tf.name_scope("train_metrics") as scope:
                train_auc, train_auc_update = tf.metrics.auc(labels=y, predictions=preds)
                train_acc, train_acc_update = tf.metrics.accuracy(labels=y_true_cls, predictions=y_pred_cls)
                train_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                train_met_reset_op = tf.variables_initializer(train_acc_vars, name="train_met_reset_op")
            with tf.name_scope("val_metrics") as scope:
                val_auc, val_auc_update = tf.metrics.auc(labels=y, predictions=preds)
                val_acc, val_acc_update = tf.metrics.accuracy(labels=y_true_cls, predictions=y_pred_cls)
                val_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                val_met_reset_op = tf.variables_initializer(val_acc_vars, name="val_met_reset_op")
            with tf.name_scope("test_metrics") as scope:
                test_auc, test_auc_update = tf.metrics.auc(labels=y, predictions=preds)
                test_acc, test_acc_update = tf.metrics.accuracy(labels=y_true_cls, predictions=y_pred_cls)
                test_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                test_acc_reset_op = tf.variables_initializer(test_acc_vars, name="test_met_reset_op")

            # =============================================== loss
            with tf.name_scope("train_loss_eval") as scope:
                train_mean_loss, train_mean_loss_update = tf.metrics.mean(batch_loss)
                train_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                train_loss_reset_op = tf.variables_initializer(train_loss_vars, name="train_loss_reset_op")
            with tf.name_scope("val_loss_eval") as scope:
                val_mean_loss, val_mean_loss_update = tf.metrics.mean(batch_loss)
                val_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                val_loss_reset_op = tf.variables_initializer(val_loss_vars, name="val_loss_reset_op")
            with tf.name_scope("test_loss_eval")as scope:
                test_mean_loss, test_mean_loss_update = tf.metrics.mean(batch_loss)
                test_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                # op name spelling ("rest") kept as-is so lookups by name keep working
                test_loss_reset_op = tf.variables_initializer(test_loss_vars, name="test_loss_rest_op")

        # Built after the metrics on purpose: tf.local_variables_initializer()
        # only covers the local variables that exist when it is called, so
        # creating it before the tf.metrics.* ops produced an initializer that
        # initialized nothing.
        with tf.name_scope("save_session"):
            init_global = tf.global_variables_initializer()
            init_local = tf.local_variables_initializer()
            saver = tf.train.Saver()

        # --- create collections
        for node in (saver, init_global, init_local):
            g.add_to_collection("save_init", node)
        for node in (X, y_raw, training_op):
            g.add_to_collection("main_ops", node)
        for node in (preds, y_true_cls, y_pred_cls, correct_prediction):
            g.add_to_collection("preds", node)
        for node in (train_auc, train_auc_update, train_acc, train_acc_update, train_met_reset_op):
            g.add_to_collection("train_metrics", node)
        for node in (val_auc, val_auc_update, val_acc, val_acc_update, val_met_reset_op):
            g.add_to_collection("val_metrics", node)
        for node in (test_auc, test_auc_update, test_acc, test_acc_update, test_acc_reset_op):
            g.add_to_collection("test_metrics", node)
        for node in (train_mean_loss, train_mean_loss_update, train_loss_reset_op):
            g.add_to_collection("train_loss", node)
        for node in (val_mean_loss, val_mean_loss_update, val_loss_reset_op):
            g.add_to_collection("val_loss", node)
        for node in (test_mean_loss, test_mean_loss_update, test_loss_reset_op):
            g.add_to_collection("test_loss", node)
        g.add_to_collection("logits", logits)

        # ===================================== tensorboard
        with tf.name_scope("tensorboard_writer") as scope:
            # ===== epoch, training
            epoch_train_loss_scalar = tf.summary.scalar('train_epoch_loss', train_mean_loss)
            epoch_train_acc_scalar = tf.summary.scalar('train_epoch_acc', train_acc)
            epoch_train_auc_scalar = tf.summary.scalar('train_epoch_auc', train_auc)
            epoch_train_write_op = tf.summary.merge([epoch_train_loss_scalar, epoch_train_acc_scalar, epoch_train_auc_scalar], name="epoch_train_write_op")

            # ===== epoch, validation
            epoch_validation_loss_scalar = tf.summary.scalar('validation_epoch_loss', val_mean_loss)
            epoch_validation_acc_scalar = tf.summary.scalar('validation_epoch_acc', val_acc)
            epoch_validation_auc_scalar = tf.summary.scalar('validation_epoch_auc', val_auc)
            epoch_validation_write_op = tf.summary.merge([epoch_validation_loss_scalar, epoch_validation_acc_scalar, epoch_validation_auc_scalar], name="epoch_validation_write_op")

        for node in (epoch_train_write_op, epoch_validation_write_op):
            g.add_to_collection("tensorboard", node)


    return g

In [9]:
def _run_over_dataset(sess, next_element, fetches, X, y_raw):
    """Run `fetches` once per batch until `next_element` is exhausted.

    Labels are reshaped from (batch,) to (batch, 1) before being fed to the
    `y_raw` placeholder.
    """
    while True:
        try:
            images, labels = sess.run(next_element)
            labels = np.reshape(labels, (labels.shape[0], 1))
            sess.run(fetches, feed_dict={X: images, y_raw: labels})
        except tf.errors.OutOfRangeError:
            # the one-pass iterator signals the end of the split this way
            break


def train_graph(g, data_params=None):
    """Train the graph from `build_graph`, logging per-epoch metrics and
    saving the parameters with the lowest validation loss.

    Args:
        g: graph returned by build_graph (its collections are read here).
        data_params: hyper-parameter dict ('n_epochs', 'batch_size',
            'buffer_size'). When omitted, the notebook-level `data_params`
            variable is used, which is what the function previously relied on
            implicitly.

    Returns:
        The tf.Session used for training. It has already been closed when this
        function returns (the `with` block has exited), so it cannot be used
        to run further ops.

    Side effects:
        Writes TensorBoard summaries to trial/tf_logs/{train,validation} (the
        directory make_standard_dirs creates) and pickles the best params via
        save_obj(..., BEST_PARAMS_PATH).
    """
    if data_params is None:
        try:
            data_params = globals()["data_params"]
        except KeyError:
            raise NameError("name 'data_params' is not defined") from None

    _, init_global, init_local = g.get_collection("save_init")
    X, y_raw, training_op = g.get_collection("main_ops")
    _, train_auc_update, _, train_acc_update, train_met_reset_op = g.get_collection("train_metrics")
    _, val_auc_update, val_acc, val_acc_update, val_met_reset_op = g.get_collection("val_metrics")
    _, train_mean_loss_update, train_loss_reset_op = g.get_collection("train_loss")
    val_mean_loss, val_mean_loss_update, val_loss_reset_op = g.get_collection("val_loss")
    epoch_train_write_op, epoch_validation_write_op = g.get_collection("tensorboard")

    train_fetches = [training_op, train_auc_update, train_acc_update, train_mean_loss_update]
    val_fetches = [val_auc_update, val_acc_update, val_mean_loss_update]

    log_root = os.path.join("trial", "tf_logs")
    train_writer = tf.summary.FileWriter(os.path.join(log_root, "train"))
    val_writer = tf.summary.FileWriter(os.path.join(log_root, "validation"))

    best_val_loss = np.inf

    try:
        with tf.Session(graph=g) as sess:
            sess.run([init_global, init_local])

            for e in tqdm(range(1, data_params['n_epochs'] + 1)):
                # streaming metrics accumulate, so zero them at the start of each epoch
                sess.run([val_met_reset_op, val_loss_reset_op, train_met_reset_op, train_loss_reset_op])

                # training: one full pass over the training set
                # NOTE: return_batched_iter adds a new placeholder/dataset/iterator
                # to the graph on every call, so the graph grows each epoch.
                tr_iter = return_batched_iter('train', data_params, sess)
                _run_over_dataset(sess, tr_iter.get_next(), train_fetches, X, y_raw)

                # write average for epoch
                summary = sess.run(epoch_train_write_op)
                train_writer.add_summary(summary, e)
                train_writer.flush()

                # validation: metrics only, no training op
                val_iter = return_batched_iter('val', data_params, sess)
                _run_over_dataset(sess, val_iter.get_next(), val_fetches, X, y_raw)

                # check for (and save) best validation params here
                cur_loss, cur_acc = sess.run([val_mean_loss, val_acc])
                if cur_loss < best_val_loss:
                    best_val_loss = cur_loss
                    best_params = get_model_params()
                    save_obj(best_params, BEST_PARAMS_PATH)
                    print("best params saved: val acc: {:.3f}% val loss: {:.4f}".format(cur_acc*100, cur_loss))

                summary = sess.run(epoch_validation_write_op)
                val_writer.add_summary(summary, e)
                val_writer.flush()
    finally:
        # close the event files even if training is interrupted or fails
        train_writer.close()
        val_writer.close()
    return sess

In [10]:
# Train from scratch: clear/seed the default graph, build a fresh model graph
# and run the training loop on it.
reset_graph()
# kept at notebook level: train_graph looks this name up as a global
data_params = create_hyper_params()
g = build_graph(data_params)
# NOTE: the returned session was opened in a `with` block inside train_graph,
# so it is already closed here
sess = train_graph(g)

 20%|██        | 1/5 [00:17<01:09, 17.26s/it]

best params saved: val acc: 66.875% val loss: 0.6003


 40%|████      | 2/5 [00:32<00:48, 16.27s/it]

best params saved: val acc: 67.800% val loss: 0.5869


 60%|██████    | 3/5 [00:48<00:32, 16.10s/it]

best params saved: val acc: 72.025% val loss: 0.5530


 80%|████████  | 4/5 [01:03<00:15, 15.89s/it]

best params saved: val acc: 73.250% val loss: 0.5433


100%|██████████| 5/5 [01:18<00:00, 15.76s/it]

best params saved: val acc: 72.400% val loss: 0.5393





In [11]:
# Evaluate on the test split: rebuild the same architecture in a new graph and
# load the best parameters that training pickled to disk.
reset_graph()
data_params = create_hyper_params()
g2 = build_graph(data_params)
best_params = load_obj(BEST_PARAMS_PATH)
with tf.Session(graph=g2) as sess:
    saver, init_global, init_local = g2.get_collection("save_init")
    X, y_raw, training_op = g2.get_collection("main_ops")
    preds, y_true_cls, y_pred_cls, _ = g2.get_collection("preds")
    test_auc, test_auc_update, test_acc, test_acc_update, test_acc_reset_op = g2.get_collection("test_metrics")
    test_mean_loss, test_mean_loss_update, test_loss_reset_op = g2.get_collection("test_loss")
    
    # init_global is not run here: the variables get their values from the
    # saved params, which are fed through each variable's Assign op
    restore_model_params(model_params=best_params, g=g2, sess=sess)
    # the reset ops (re)initialize the local variables behind the test metrics
    sess.run([test_acc_reset_op, test_loss_reset_op])
    
    # one pass over the test set, accumulating the streaming metrics
    test_iter = return_batched_iter('test', data_params, sess)
    next_test_element = test_iter.get_next()
    while True:
        try:
            Xb, yb = sess.run(next_test_element)
            # reshape labels from (batch,) to (batch, 1) to match y_raw
            yb = np.reshape(yb, (yb.shape[0], 1))
            sess.run([test_auc_update, test_acc_update, test_mean_loss_update], feed_dict={X:Xb, y_raw:yb})
        except tf.errors.OutOfRangeError:
            break    
    
    # print
    final_test_acc, final_test_loss, final_test_auc = sess.run([test_acc, test_mean_loss, test_auc])
    print("test auc: {:.3f}% acc: {:.3f}% loss: {:.5f}".format(final_test_auc*100, 
                                                              final_test_acc*100,
                                                              final_test_loss))

test auc: 80.719% acc: 72.380% loss: 0.54868
