In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import os
import shutil
import sys
from cStringIO import StringIO

import numpy
import pandas as pd
import PIL.Image
import tensorflow as tf
from IPython.display import clear_output, Image, display, HTML

sys.path.append(os.path.abspath("./slim/"))
import slim

In [3]:
def create_dir(path):
    """Make sure `path` exists, creating it and any missing parents.

    Calling it on a path that already exists is a no-op, so the call is
    idempotent.

    Raises:
        OSError: if the directory could not be created and `path` still does
            not exist afterwards.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Attempt first, check afterwards: this closes the window in which
        # another process could create the directory between an exists() test
        # and makedirs(). Only "it is already there" is swallowed.
        if not os.path.exists(path):
            raise
        
def randfloat(a, b):
    """Return a random float drawn uniformly between `a` and `b`.

    Args:
        a: lower bound of the range.
        b: upper bound of the range.

    Returns:
        `a` plus a uniform fraction of `(b - a)`; equals `a` when `a == b`.
    """
    # The original used `rand.random()`, but no object named `rand` with a
    # `random` attribute is defined in the visible notebook; draw the uniform
    # [0, 1) sample from numpy's global generator explicitly instead.
    return numpy.random.random_sample() * (b - a) + a

def my_print(s):
    """Write `s` to standard output unchanged (no newline added) and flush."""
    stream = sys.stdout
    stream.write(s)
    # Flush right away so progress text shows up while a cell is still running.
    stream.flush()

In [4]:
# Directories and constants

# Mini-batch size and the side length (in pixels) of one square input image.
BATCH_SIZE = 32
IMAGE_SIZE = 128

# Input PNGs, summary logs and checkpoint location.
data_dir = "./data/arrows/input_col/"
logs_dir = "./logs_small/"
save_dir = "./save_small/"
save_file_path = os.path.join(save_dir, "model.ckpt")

# Output directories must exist before the saver / summary writer use them.
create_dir(logs_dir)
create_dir(save_dir)

In [5]:
# Parsing images

def _create_fname_producers():
    """Split the files in `data_dir` into train/test sets and queue their names.

    The first 90% of the sorted directory listing is used for training, the
    remainder for testing. The totals are reported through `my_print`.

    Returns:
        A pair `(train_queue, test_queue)` of filename queues created with
        `tf.train.string_input_producer`.
    """
    # os.listdir() makes no promise about ordering. Sorting keeps the split
    # identical between runs, so a restored checkpoint is not evaluated on
    # files that an earlier run used for training.
    lines = sorted(os.listdir(data_dir))
    cnt = len(lines)
    p = 0.9  # fraction of the files used for training
    train_cnt = int(cnt * p)
    my_print("Total: %i images, train: %i\n" % (cnt, train_cnt))
    train_lines = lines[:train_cnt]
    test_lines = lines[train_cnt:]
    return tf.train.string_input_producer(train_lines), tf.train.string_input_producer(test_lines)

def _read_image(fname):
    """Load one PNG from `data_dir` and cut it in two along axis 1.

    Args:
        fname: string tensor with a file name relative to `data_dir`
            (the caller passes `queue.dequeue()`).

    Returns:
        `(image1, image2)`: the two halves as float32 tensors whose static
        shape is set to `(IMAGE_SIZE, IMAGE_SIZE, 3)`.
    """
    with tf.name_scope("read_image"):
        full_path = tf.constant(data_dir) + fname
        encoded = tf.read_file(full_path)
        decoded = tf.image.decode_png(encoded, channels=3, dtype=tf.uint8)
        pixels = tf.cast(decoded, tf.float32)
        # Old-style signature: tf.split(split_dim, num_split, value).
        image1, image2 = tf.split(1, 2, pixels)
        half_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
        for half in (image1, image2):
            half.set_shape(half_shape)
    return image1, image2

def _generate_batch(image1, image2, batch_size, min_after_dequeue):
    """Build shuffled batches that contain every image pair in both orders.

    `batch_size // 2` pairs are dequeued per batch; each pair appears once as
    (image1, image2) with label 1 and once swapped with label 0.

    Args:
        image1, image2: the two tensors of one example.
        batch_size: size of the resulting batch (two entries per pair).
        min_after_dequeue: passed to `tf.train.shuffle_batch`; also sets the
            queue capacity together with `batch_size`.

    Returns:
        `(images12, images21, labels12)`.
    """
    pairs_per_batch = batch_size // 2
    with tf.name_scope("generate_batch"):
        first, second = tf.train.shuffle_batch(
            [image1, image2],
            batch_size=pairs_per_batch,
            capacity=min_after_dequeue + batch_size * 4,
            min_after_dequeue=min_after_dequeue,
            num_threads=5)

        images12 = tf.concat(0, [first, second])
        images21 = tf.concat(0, [second, first])
        ones = tf.constant(1, tf.int64, [pairs_per_batch])
        zeros = tf.constant(0, tf.int64, [pairs_per_batch])
        labels12 = tf.concat(0, [ones, zeros])
    return images12, images21, labels12

def _producer_to_batch(queue, BATCH_SIZE, min_after_dequeue=5000):
    """Turn a filename queue into batched `(images1, images2, labels)` tensors.

    Args:
        queue: filename queue to dequeue from.
        BATCH_SIZE: batch size forwarded to `_generate_batch` (this parameter
            shadows the module-level constant of the same name).
        min_after_dequeue: shuffle-buffer setting forwarded to
            `_generate_batch`.
    """
    next_fname = queue.dequeue()
    first, second = _read_image(next_fname)
    return _generate_batch(first, second, BATCH_SIZE,
                           min_after_dequeue=min_after_dequeue)
    
def get_data_batch(dual=True):
    """Create the train and test input pipelines.

    Args:
        dual: accepted but not used by this function.

    Returns:
        A 6-tuple: `(images1, images2, labels)` for training followed by the
        same three tensors for testing.
    """
    train_queue, test_queue = _create_fname_producers()
    train_batch = _producer_to_batch(train_queue, BATCH_SIZE)
    # The third positional argument is min_after_dequeue: the test pipeline
    # uses BATCH_SIZE here instead of the default of 5000.
    test_batch = _producer_to_batch(test_queue, BATCH_SIZE, BATCH_SIZE)
    return train_batch + test_batch

def dense_to_one_hot(label_batch, num_labels=2):
    """Convert a batch of integer class labels to float32 one-hot rows.

    Args:
        label_batch: tensor of class indices; it is flattened to one column.
        num_labels: number of classes, i.e. width of the one-hot output.

    Returns:
        float32 tensor of shape `[batch, num_labels]` with 1.0 at each label's
        index and 0.0 elsewhere.
    """
    with tf.name_scope("one_hot_encoder"):
        label_column = tf.reshape(label_batch, [-1, 1])
        label_column = tf.cast(label_column, tf.int32)
        batch_len = tf.shape(label_column)[0]
        row_ids = tf.reshape(tf.range(0, batch_len, 1), [-1, 1])
        # Each (row, label) pair is the coordinate of one "hot" entry.
        coords = tf.concat(1, [row_ids, label_column])
        dense_shape = tf.concat(0, [tf.reshape(batch_len, [1]), tf.reshape(num_labels, [1])])
        one_hot = tf.sparse_to_dense(coords, dense_shape, 1.0, 0.0)
        one_hot = tf.cast(one_hot, tf.float32)
    return one_hot


In [6]:
# Building network

def normalize_images(images1, images2):
    """Centre both image batches on the per-image mean of `images1`.

    The mean is taken over axes 1, 2 and 3 of `images1` (one value per batch
    entry) and subtracted from both inputs. Scaling by the standard deviation
    is not applied; that variant was commented out in the original code.

    Args:
        images1: 4-D image batch whose mean is used.
        images2: 4-D image batch centred with the same mean.

    Returns:
        `(images1 - mean1, images2 - mean1)`.
    """
    # keep_dims=True leaves the mean with shape [batch, 1, 1, 1], which
    # broadcasts against [batch, H, W, C] in the subtractions below.
    # The previous tf.tile(mean1, tf.shape(images1)) multiplied *every*
    # dimension by the image shape, giving [batch * batch, H, W, C]; that only
    # matches the images when batch == 1.
    mean1 = tf.reduce_mean(images1, [1, 2, 3], True)
    return (images1 - mean1), (images2 - mean1)

def build_classifier(images1, images2, trainable=True):
    """Build the convolutional network that classifies an image pair.

    The two batches are mean-centred, concatenated along axis 3 and passed
    through four conv / max-pool / LRN stages (scopes `conv1`..`conv4`), a
    final 2-channel convolution without activation (`conv5`), a mean over
    axes 1 and 2, and a softmax.

    Args:
        images1, images2: 4-D image batches.
        trainable: forwarded to every `slim.ops.conv2d` layer.

    Returns:
        The softmax output, one row of two class probabilities per batch entry.
    """
    images1, images2 = normalize_images(images1, images2)
    net = tf.concat(3, [images1, images2])

    weight_decay = 0.000005
    # Output depth of conv1 .. conv4, in order.
    stage_depths = (16, 32, 32, 32)

    with slim.arg_scope([slim.ops.conv2d], stddev=0.01, weight_decay=weight_decay, trainable=trainable):
        for stage, depth in enumerate(stage_depths, 1):
            net = slim.ops.repeat_op(1, net, slim.ops.conv2d, depth, [3, 3], scope='conv%d' % stage)
            net = slim.ops.max_pool(net, [2, 2], scope='pool%d' % stage)
            net = tf.nn.lrn(net, name='lrn%d' % stage)

        # Two output channels, one per class; no activation before the softmax.
        net = slim.ops.repeat_op(1, net, slim.ops.conv2d, 2, [3, 3], activation=None, scope='conv5')

        net = tf.reduce_mean(net, reduction_indices=[1, 2], name="reduce")
        net = tf.nn.softmax(net, name="softmax")

    return net
        
    

In [7]:
# Handling loss operations

def build_loss(logits, labels):
    """Mean cross-entropy between predicted probabilities and integer labels.

    Despite its name, `logits` is fed to `tf.log` directly, so it is treated
    as probabilities (train_classifier passes build_classifier's softmax
    output). The mean is taken over all entries of the per-class product.

    The resulting scalar is also added to `slim.losses.LOSSES_COLLECTION`.

    Args:
        logits: predicted class probabilities, one row per example.
        labels: integer class labels.

    Returns:
        The scalar cross-entropy tensor.
    """
    with tf.name_scope("cross_entropy"):
        int_labels = tf.cast(labels, tf.int64)
        targets = dense_to_one_hot(int_labels)
        # Clip away exact zeros so the logarithm stays finite.
        safe_probs = tf.clip_by_value(logits, 1e-10, 100.0)
        per_entry = -targets * tf.log(safe_probs)
        loss = tf.reduce_mean(per_entry, name='cross_entropy')
        tf.add_to_collection(slim.losses.LOSSES_COLLECTION, loss)

    return loss

def get_total_loss(losses_list):
    """Sum the given losses and attach summaries to each of them and the sum.

    Args:
        losses_list: list of scalar loss tensors.

    Returns:
        `(total_loss, loss_summary)`: the summed loss and the op returned by
        `_add_loss_summaries`.
    """
    summed = tf.add_n(losses_list, name='total_loss')
    tracked = losses_list + [summed]
    averages_op = _add_loss_summaries(tracked)
    return summed, averages_op

def _add_loss_summaries(losses_list):
    """Register raw and moving-average scalar summaries for every loss.

    Args:
        losses_list: scalar loss tensors to track.

    Returns:
        The op that updates the exponential moving averages (decay 0.9).
    """
    ema = tf.train.ExponentialMovingAverage(0.9, name='avg')
    update_op = ema.apply(losses_list)
    for loss in losses_list:
        tag = loss.op.name
        tf.scalar_summary(tag + '/raw', loss)
        tf.scalar_summary(tag + '/avg', ema.average(loss))

    return update_op

In [8]:
# Building train operation

def build_train(loss_op, step, init_rate=0.01, decay_steps=6000):
    """Create the Adam training op plus histogram summaries.

    Args:
        loss_op: scalar loss to minimise.
        step: global-step variable, incremented by the returned op.
        init_rate: learning rate handed to `tf.train.AdamOptimizer`.
        decay_steps: accepted but not used by this function.

    Returns:
        The op that applies one gradient update.
    """
    with tf.name_scope("train"):
        adam = tf.train.AdamOptimizer(init_rate)
        grads_and_vars = adam.compute_gradients(loss_op)

        update_op = adam.apply_gradients(grads_and_vars, global_step=step)

        for variable in tf.trainable_variables():
            tf.histogram_summary(variable.op.name, variable)

        for gradient, variable in grads_and_vars:
            if gradient is None:
                # Variable does not influence the loss: nothing to plot.
                continue
            tf.histogram_summary(variable.op.name + '/gradients', gradient)

    return update_op

In [9]:
# Loading & Saving session

def load(saver, sess, to_print=True):
    """Restore the checkpoint at `save_file_path` into `sess`.

    Args:
        saver: saver used for restoring.
        sess: session that receives the restored values.
        to_print: when true, report the restore through `my_print`.
    """
    saver.restore(sess, save_file_path)
    if not to_print:
        return
    my_print("Model restored.\n")
    
def save(saver, sess):
    """Write a checkpoint of `sess` to `save_file_path` and report its path."""
    written_to = saver.save(sess, save_file_path)
    my_print("Model saved in file: %s\n" % written_to)
    

In [10]:
def calc_accuracy(prediction, labels_sparse):
    """Fraction of rows whose arg-max class equals the integer label.

    Args:
        prediction: per-class scores, one row per example.
        labels_sparse: integer class labels (compared against `tf.argmax`).

    Returns:
        `(accuracy, accuracy_summary)`: the scalar accuracy tensor and its
        "accuracy" scalar summary.
    """
    predicted_class = tf.argmax(prediction, 1)
    hits = tf.cast(tf.equal(predicted_class, labels_sparse), tf.float32)
    accuracy = tf.reduce_mean(hits)

    return accuracy, tf.scalar_summary("accuracy", accuracy)

In [11]:
def create_input_placeholders(image_size):
    """Create the feed placeholders for two image batches and their labels.

    Args:
        image_size: list with the per-image shape; a leading `None` batch
            dimension is prepended to it.

    Returns:
        `(images1_ph, images2_ph, labels_ph)`: two float32 placeholders named
        "images1" / "images2" and an int64 placeholder named "labels".
    """
    batched_shape = [None] + image_size
    images1_ph, images2_ph = [tf.placeholder(tf.float32, batched_shape, ph_name)
                              for ph_name in ("images1", "images2")]
    labels_ph = tf.placeholder(tf.int64, [None], "labels")
    return images1_ph, images2_ph, labels_ph

### Main method

In [12]:
# Training network

def train_classifier(need_load, N=10000, init_rate=0.01):
    """Train the pair classifier, periodically evaluating and checkpointing.

    Every 11th iteration evaluates one test batch and prints the accuracy; all
    other iterations run one training step. Training summaries are written
    whenever the fetched global step is a multiple of 10, and a checkpoint is
    saved every 100 iterations (including the very first one).

    Args:
        need_load: when true, restore the checkpoint at `save_file_path`
            before training.
        N: number of loop iterations (evaluation iterations included).
        init_rate: learning rate forwarded to `build_train`.
    """
    with tf.Graph().as_default():

        step = slim.variables.variable('step', [], tf.int32, tf.constant_initializer(0), trainable=False)

        images1_train, images2_train, labels_train, images1_test, images2_test, labels_test = get_data_batch()
        train_batch = [images1_train, images2_train, labels_train]
        test_batch = [images1_test, images2_test, labels_test]

        images1_ph, images2_ph, labels_ph = create_input_placeholders([None, None, 3])
        net_op = build_classifier(images1_ph, images2_ph, trainable=True)

        # build_loss registers its result in slim's loss collection, which is
        # what the total loss is assembled from; the return value is not needed.
        build_loss(net_op, labels_ph)
        total_loss, loss_summary = get_total_loss(tf.get_collection(slim.losses.LOSSES_COLLECTION))

        with tf.control_dependencies([loss_summary]):
            train_op = build_train(total_loss, step, init_rate=init_rate)

        accuracy, _ = calc_accuracy(net_op, labels_ph)

        merged_summaries = tf.merge_all_summaries()

        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        saver = tf.train.Saver(var_list=tf.get_collection(slim.variables.VARIABLES_TO_RESTORE))
        coord = tf.train.Coordinator()
        writer = tf.train.SummaryWriter(logs_dir, sess.graph_def, flush_secs=30)

        def next_feed(batch_ops):
            """Pull one batch from the input queue and wrap it as a feed dict."""
            im1, im2, lab = sess.run(batch_ops)
            return {images1_ph: im1, images2_ph: im2, labels_ph: lab}

        threads = []
        try:
            # Initialisation happens inside the try block so that the session
            # and the writer are released even if start-up fails.
            sess.run(init)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            if need_load:
                load(saver, sess)

            my_print("Starting...\n")

            for i in xrange(0, N):
                if i % 11 == 0:
                    # Evaluation step. The summaries are still evaluated (as
                    # before) but not written to the log.
                    _, acc, st = sess.run([merged_summaries, accuracy, step],
                                          feed_dict=next_feed(test_batch))
                    print("Accuracy on test at step %s: %s" % (st, acc))
                else:
                    _, summary_str, acc, st = sess.run(
                        [train_op, merged_summaries, accuracy, step],
                        feed_dict=next_feed(train_batch))
                    if st % 10 == 0:
                        writer.add_summary(summary_str, st)

                if i % 100 == 0:
                    save(saver, sess)
        finally:
            try:
                coord.request_stop()
                coord.join(threads)
            finally:
                # Previously neither was closed, leaking the session's
                # resources and leaving buffered summaries unflushed.
                writer.close()
                sess.close()


In [13]:
# train_classifier(need_load=True, N=1000000, init_rate=0.0001)

### Generating inputs

In [14]:
def showarray(a, name=None, fmt='png'):
    """Show the first image of batch `a` inline, or save it to a file.

    Values are clipped to [0, 1] and scaled to 8-bit before encoding.

    Args:
        a: array whose first entry along axis 0 is the image to render.
        name: destination file; when None the image is displayed in the
            notebook instead.
        fmt: image format handed to PIL's `save`.
    """
    pixels = np.uint8(np.clip(a, 0, 1) * 255)[0]
    picture = PIL.Image.fromarray(pixels)
    if name is not None:
        picture.save(name, fmt)
        return
    # No file requested: encode into memory and hand the bytes to IPython.
    buf = StringIO()
    picture.save(buf, fmt)
    display(Image(data=buf.getvalue()))
    
def visstd(a, s=0.1):
    '''Rescale `a` for display: zero mean, standard deviation `s`, offset 0.5.

    The standard deviation is floored at 1e-4 so that a constant array maps
    to 0.5 everywhere instead of dividing by zero.
    '''
    centered = a - a.mean()
    spread = max(a.std(), 1e-4)
    return centered / spread * s + 0.5

In [15]:
def prepare_graph():
    """Build the classifier on fixed-size inputs, restore it, return a visualiser.

    The network is created in the current default graph with
    `IMAGE_SIZE` x `IMAGE_SIZE` inputs and its weights are restored from
    `save_file_path`.

    Returns:
        `maximize_output(layer_name, channel, iter_n=20, step=1.0,
        folder=None, seed=None)`: runs `iter_n` steps of gradient ascent on an
        image pair to increase the mean of one channel of the named tensor,
        then displays the pair side by side or, when `folder` is given, saves
        it as `<folder><channel>.png`.
    """
    images1_ph, images2_ph, _ = create_input_placeholders([IMAGE_SIZE, IMAGE_SIZE, 3])
    build_classifier(images1_ph, images2_ph, trainable=True)

    init_op = tf.initialize_all_variables()

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    saver = tf.train.Saver(var_list=tf.get_collection(slim.variables.VARIABLES_TO_RESTORE))

    sess.run(init_op)
    load(saver, sess, to_print=False)

    def maximize_output(layer_name, channel, iter_n=20, step=1.0, folder=None, seed=None):

        layer_out = tf.get_default_graph().get_tensor_by_name(layer_name + ':0')
        t_score = tf.reduce_mean(layer_out[:, :, :, channel])
        grad_ops = tf.gradients(t_score, [images1_ph, images2_ph])

        if seed is not None:
            np.random.seed(seed)
        # Both images start from the same random noise.
        img1 = np.random.uniform(64, 192, size=(1, IMAGE_SIZE, IMAGE_SIZE, 3))
        img2 = img1.copy()

        fetches = [grad_ops[0], grad_ops[1], t_score]
        for _iteration in xrange(iter_n):
            g1, g2, _score = sess.run(fetches, {images1_ph: img1, images2_ph: img2})
            # Normalise by the joint standard deviation so `step` has the same
            # meaning for every layer and channel.
            scale = np.concatenate((g1, g2)).std() + 1e-8
            g1 /= scale
            g2 /= scale
            img1 += g1 * step
            img2 += g2 * step

        side_by_side = visstd(np.concatenate((img1, img2), 2))
        if folder is None:
            showarray(side_by_side)
        else:
            showarray(side_by_side, '%s%d.png' % (folder, channel))

    return maximize_output


In [16]:
# maximize_output = prepare_graph()

In [17]:
# cnts = [48, 64, 128, 128]
# for i in xrange(4):
#     print i
#     folder = 'images/%d/' % i
#     create_dir(folder)
#     for j in xrange(cnts[i]):
#         maximize_output('conv%i/Conv/Relu' % (i + 1), j, 100, 10, folder=folder, seed=123)
#     clear_output()

In [18]:
# maximize_output('conv3/Conv/Relu', 31, 100, 10, seed=123)

### Multiscale generation

In [19]:
def tffunc(*argtypes):
    '''Decorator factory turning a TF-graph-building function into a plain one.

    One placeholder is created per entry of `argtypes`. The decorated function
    is called once with those placeholders to build its output tensor; the
    returned wrapper feeds its positional arguments into the placeholders and
    evaluates that tensor, optionally in the session passed as `session=`.
    See the "resize" function below for an example.
    '''
    placeholders = [tf.placeholder(dtype) for dtype in argtypes]

    def wrap(f):
        out = f(*placeholders)

        def wrapper(*args, **kw):
            feed = dict(zip(placeholders, args))
            return out.eval(feed, session=kw.get('session'))
        return wrapper
    return wrap

# Helper function that uses TF to resize an image
@tffunc(np.float32, np.int32)
def resize(img, size):
    """Bilinearly resize `img` to `size`.

    After decoration `resize(array, size)` takes and returns concrete values.
    The input is passed to `tf.image.resize_bilinear` as-is (no batch
    dimension is added or removed here).
    """
    return tf.image.resize_bilinear(img, size)


def calc_grad_tiled(sess, img1, img2, t_grad, images1_ph, images2_ph, tile_size=128):
    '''Evaluate `t_grad` tile by tile over a pair of image batches.

    Both images are rolled by the same random offset before tiling and the
    resulting gradients are rolled back afterwards, so that tile boundaries
    fall in different places on every call and get blurred over multiple
    iterations.

    Args:
        sess: session used to run `t_grad`.
        img1, img2: arrays indexed as [batch, y, x, ...].
        t_grad: fetch that yields the two gradients for one tile pair.
        images1_ph, images2_ph: feed keys for the two tiles.
        tile_size: edge length of a tile.

    Returns:
        `(grad1, grad2)` with the shapes of `img1` and `img2`; positions not
        covered by any tile stay zero.
    '''
    def _shift(batch, dx, dy):
        # Roll by dx along axis 2 and then by dy along axis 1.
        return np.roll(np.roll(batch, dx, 2), dy, 1)

    height, width = img1.shape[1:3]
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    shifted1 = _shift(img1, shift_x, shift_y)
    shifted2 = _shift(img2, shift_x, shift_y)
    grad1 = np.zeros_like(img1)
    grad2 = np.zeros_like(img2)

    y_stop = max(height - tile_size // 2, tile_size)
    x_stop = max(width - tile_size // 2, tile_size)
    for top in xrange(0, y_stop, tile_size):
        rows = slice(top, top + tile_size)
        for left in xrange(0, x_stop, tile_size):
            cols = slice(left, left + tile_size)
            feed = {images1_ph: shifted1[:, rows, cols],
                    images2_ph: shifted2[:, rows, cols]}
            tile_g1, tile_g2 = sess.run(t_grad, feed)
            grad1[:, rows, cols] = tile_g1
            grad2[:, rows, cols] = tile_g2
    return _shift(grad1, -shift_x, -shift_y), _shift(grad2, -shift_x, -shift_y)

In [20]:
def prepare_graph_multi():
        
    images1_ph, images2_ph, labels_ph = create_input_placeholders([None, None, 3])
    net_op = build_classifier(images1_ph, images2_ph, trainable=True)

    init = tf.initialize_all_variables()

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    saver = tf.train.Saver(var_list=tf.get_collection(slim.variables.VARIABLES_TO_RESTORE))

    sess.run(init)
    load(saver, sess, to_print=False)
    
    def maximize_output(layer_name, channel, octave_n=3, octave_scale=2, iter_n=20, step=1.0, folder=None, seed=None):

        t_obj = tf.get_default_graph().get_tensor_by_name(layer_name + ':0')[:, :, :, channel]
        t_score = tf.reduce_mean(t_obj)
        t_grad = tf.gradients(t_score, [images1_ph, images2_ph])

        if seed is not None:
            np.random.seed(seed)
        img1 = np.random.uniform(64, 192, size=(1, IMAGE_SIZE, IMAGE_SIZE, 3))
        img2 = img1.copy()
        
        for octave in xrange(octave_n):
            print layer_name, channel, octave, 
            if octave > 0:
                hw = np.float32(img1.shape[1:3]) * octave_scale
                with sess.as_default():
                    img1 = resize(img1, np.int32(hw))
                    img2 = resize(img2, np.int32(hw))
            for i in xrange(iter_n):
                g1, g2 = calc_grad_tiled(sess, img1, img2, t_grad, images1_ph, images2_ph)
                div = np.concatenate((g1, g2)).std() + 1e-8
                g1 /= div
                g2 /= div
                img1 += g1 * step
                img2 += g2 * step
                print '.',
            clear_output()
            showarray(visstd(np.concatenate((img1, img2), 2)))
        clear_output()
        if folder is None:
            showarray(visstd(np.concatenate((img1, img2), 2)))
        else:
            showarray(visstd(np.concatenate((img1, img2), 2)), '%s%d.png' % (folder, channel))
            
#         for i in xrange(iter_n):
#             g1, g2, score = sess.run([t_grad[0], t_grad[1], t_score], {images1_ph: img1, images2_ph: img2})
#             div = (np.concatenate((g1, g2))).std() + 1e-8
#             g1 /= div      
#             g2 /= div
#             img1 += g1 * step
#             img2 += g2 * step
#         if folder is None:
#             showarray(visstd(np.concatenate((img1, img2), 2)))
#         else:
#             showarray(visstd(np.concatenate((img1, img2), 2)), '%s%d.png' % (folder, channel))
            
    return maximize_output


In [21]:
# Build the variable-size graph and restore the checkpoint once; the returned
# closure is what the cells below call for every layer/channel.
maximize_output_multi = prepare_graph_multi()

In [None]:
# Render every channel of conv1..conv4 and save the results as
# images_hd_small/<layer>/<channel>.png.
# `cnts` holds the number of output channels per layer and has to match the
# depths used in build_classifier (16, 32, 32, 32); the commented-out list
# below is an alternative set of channel counts that is not in use.
# cnts = [48, 64, 128, 128]
cnts = [16, 32, 32, 32]
for i in xrange(4):
    print i + 1
    folder = 'images_hd_small/%d/' % (i + 1)
    create_dir(folder)
    for j in xrange(cnts[i]):
        # The same seed is used for every channel, so all runs start from the
        # same noise image.
        maximize_output_multi('conv%i/Conv/Relu' % (i + 1), j, octave_n=4, octave_scale=1.4, iter_n=50, step=10,
                        folder=folder, seed=123)
    clear_output()

In [25]:
# Maximise channel 0 of the conv5 output tensor (conv5 is built with
# activation=None, so presumably 'BiasAdd' is its last op - the exact tensor
# name depends on slim's internals). With folder='./' the result is written
# to ./0.png.
maximize_output_multi('conv5/Conv/BiasAdd', 0, octave_n=4, octave_scale=1.4, iter_n=400, step=10, seed=123, 
                      folder='./')




In [None]:
# gen_logs_dir = "./logs_col_gen/"

# def predict_image(fname, N=10000000, init_rate=0.001):
    
#     with tf.Graph().as_default() as g: 
        
#         step = slim.variables.variable('step_pred', [], tf.int32, tf.constant_initializer(0), 
#                                             trainable=False, restore=False)
        
#         image = imread(data_dir + fname, 'RGB')[:, :IMAGE_SIZE, :]
#         imsave('in.png', image)
#         imshow(image)
#         show()
#         image = np.reshape(image, (1, IMAGE_SIZE, IMAGE_SIZE, 3))
#         image1 = tf.constant(image, tf.float32, name='image1')
#         image2 = tf.Variable(image1, trainable=True, name='image2')
# #         image2 = tf.Variable(tf.random_uniform([1, IMAGE_SIZE, IMAGE_SIZE, 3], 0, 255), 
# #                     trainable=True, name='image2')
        
#         tf.image_summary("image1", image1)
#         tf.image_summary("image2", image2)
    
#         images12 = tf.concat(0, [image1, image2], 'images1')
#         images21 = tf.concat(0, [image2, image1], 'images2')
#         labels = tf.constant([1, 0], tf.int64, [2], "labels")
        
#         answer_op = build_classifier(images12, images21, trainable=False)
    
#         loss_op = build_loss(answer_op, labels)
#         losses_list = [
#             loss_op 
# #             , slim.losses.l2_loss(image1 - image2, weight=1e-7)
#         ]
#         total_loss = tf.add_n(losses_list, name='total_loss')
#         loss_summaries = _add_loss_summaries(losses_list + [total_loss])
    
#         with tf.control_dependencies([loss_summaries]):
#             train_op = build_train(total_loss, step, init_rate=init_rate)
    
#         correct_prediction = tf.equal(tf.argmax(answer_op, 1), labels)
#         accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#         accuracy_summary = tf.scalar_summary("accuracy", accuracy)

#         merged_summaries = tf.merge_all_summaries()

#         init = tf.initialize_all_variables()

#         sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
#         saver = tf.train.Saver(var_list=tf.get_collection(slim.variables.VARIABLES_TO_RESTORE))
#         coord = tf.train.Coordinator()
#         writer = tf.train.SummaryWriter(gen_logs_dir, sess.graph_def, flush_secs=30)

#         sess.run(init)


#         load(saver, sess)

#         my_print("Starting...\n")

#         for i in xrange(0, N):
#             result = sess.run([train_op, merged_summaries, accuracy, step, image2])
#             summary_str = result[1]
#             acc = result[2]
#             st = result[3]
#             img = result[4]
#             writer.add_summary(summary_str, st)
# #                     print("Accuracy at step %s: %s" % (st, acc))


#             if i % 1000 == 0:
#                 print i
#                 out = np.clip(img[0], 0, 255).astype(np.uint8)
#                 imsave('out.png', out)
# #                     save(saver, sess)


In [None]:
# predict_image("543.png", init_rate=.01)