In [1]:
from __future__ import print_function


import scipy.io
import time
import tensorflow as tf
import os
from PIL import Image, ImageOps
import numpy as np
import scipy.misc
from six.moves import urllib


  return f(*args, **kwds)


In [2]:

def download(download_link, file_name, expected_bytes):
    if os.path.exists(file_name):
        print("pre-trained model ready")
        return
    print("Downloading the pre-trained model. This might take a while ...")
    file_name, _ = urllib.request.urlretrieve(download_link, file_name)
    file_stat = os.stat(file_name)
    if file_stat.st_size == expected_bytes:
        print('Successfully downloaded  pre-trained model', file_name)
    else:
        raise Exception('File ' + file_name +
                        ' might be corrupted. You should try downloading it with a browser.')
        

def get_resized_image(img_path, height, width, save=True):
    image = Image.open(img_path)
    # it's because PIL is column major so you have to change place of width and height
    # this is stupid, i know
    image = ImageOps.fit(image, (width, height), Image.ANTIALIAS)
    if save:
        image_dirs = img_path.split('/')
        image_dirs[-1] = 'resized_' + image_dirs[-1]
        out_path = '/'.join(image_dirs)
        if not os.path.exists(out_path):
            image.save(out_path)
    image = np.asarray(image, np.float32)
    return np.expand_dims(image, 0)

def generate_noise_image(content_image, height, width, noise_ratio=0.6):
    noise_image = np.random.uniform(-20, 20,
                                    (1, height, width, 3)).astype(np.float32)
    return noise_image * noise_ratio + content_image * (1 - noise_ratio)

def save_image(path, image):
    # Output should add back the mean pixels we subtracted at the beginning
    image = image[0] # the image
    image = np.clip(image, 0, 255).astype('uint8')
    scipy.misc.imsave(path, image)

def make_dir(path):
    """ Create a directory if there isn't one already. """
    try:
        os.mkdir(path)
    except OSError:
        pass


In [3]:
# LOAD ALEX NET MODEL
def load_image(filename, max_size=None):
    image = Image.open(filename)

    if max_size is not None:
        # Calculate the appropriate rescale-factor for
        # ensuring a max height and width, while keeping
        # the proportion between them.
        factor = max_size / np.max(image.size)

        # Scale the image's height and width.
        size = np.array(image.size) * factor

        # The size is now floating-point because it was scaled.
        # But PIL requires the size to be integers.
        size = size.astype(int)

        # Resize the image.
        image = image.resize(size, Image.LANCZOS)

    # Convert to numpy floating-point array.
    return np.float32(image)


def _weights(trained_layers, layer_idx, expected_layer_name):
    """ Return the weights and biases already trained by the net """
    W = trained_layers[0][layer_idx][0][0][2][0][0]
    b = trained_layers[0][layer_idx][0][0][2][0][1]
    layer_name = trained_layers[0][layer_idx][0][0][0][0]

    assert layer_name == expected_layer_name

    # if (layer_idx == 0): put this in comment to fix for laoding alex net
    b = b.reshape(b.size)

    return W, b


def _conv2d_relu_2(trained_layers, prev_layer, layer_idx, layer_name, s_h, s_w, group, padding='SAME'):
    """
    Adapted from: https://github.com/ethereon/caffe-tensorflow
    """
    with tf.variable_scope(layer_name) as scope:
        w, b = _weights(trained_layers, layer_idx, layer_name)
        w = tf.constant(w, name='weights')
        b = tf.constant(b, name='bias')

        convolve = lambda inputs, weigths: tf.nn.conv2d(inputs, weigths, [1, s_h, s_w, 1], padding=padding)

        if group == 1:
            conv2d = convolve(prev_layer, w)
        else:
            # Split input and weights and conv2d them separately
            input_groups = tf.split(prev_layer, group, 3)
            kernel_groups = tf.split(w, group, 3)
            output_groups = [tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
                             for i, k in zip(input_groups, kernel_groups)]
            conv2d = tf.concat(output_groups, 3)

        conv_lin = tf.reshape(tf.nn.bias_add(conv2d, b), [-1] + conv2d.get_shape().as_list()[1:], name='lin')
        conv_relu = tf.nn.relu(conv_lin, name='relu')

        return conv_relu


def _maxpool(prev_layer, layer_name):
    return tf.nn.max_pool(prev_layer, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name=layer_name)


def _avgpool(prev_layer, layer_name):
    return tf.nn.avg_pool(prev_layer, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name=layer_name)


def _lrn(prev_layer, layer_name):
    return tf.nn.local_response_normalization(prev_layer, depth_radius=2, alpha=2e-05, beta=0.75, bias=1.0,
                                              name=layer_name)


def load_net_from_file(path, input_image):
    """ Load net into a TensorFlow model. Use a dictionary to hold the model instead of using a Python class """
    net = scipy.io.loadmat(path)
    trained_layers = net['layers']

    graph = {}
    graph['conv1'] = _conv2d_relu_2(trained_layers, input_image, 0, 'conv1', s_h=4, s_w=4, group=1)
    graph['norm1'] = _lrn(graph['conv1'], 'norm1')
    graph['pool1'] = _avgpool(graph['norm1'], 'pool1')

    graph['conv2'] = _conv2d_relu_2(trained_layers, graph['pool1'], 4, 'conv2', s_h=1, s_w=1, group=2)
    graph['norm2'] = _lrn(graph['conv2'], 'norm2')
    graph['pool2'] = _avgpool(graph['norm2'], 'pool2')

    graph['conv3'] = _conv2d_relu_2(trained_layers, graph['pool2'], 8, 'conv3', s_h=1, s_w=1, group=1)
    graph['conv4'] = _conv2d_relu_2(trained_layers, graph['conv3'], 10, 'conv4', s_h=1, s_w=1, group=2)
    graph['conv5'] = _conv2d_relu_2(trained_layers, graph['conv4'], 12, 'conv5', s_h=1, s_w=1, group=2)

    return graph



# THIS IS THE END OF ALEXNET STYLE TRANSFER


In [4]:

""" An implementation of the paper "A Neural Algorithm of Artistic Style"
by Gatys et al. in TensorFlow.

Author: Chip Huyen (huyenn@stanford.edu)
Prepared for the class CS 20SI: "TensorFlow for Deep Learning Research"
For more details, please read the assignment handout:
http://web.stanford.edu/class/cs20si/assignments/a2.pdf
"""


os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


# new alex net
DOWNLOAD_LINK = 'http://www.vlfeat.org/matconvnet/models/imagenet-caffe-alex.mat'
MODEL = 'imagenet-caffe-alex.mat'
EXPECTED_BYTES = 228041398

STYLE = 'vanGogh'
CONTENT = 'maayan'
STYLE_IMAGE = '/Users/ybercovich/Documents/mta_tensorflow/assignment2/images/' + STYLE + '.jpg'
CONTENT_IMAGE = '/Users/ybercovich/Documents/mta_tensorflow/assignment2/images/' + CONTENT + '.jpg'

#alexnet is trained for 227 * 227 sized images
IMAGE_HEIGHT = 500 #227
IMAGE_WIDTH = 500 #227

CONTENT_WEIGHT = 0.50 #0.01
STYLE_WEIGHT = 1000 #1

NOISE_RATIO = 0.6  # percentage of weight of the noise for intermixing with the content image


# Layers used for style features. You can change this.
#STYLE_LAYERS = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
STYLE_LAYERS = ['conv1', 'conv3', 'conv4', 'conv5']
W = [0.5, 1.5, 3.0, 4.0]  # give more weights to deeper layers.

# Layer used for content features. You can change this.
CONTENT_LAYER = 'conv2'

ITERS = 600
LR = 2.0

MEAN_PIXELS = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
""" MEAN_PIXELS is defined according to description on their github:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8
'In the paper, the model is denoted as the configuration D trained with scale jittering. 
The input images should be zero-centered by mean pixel (rather than mean image) subtraction. 
Namely, the following BGR values should be subtracted: [103.939, 116.779, 123.68].'
"""



# VGG-19 parameters file
#VGG_DOWNLOAD_LINK = 'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat'
#VGG_MODEL = 'imagenet-vgg-verydeep-19.mat'
#EXPECTED_BYTES = 534904783


def _create_content_loss(p, f):
    """ Calculate the loss between the feature representation of the
    content image and the generated image.

    Inputs:
        p, f are just P, F in the paper
        (read the assignment handout if you're confused)
        Note: we won't use the coefficient 0.5 as defined in the paper
        but the coefficient as defined in the assignment handout.
    Output:
        the content loss

    """
    return tf.reduce_sum((f - p) ** 2) / (4.0 * p.size)


def _gram_matrix(F, N, M):
    """ Create and return the gram matrix for tensor F
        Hint: you'll first have to reshape F
    """
    F = tf.reshape(F, (M, N))
    return tf.matmul(tf.transpose(F), F)


def _single_style_loss(a, g):
    """ Calculate the style loss at a certain layer
    Inputs:
        a is the feature representation of the real image
        g is the feature representation of the generated image
    Output:
        the style loss at a certain layer (which is E_l in the paper)

    Hint: 1. you'll have to use the function _gram_matrix()
        2. we'll use the same coefficient for style loss as in the paper
        3. a and g are feature representation, not gram matrices
    """
    N = a.shape[3]  # number of filters
    M = a.shape[1] * a.shape[2]  # height times width of the feature map
    A = _gram_matrix(a, N, M)
    G = _gram_matrix(g, N, M)
    return tf.reduce_sum((G - A) ** 2 / ((2 * N * M) ** 2))


def _create_style_loss(A, model):
    """ Return the total style loss
    """
    n_layers = len(STYLE_LAYERS)
    E = [_single_style_loss(A[i], model[STYLE_LAYERS[i]]) for i in range(n_layers)]

    return sum([W[i] * E[i] for i in range(n_layers)])


def _create_losses(model, input_image, content_image, style_image):
    with tf.variable_scope('loss') as scope:
        with tf.Session() as sess:
            sess.run(input_image.assign(content_image))  # assign content image to the input variable
            p = sess.run(model[CONTENT_LAYER])
        content_loss = _create_content_loss(p, model[CONTENT_LAYER])

        with tf.Session() as sess:
            sess.run(input_image.assign(style_image))
            A = sess.run([model[layer_name] for layer_name in STYLE_LAYERS])
        style_loss = _create_style_loss(A, model)


        total_loss = CONTENT_WEIGHT * content_loss + STYLE_WEIGHT * style_loss


    return content_loss, style_loss, total_loss


def _create_summary(model):
    """ Create summary ops necessary
        Hint: don't forget to merge them
    """
    with tf.name_scope('summaries'):
        tf.summary.scalar('content loss', model['content_loss'])
        tf.summary.scalar('style loss', model['style_loss'])
        tf.summary.scalar('total loss', model['total_loss'])
        tf.summary.histogram('histogram content loss', model['content_loss'])
        tf.summary.histogram('histogram style loss', model['style_loss'])
        tf.summary.histogram('histogram total loss', model['total_loss'])
        return tf.summary.merge_all()





In [5]:
def train(model, generated_image, initial_image):
    """ Train your model.
    Don't forget to create folders for checkpoints and outputs.
    """
    skip_step = 1
    with tf.Session() as sess:

        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter('graphs', sess.graph)

        sess.run(generated_image.assign(initial_image))
        ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        initial_step = model['global_step'].eval()

        start_time = time.time()
        for index in range(initial_step, ITERS):
            if index >= 5 and index < 20:
                skip_step = 10
            elif index >= 20:
                skip_step = 20

            sess.run(model['optimizer'])
            if (index + 1) % skip_step == 0:

                gen_image, total_loss, summary = sess.run([generated_image, model['total_loss'],
                                                           model['summary_op']])

                gen_image = gen_image + MEAN_PIXELS
                writer.add_summary(summary, global_step=index)
                print('Step {}\n   Sum: {:5.1f}'.format(index + 1, np.sum(gen_image)))
                print('   Loss: {:5.1f}'.format(total_loss))
                print('   Time: {}'.format(time.time() - start_time))
                start_time = time.time()

                filename = 'outputs/%d.png' % (index)

                save_image(filename, gen_image)

                if (index + 1) % 20 == 0:
                    saver.save(sess, 'checkpoints/style_transfer', index)


In [6]:


def main():
    with tf.variable_scope('input') as scope:
        # use variable instead of placeholder because we're training the intial image to make it
        # look like both the content image and the style image
        input_image = tf.Variable(np.zeros([1, IMAGE_HEIGHT, IMAGE_WIDTH, 3]), dtype=tf.float32)

    download(DOWNLOAD_LINK, MODEL, EXPECTED_BYTES)

    make_dir('checkpoints')
    make_dir('outputs')

    model = load_net_from_file(MODEL, input_image)
    model['global_step'] = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

    content_image = get_resized_image(CONTENT_IMAGE, IMAGE_HEIGHT, IMAGE_WIDTH)
    content_image = content_image - MEAN_PIXELS

    style_image = get_resized_image(STYLE_IMAGE, IMAGE_HEIGHT, IMAGE_WIDTH)
    style_image = style_image - MEAN_PIXELS

    model['content_loss'], model['style_loss'], model['total_loss'] = _create_losses(model,
                                                                                     input_image, content_image,
                                                                                     style_image)

    model['optimizer'] = tf.train.AdamOptimizer(LR).minimize(model['total_loss'],
                                                             global_step=model['global_step'])
    model['summary_op'] = _create_summary(model)

    initial_image = generate_noise_image(content_image, IMAGE_HEIGHT, IMAGE_WIDTH, NOISE_RATIO)
    train(model, input_image, initial_image)




In [7]:
main()

pre-trained model ready
INFO:tensorflow:Summary name content loss is illegal; using content_loss instead.
INFO:tensorflow:Summary name style loss is illegal; using style_loss instead.
INFO:tensorflow:Summary name total loss is illegal; using total_loss instead.
INFO:tensorflow:Summary name histogram content loss is illegal; using histogram_content_loss instead.
INFO:tensorflow:Summary name histogram style loss is illegal; using histogram_style_loss instead.
INFO:tensorflow:Summary name histogram total loss is illegal; using histogram_total_loss instead.
INFO:tensorflow:Restoring parameters from checkpoints/style_transfer-299
Step 320
   Sum: 109148014.9
   Loss: 28253.2
   Time: 8.613300323486328
Step 340
   Sum: 109184894.3
   Loss: 25463.3
   Time: 8.366456031799316
Step 360
   Sum: 109219141.8
   Loss: 23065.3
   Time: 9.030625104904175
Step 380
   Sum: 109251296.4
   Loss: 20978.7
   Time: 8.804209232330322
Step 400
   Sum: 109281694.7
   Loss: 19148.9
   Time: 9.847063064575195
St