## Importing Libraries

In [None]:
import os
import sys
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import numpy as np
import tensorflow as tf
from skimage.transform import resize

%matplotlib inline

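VGG-19, a pre-trained convolutional neural network, is used as the feature extractor. The function below loads its weights and rebuilds its convolutional layers in TensorFlow.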

In [None]:
def load_model(path, input_shape=(1, 300, 400, 3)):
    """
    Returns a model for the purpose of 'painting' the picture.
    Takes only the convolution layer weights and wrap using the TensorFlow
    Conv2d, Relu and AveragePooling layer. VGG actually uses maxpool but
    the paper indicates that using AveragePooling yields better results.
    The last few fully connected layers are not used.

    Arguments:
        path -- path to the MATLAB .mat file holding the pre-trained VGG-19 weights
        input_shape -- shape of the trainable input-image variable, (1, n_H, n_W, 3);
                       defaults to the 300 x 400 size used throughout this notebook

    Returns:
        graph -- dict mapping 'input', 'conv1_1' ... 'conv5_4' and
                 'avgpool1' ... 'avgpool5' to the corresponding TensorFlow tensors

    Here is the detailed configuration of the VGG model:
        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax
    """

    vgg = scipy.io.loadmat(path)

    vgg_layers = vgg['layers']

    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.

        The layer name stored in the file is checked against
        'expected_layer_name' so that a wrong index fails immediately.
        """
        wb = vgg_layers[0][layer][0][0][2]
        W = wb[0][0]
        b = wb[0][1]
        layer_name = vgg_layers[0][layer][0][0][0][0]
        assert layer_name == expected_layer_name, (
            "layer %d is %r, expected %r" % (layer, layer_name, expected_layer_name))
        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights, biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        # The pre-trained parameters are frozen as constants; only the input image is trainable.
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights, biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph model.
    # The integer passed to _conv2d_relu is the layer's index in the table above.
    graph = {}
    # The input is a Variable (not a placeholder) because the image itself is what gets optimized.
    graph['input']   = tf.Variable(np.zeros(input_shape), dtype = 'float32')
    graph['conv1_1']  = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2']  = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1']  = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2']  = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1']  = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2']  = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3']  = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4']  = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1']  = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2']  = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3']  = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4']  = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1']  = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2']  = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3']  = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4']  = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])

    return graph
    

In [None]:
# Smoke test: build the graph once to check that the weights file loads and that
# every layer index matches its expected name. `vgg_model` is not read by any
# later cell in this notebook; the model is built again after the graph reset.
vgg_model = load_model('imagenet-vgg-verydeep-19.mat') ## Test the model

Neural Style Transfer needs two images:
1. Content Image
2. Style Image

For the content image I will use "Sigiriya-Rock-fron-view.jpg", and for the style image I will use one of Picasso's paintings ("picasso-image.jpg").

In [None]:
# Read with PIL: scipy.misc.imread has been removed from current SciPy releases.
# convert("RGB") guarantees three channels even for grayscale or RGBA files.
content_image = np.array(Image.open("Sigiriya-Rock-fron-view.jpg").convert("RGB"))
# preserve_range=True keeps pixels on the 0-255 scale. By default skimage's resize
# rescales to [0, 1], which would make the later subtraction of the ~100-valued
# channel means wipe out the picture.
content_image = resize(content_image, (300, 400), preserve_range=True)
# imshow expects floats in [0, 1] or uint8 in [0, 255], so display a uint8 copy.
imshow(content_image.astype('uint8'))

In [None]:
# Read with PIL: scipy.misc.imread has been removed from current SciPy releases.
# convert("RGB") guarantees three channels even for grayscale or RGBA files.
style_image = np.array(Image.open("picasso-image.jpg").convert("RGB"))
# preserve_range=True keeps pixels on the 0-255 scale. By default skimage's resize
# rescales to [0, 1], which would make the later subtraction of the ~100-valued
# channel means wipe out the picture.
style_image = resize(style_image, (300, 400), preserve_range=True)
# imshow expects floats in [0, 1] or uint8 in [0, 255], so display a uint8 copy.
imshow(style_image.astype('uint8'))

In [None]:
# Per-channel offsets subtracted from every image before it enters the network
# (presumably the RGB channel means VGG was trained with -- confirm against the
# source of the weights). Stored as (1, 1, 1, 3) so it broadcasts over a
# (1, n_H, n_W, 3) image batch.
mean = np.array([[[[123.68, 116.779, 103.939]]]])

In [None]:
def reshape_and_normalize_image(image):
    """
    Add a leading batch axis to an image and centre it on the channel means.

    Arguments:
        image -- array exposing .shape (the notebook passes (n_H, n_W, 3) images)

    Returns:
        array of shape (1,) + image.shape with the module-level `mean` subtracted
    """

    # The network input is a batch of one, so prepend an axis of length 1.
    batch_shape = (1,) + image.shape
    batched = np.reshape(image, batch_shape)

    return batched - mean

In [None]:
def generate_noise_image(content_image, noise_ratio = 0.6):
    """
    Generates a noisy image by blending uniform random noise into the content_image.

    Arguments:
        content_image -- normalized content image, normally of shape (1, n_H, n_W, 3)
        noise_ratio -- weight of the noise in the blend; the content image gets
                       (1 - noise_ratio)

    Returns:
        input_image -- noise * noise_ratio + content_image * (1 - noise_ratio)
    """

    content = np.asarray(content_image)

    # Size the noise from the content image itself rather than a fixed 300 x 400.
    # Inputs that are not 4-D keep the legacy shape and rely on broadcasting,
    # exactly as before.
    noise_shape = content.shape if content.ndim == 4 else (1, 300, 400, 3)

    # Generate a random noise_image with values in [-20, 20)
    noise_image = np.random.uniform(-20, 20, noise_shape).astype('float32')

    # Set the input_image to be a weighted average of the content_image and a noise_image
    input_image = noise_image * noise_ratio + content * (1 - noise_ratio)

    return input_image

### Computing the Content Cost

$$J_{content}(C,G) =  \frac{1}{4 \times n_H \times n_W \times n_C}\sum _{ \text{all entries}} (a^{(C)} - a^{(G)})^2\tag{1} $$

In [None]:
def compute_content_cost(a_C, a_G):
    """
    This function computes the content cost

    Arguments:
        a_C - (1, n_H, n_W, n_C) dimension Tensor or array - Layer L activations for Content Image
        a_G - (1, n_H, n_W, n_C) dimension Tensor - Layer L activations for Generated Image

    Returns:
        J_content - scalar Tensor, sum of squared differences divided by 4 * n_H * n_W * n_C
    """

    # Read the static shape from the graph tensor as plain Python ints (same
    # approach as compute_style_layer_cost). This works whether a_C is an
    # evaluated array or a Tensor.
    m, n_H, n_W, n_C = a_G.get_shape().as_list()

    a_C_unrolled = tf.reshape(a_C, [n_H*n_W, n_C])
    a_G_unrolled = tf.reshape(a_G, [n_H*n_W, n_C])

    # Float numerator so the scale factor can never collapse to 0 through integer division.
    J_content = (1.0/(4 * n_H * n_W * n_C)) * tf.reduce_sum(tf.square(a_C_unrolled - a_G_unrolled))

    return J_content

In [None]:
def gram_matrix(A):
    """
    Gram matrix of a set of row vectors: the product of A with its own transpose.

    Argument:
        A -- Activation of Layer L, shape (n_C, n_H*n_W)

    Return:
        Tensor of shape (n_C, n_C) equal to A times A-transposed
    """

    A_transposed = tf.transpose(A)
    return tf.matmul(A, A_transposed)

### Computing the Style Cost

$$J_{style}^{[l]}(S,G) = \frac{1}{4 \times {n_C}^2 \times (n_H \times n_W)^2} \sum _{i=1}^{n_C}\sum_{j=1}^{n_C}(G^{(S)}_{ij} - G^{(G)}_{ij})^2\tag{2} $$

In [None]:
def compute_style_layer_cost(a_G, a_S):
    """
    Style cost contributed by a single layer.

    Arguments:
        a_G  - Tensor - activations of layer L of the Generated Image (1, n_H, n_W, n_C)
        a_S  - Tensor - activations of layer L of the Style Image (1, n_H, n_W, n_C)

    Returns:
        scalar Tensor: squared difference between the two Gram matrices,
        divided by 4 * n_C^2 * (n_H * n_W)^2
    """

    _, height, width, channels = a_G.get_shape().as_list()
    positions = height * width

    def _channels_first(activations):
        # (1, n_H, n_W, n_C) -> (n_H*n_W, n_C) -> (n_C, n_H*n_W), the layout gram_matrix expects
        flattened = tf.reshape(activations, [positions, channels])
        return tf.transpose(flattened)

    generated_rows = _channels_first(a_G)
    style_rows = _channels_first(a_S)

    gram_generated = gram_matrix(generated_rows)
    gram_style = gram_matrix(style_rows)

    # Normalising constant from the layer style-cost formula
    normaliser = 4 * (channels**2) * (positions**2)
    squared_distance = tf.reduce_sum(tf.square(gram_style - gram_generated))

    return squared_distance / normaliser

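Style weights: the overall style cost is a weighted sum of the per-layer style costs, with one weight $\lambda^{[l]}$ per layer.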
$$J_{style}(S,G) = \sum_{l} \lambda^{[l]} J^{[l]}_{style}(S,G)$$

In [None]:
# (layer name, weight) pairs consumed by compute_style_cost: the first
# convolution of each of the five VGG blocks, all weighted equally at 0.2.
STYLE_LAYERS = list(zip(
    ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'),
    (0.2,) * 5))

In [None]:
def compute_style_cost(model, STYLE_LAYERS, session=None):
    """
    Computes the overall style cost as a weighted sum of per-layer style costs.

    The style activations are evaluated immediately, so the style image must
    already be assigned to model['input'] when this function is called.

    Arguments:
        model        -- dict of layer name -> tensor, as returned by load_model
        STYLE_LAYERS -- iterable of (layer_name, weight) pairs
        session      -- TensorFlow session used to evaluate the style activations;
                        when omitted, the notebook-level `sess` is used

    Returns:
        J_style -- scalar Tensor (or 0 if STYLE_LAYERS is empty)
    """
    if session is None:
        # Fall back to the global session so existing two-argument calls keep working.
        session = sess

    J_style = 0

    for layer_name, layer_weight in STYLE_LAYERS:

        # Select the output tensor of the currently selected layer
        out = model[layer_name]

        # a_S is a fixed array: the layer's activations for the image currently in the input.
        a_S = session.run(out)
        # a_G stays symbolic, so it follows whatever is later assigned to the input.
        a_G = out

        J_style += layer_weight * compute_style_layer_cost(a_G, a_S)

    return J_style

## Computing Total Cost

In [None]:
def total_cost(J_content, J_style, alpha=10, beta=40):
    """
    Combine the content and style costs into the single objective to minimize.

    Arguments:
        J_content -- Content Cost
        J_style   -- Style Cost
        alpha     -- weight applied to J_content
        beta      -- weight applied to J_style
    Return:
        alpha * J_content + beta * J_style
    """

    weighted_content = alpha * J_content
    weighted_style = beta * J_style

    return weighted_content + weighted_style

## Training the model

In [None]:
# Reset the default graph so the model built below starts from an empty graph
# (the earlier load_model smoke test has already added its ops to the default graph).
tf.reset_default_graph()

# Start an interactive session. `sess` is read as a global by compute_style_cost
# and is passed explicitly to model_nn.
sess = tf.InteractiveSession()

In [None]:
# This cell rebinds its own inputs, so guard on the rank: images that already
# carry the batch axis (4-D) are left alone. Without the guard, re-running the
# cell would add a second batch axis and subtract the channel means twice.
if np.ndim(content_image) < 4:
    content_image = reshape_and_normalize_image(content_image)
if np.ndim(style_image) < 4:
    style_image = reshape_and_normalize_image(style_image)

In [None]:
# Starting point for the optimization: the normalized content image blended with
# uniform random noise (default noise_ratio of 0.6). No random seed is set, so
# the starting image differs from run to run.
generated_image = generate_noise_image(content_image)

In [None]:
# Build the VGG graph inside the freshly reset default graph. `model` is read as
# a global by the cost cells below and by model_nn.
model = load_model("imagenet-vgg-verydeep-19.mat")

In [None]:
# Load the content image into the network's input variable.
sess.run(model['input'].assign(content_image))

# Select the output tensor of layer conv4_2
out = model['conv4_2']
# a_C: conv4_2 activations evaluated now, while the input holds the content image (a fixed array).
a_C = sess.run(out)
# a_G: the same layer kept as a symbolic tensor, so it follows whatever is later assigned to the input.
a_G = out

# Compute the content cost
J_content = compute_content_cost(a_C, a_G)

In [None]:
# Load the style image into the network's input variable. This must happen before
# compute_style_cost, which evaluates the style activations immediately.
sess.run(model['input'].assign(style_image))

# Compute the style cost
J_style = compute_style_cost(model, STYLE_LAYERS)

In [None]:
# Total objective: 10 * content cost + 40 * style cost (the same values as total_cost's defaults).
J = total_cost(J_content, J_style, alpha = 10, beta = 40)

In [None]:
# Adam optimizer; 2.0 is the learning rate.
optimizer = tf.train.AdamOptimizer(2.0)
# Op that performs one optimization step on J. The only tf.Variable created by
# load_model is model['input'], so the image is what gets updated.
train_step = optimizer.minimize(J)

In [None]:
def _save_generated_image(path, image):
    """
    Write a normalized (1, n_H, n_W, 3) image to 'path' as an 8-bit picture.
    """
    directory = os.path.dirname(path)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    # Undo the mean subtraction applied by reshape_and_normalize_image, then
    # clamp to the valid 8-bit range before converting.
    pixels = np.clip(image[0] + mean[0], 0, 255).astype('uint8')
    Image.fromarray(pixels).save(path)


def model_nn(sess, input_image, num_iterations = 200):
    """
    Run the style-transfer optimization and return the final generated image.

    Relies on the notebook-level globals `model`, `train_step`, `J`,
    `J_content` and `J_style` built in the preceding cells.

    Arguments:
        sess           -- TensorFlow session that owns the graph
        input_image    -- initial generated image, same shape as model['input']
        num_iterations -- number of optimizer steps to run

    Returns:
        generated_image -- value of model['input'] after the last step

    Side effects:
        Every 20th iteration prints the costs and writes "output/<i>.png";
        the final image is written to 'output/generated_image.jpg'.
    """

    # Initialize global variables (you need to run the session on the initializer)
    sess.run(tf.global_variables_initializer())

    # Run the noisy input image (initial generated image) through the model. Use assign().
    sess.run(model['input'].assign(input_image))

    for i in range(num_iterations):
        # Run the session on the train_step to minimize the total cost
        sess.run(train_step)

        # Report and snapshot every 20 iterations.
        if i%20 == 0:
            # Fetch the image only when it is needed, together with the costs.
            Jt, Jc, Js, generated_image = sess.run([J, J_content, J_style, model['input']])
            print("Iteration " + str(i) + " :")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))

            # save current generated image in the "/output" directory
            _save_generated_image("output/" + str(i) + ".png", generated_image)

    # Read the final image after the loop so it is defined even when num_iterations is 0.
    generated_image = sess.run(model['input'])

    # save last generated image
    _save_generated_image('output/generated_image.jpg', generated_image)

    return generated_image

In [None]:
# Run the optimization for the default 200 iterations, starting from the noisy
# image. The returned array is the cell's last expression, so its repr is displayed.
model_nn(sess, generated_image)