This is an implementation of the artistic neural network as described in this [paper](https://arxiv.org/abs/1508.06576). VGG pretrained model weights can be downloaded from [here](http://www.vlfeat.org/matconvnet/models/). Use imagenet-vgg-verydeep-19.mat and imagenet-vgg-verydeep-16.mat

In [12]:
from __future__ import print_function
import os
import sys
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf

In [25]:
class VGG(object):
    
    def __init__(self):
        self.input_shape = [1, 600, 800, 3]
        self.graph = None
        self.content_layer = None
        self.style_layer = None
        self.path = None
        self.layer_names = []
    
    def load_model(self):
        """
        The pretrained model contains the layer name and layer type (i.e. pool, conv etc.)
        To access those information, we can do the index access:
        vgg_layers[0]       [0]      [0]      [0]      [2]                                    [0]      [0] ## weight
        vgg_layers[0]       [0]      [0]      [0]      [2]                                    [0]      [1] ## bias
                #  always 0 |layer idx|always 0|always 0|0:layer name; 1:layer type; 2: weights|always 0|0:weight; 1:bias
        vgg_layers[0][30][0][0][0][0] # to access layer name
        vgg_layers[0][30][0][0][1][0] # to access layer type
        
        Note that the fully connected layers and the softmax are not required for this task, therefore we will skip it. 
        The fully connected layers have name fc* (It's type is conv though).
        """
        if self.path == None:
            raise Exception("Run from the child class")
            
        vgg_layers = scipy.io.loadmat(self.path)['layers']
        num_layers = len(vgg_layers[0])
        
        graph = {}
        graph["input"] = tf.Variable(np.zeros(self.input_shape), dtype=tf.float32)
        prev = "input"
        self.layer_names.append("input")
        
        for idx in range(num_layers):
            
            layer_name = vgg_layers[0][idx][0][0][0][0]
            layer_type = vgg_layers[0][idx][0][0][1][0]
            
            if layer_name[:2] == "fc":
                break        # stop before adding the first fc layer
            
            self.layer_names.append(layer_name)
            
            if layer_type == "conv":
                W = vgg_layers[0][idx][0][0][2][0][0]
                b = vgg_layers[0][idx][0][0][2][0][1]
                W = tf.constant(W)   # we don't want to update the network
                b = tf.constant(np.reshape(b, (b.size)))
                graph[layer_name] = tf.nn.conv2d(graph[prev], filter=W, strides=[1, 1, 1, 1], padding="SAME") + b
            elif layer_type == "relu":
                graph[layer_name] = tf.nn.relu(graph[prev])
            elif layer_type == "pool":    # according to the paper, average pooling behaves better
                graph[layer_name] = tf.nn.avg_pool(graph[prev], ksize=[1, 2, 2, 1], 
                                                   strides=[1, 2, 2, 1], padding="SAME")
            
            prev = layer_name
        self.graph = graph
        
    def gram_matrix(self, F, N, M):
        """
        The gram matrix G.
        F -- the features
        N -- number of filters
        M -- hight x width of one feature map 
        Names as per paper
        """
        Ft = tf.reshape(F, (M, N))
        return tf.matmul(tf.transpose(Ft), Ft)
    
    def content_loss(self, sess, content_img):
        """
        Compute the context loss as described in the paper. We only need to do the forward 
        pass once on the content image
        
        sess -- the current session
        content_img -- the content image. should be a numpy array with dimension [1, hight, width, 3]
        Note: the dimension of the image should match with the one set at resize_input.
        Also, the image should be centered. The mean should be the training set mean of the VGG network.
        """
        if content_img.shape != self.input_shape:
            raise Exception("Dimension doesn't match")
        if self.content_layer == None:
            raise Exception("Call from the child class")
        sess.run(self.graph["input"].assign(content_img))
        P = sess.run(self.graph[self.content_layer])
        F = self.graph[self.content_layer]
        return 0.5 * tf.reduce_sum(tf.pow(F - P, 2))
        
    
    def style_loss(self, sess, style_img):
        """
        Compute the style loss as described in the papaer. Again, only do forward pass once for style image
        
        sess -- the current session
        sytle_image -- the style image. Should be a numpy array
        Note: the style image should also have the same dimension as the content image, either by cropping or 
        some other methods.
        
        The style layers will contain multiple layers. Should be a dictionary with keys are the layer name, 
        and values are the associated weight
        """
        if style_img.shape != self.input_shape:
            raise Exception("Dimension doesn't match")
        if self.style_layer == None:
            raise Exception("Call from child class")
        sess.run(self.graph["input"].assign(style_img))
        loss = 0
        for key in self.style_layer:
            P = sess.run(self.graph[key])
            N = P.shape[3]  # number of filters
            M = P.shape[1] * P.shape[2] # hight x width of one feature map 
            F = self.graph[key]
            A = gram_matrix(P, N, M)
            G = gram_matrix(F, N, M)
            loss += (1 / (4 * N**2 * M**2)) * tf.reduce_sum(tf.pow(G - A, 2)) * self.style_layer[key]
        return loss
        
    def emit(self):
        return self.graph
    
    def resize_input(self, new_shape):
        self.input_shape = new_shape
        tf.reshape(self.graph["input"], new_shape)
    
    def get_layer_names(self):
        return self.layer_names
    
    def get_instance(model_path, force=False):
        raise Exception("Do not instantiate this class")

In [29]:
class VGG19(VGG):
    
    instance = None
    
    def __init__(self, model_path):
        super(VGG19, self).__init__()
        self.path = model_path
        self.graph = self.load_model()
        self.content_layer = "relu4_2"
        self.style_layer = {
            "relu1_1": 0.5,
            "relu2_1": 1.0,
            "relu3_1": 1.5,
            "relu4_1": 3.0,
            "relu5_1": 4.0
        }
    
    def get_instance(model_path, force=False):
        """
        Singleton to avoid load graph on every run
        If force is True, then force reload
        """
        if instance == None or force:
            instance = VGG19(model_path)
        return instance
    
    get_instance = staticmethod(get_instance)

In [27]:
class VGG16(VGG):
    
    instance = None
    
    def __init__(self, model_path):
        super(VGG16, self).__init__()
        self.path = model_path
        self.graph = self.load_model()
        self.content_layer = "relu4_2"
        self.style_layer = {
            "relu1_2": 0.5,
            "relu2_2": 1.0,
            "relu3_3": 1.5,
            "relu4_3": 3.0,
            "relu5_3": 4.0
        }
    
    def get_instance(model_path, force=False):
        """
        Singleton to avoid load graph on every run
        If force is True, then force reload
        """
        if instance == None or force:
            instance = VGG16(model_path)
        return instance
    
    get_instance = staticmethod(get_instance)

In [32]:
class VGGFactory(object):
    
    def factory(name, model_path, force=False):
        """
        The factory to create the corresponding model we will use
        Available names include "VGG16" and "VGG19"
        If force is set, then force reload the graph
        """
        if name == "VGG16": return VGG16.get_instance(model_path, force)
        if name == "VGG19": return VGG19.get_instance(model_path, force)
    
    factory = staticmethod(factory)

In [33]:
vgg19 = VGGFactory.factory("VGG19", "models/imagenet-vgg-verydeep-19.mat")
print(vgg19.get_layer_names())
vgg16 = VGGFactory.factory("VGG16", "models/imagenet-vgg-verydeep-16.mat")
print(vgg16.get_layer_names())

UnboundLocalError: local variable 'instance' referenced before assignment

In [None]:
class Paint(object):
    vgg = None
    def 

In [136]:
vgg_layers = scipy.io.loadmat("models/imagenet-vgg-verydeep-16.mat")['layers']

In [145]:
w =     vgg_layers[0]       [0]      [0]      [0]      [2]                                    [0]      [0] ## weight
b =     vgg_layers[0]       [0]      [0]      [0]      [2]                                    [0]      [1] ## bias
                #  always 0 |layer idx|always 0|always 0|0:layer name; 1:layer type; 2: weights|always 0|0:weight; 1:bias
    
W = tf.constant(w)
b = tf.constant(b)
print W
print b

Tensor("Const_7:0", shape=(3, 3, 3, 64), dtype=float32)
Tensor("Const_8:0", shape=(64, 1), dtype=float32)


In [143]:
print len(vgg_layers[0])
# print vgg_layers[0][30][0][0][0][0]
# print vgg_layers[0][30][0][0][1][0]

for i in range(len(vgg_layers[0])):
    print vgg_layers[0][i][0][0][0][0], vgg_layers[0][i][0][0][1][0]

37
conv1_1 conv
relu1_1 relu
conv1_2 conv
relu1_2 relu
pool1 pool
conv2_1 conv
relu2_1 relu
conv2_2 conv
relu2_2 relu
pool2 pool
conv3_1 conv
relu3_1 relu
conv3_2 conv
relu3_2 relu
conv3_3 conv
relu3_3 relu
pool3 pool
conv4_1 conv
relu4_1 relu
conv4_2 conv
relu4_2 relu
conv4_3 conv
relu4_3 relu
pool4 pool
conv5_1 conv
relu5_1 relu
conv5_2 conv
relu5_2 relu
conv5_3 conv
relu5_3 relu
pool5 pool
fc6 conv
relu6 relu
fc7 conv
relu7 relu
fc8 conv
prob softmax
