# Get COCO dataset. Only run once

In [0]:
!wget http://images.cocodataset.org/zips/test2015.zip
! unzip test2015.zip

--2019-04-25 22:05:50--  http://images.cocodataset.org/zips/test2015.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.177.211
Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.177.211|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13270587935 (12G) [application/zip]
Saving to: ‘test2015.zip.1’

test2015.zip.1        0%[                    ]  96.13M  44.7MB/s               ^C


In [0]:
import keras
import numpy as np
# from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from keras import backend as K
from PIL import Image
from scipy.optimize import fmin_l_bfgs_b
from keras.backend.tensorflow_backend import set_session
from keras.preprocessing.image import load_img, img_to_array
import tensorflow as tf
from keras.regularizers import Regularizer
from keras.layers.merge import concatenate

from scipy.misc import imsave



In [0]:
# Reset the Keras backend state before configuring a new TensorFlow session.
K.clear_session()
# Session configuration: allow this process to use the full GPU memory (fraction = 1).
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 1
# NOTE(review): init_op is created here but never run in the visible cells — confirm it is still needed.
init_op = tf.global_variables_initializer()
# Create the session with the config above and register it as the session Keras uses.
sess = tf.Session(config=config)
set_session(sess)

# VGG16

In [0]:
#https://github.com/misgod/fast-neural-style-keras/blob/master/VGG16.py
"""VGG16 model for Keras.
# Reference
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
"""
from __future__ import print_function
from __future__ import absolute_import

import warnings

from keras.models import Model
from keras.layers import Flatten, Dense, Input
from keras.layers import Conv2D, MaxPooling2D,AveragePooling2D, GlobalAveragePooling2D,GlobalMaxPooling2D
from keras.engine.topology import get_source_inputs
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras import backend as K
from keras.applications.imagenet_utils import decode_predictions, preprocess_input
from keras_applications.imagenet_utils import _obtain_input_shape




# Download URLs for the pre-trained VGG16 weight files used by VGG16() below:
# WEIGHTS_PATH is fetched when include_top is True, WEIGHTS_PATH_NO_TOP otherwise.
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'


def VGG16(include_top=True, weights='imagenet',
          input_tensor=None, input_shape=None,
          pooling=None,
          classes=1000):
    """Build the VGG16 network and optionally load ImageNet weights into it.

    The data format convention follows the Keras config
    (`K.image_data_format()`). Weights are loaded by layer name, so the
    network can be stacked on top of other layers that feed `input_tensor`.

    # Arguments
        include_top: whether to append the three fully-connected
            classification layers (`fc1`, `fc2`, `predictions`).
        weights: `None` (random initialization) or `'imagenet'`
            (download and load the pre-trained weights).
        input_tensor: optional tensor to use as the image input. A Keras
            tensor is used directly; any other tensor is wrapped in an
            `Input` layer.
        input_shape: optional shape tuple, only to be specified when
            `include_top` is False (otherwise the input shape has to be
            `(224, 224, 3)` with `channels_last` or `(3, 224, 224)` with
            `channels_first`). It should have exactly 3 channels, and
            width and height no smaller than 48.
        pooling: only used when `include_top` is False.
            - `None`: the model output is the 4D output of `block5_pool`.
            - `'avg'`: global average pooling is applied, giving a 2D output.
            - `'max'`: global max pooling is applied, giving a 2D output.
        classes: number of output classes; only used when `include_top`
            is True, and must be 1000 together with ImageNet weights.

    # Returns
        A Keras `Model` named `'vgg16'`.

    # Raises
        ValueError: for an invalid `weights` value, for `classes != 1000`
            combined with ImageNet weights and `include_top`, or (from
            `_obtain_input_shape`) for an invalid input shape.
    """
    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    # Resolve / validate the input shape for the current data format.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    # Pick the tensor that feeds the first convolution.
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif K.is_keras_tensor(input_tensor):
        img_input = input_tensor
    else:
        img_input = Input(tensor=input_tensor, shape=input_shape)

    # Convolutional base: (filters, number of 3x3 convs) for blocks 1..5,
    # each block closed by a 2x2 max-pool. Layer names are
    # block<k>_conv<j> and block<k>_pool, matching the weight files.
    block_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    x = img_input
    for block_no, (n_filters, n_convs) in enumerate(block_plan, start=1):
        for conv_no in range(1, n_convs + 1):
            x = Conv2D(n_filters, (3, 3), activation='relu', padding='same',
                       name='block%d_conv%d' % (block_no, conv_no))(x)
        x = MaxPooling2D((2, 2), strides=(2, 2),
                         name='block%d_pool' % block_no)(x)

    if include_top:
        # Classification head.
        x = Flatten(name='flatten')(x)
        for dense_name in ('fc1', 'fc2'):
            x = Dense(4096, activation='relu', name=dense_name)(x)
        x = Dense(classes, activation='softmax', name='predictions')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    # When an input tensor was supplied, trace it back to its source inputs
    # so the model also contains whatever produced that tensor.
    inputs = img_input if input_tensor is None else get_source_inputs(input_tensor)
    model = Model(inputs, x, name='vgg16')

    if weights != 'imagenet':
        return model

    # Fetch (or reuse the cached copy of) the matching weight file.
    if include_top:
        weights_file, weights_url = 'vgg16_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH
    else:
        weights_file, weights_url = 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP
    weights_path = get_file(weights_file, weights_url, cache_subdir='models')
    # by_name=True: only layers whose names match entries in the file are loaded.
    model.load_weights(weights_path, by_name=True)

    if K.backend() == 'theano':
        layer_utils.convert_all_kernels_in_model(model)

    if K.image_data_format() == 'channels_first':
        if include_top:
            # Re-order the first dense layer's weights for channels_first input.
            pool_shape = model.get_layer(name='block5_pool').output_shape[1:]
            first_dense = model.get_layer(name='fc1')
            layer_utils.convert_dense_weights_data_format(first_dense, pool_shape, 'channels_first')

        if K.backend() == 'tensorflow':
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your Keras config '
                          'at ~/.keras/keras.json.')
    return model

# Loss Regularizations 

In [0]:
def gram_matrix(F):
    """
    Compute the normalised Gram matrix of a layer response.

    input:
        F: response of a layer L; its first three axes are read via K.shape
           and the third one is kept as the column axis
    output:
        gram matrix of F: F is flattened over its first two axes, the
        transposed flat matrix is multiplied by the flat matrix, and the
        product is divided by the product of the three axis sizes
    """
    dims = K.shape(F)
    n_rows = dims[0] * dims[1]
    n_cols = dims[2]
    normaliser = K.cast(n_rows * n_cols, dtype='float32')
    flat = K.reshape(F, (n_rows, n_cols))
    return K.dot(K.transpose(flat), flat) / normaliser

class Total_variation(Regularizer):
    """Total-variation penalty on a layer's output image batch.

    Calling the instance with a layer reads `layer.output`, indexed as a
    4D tensor with the two middle axes treated as the spatial axes, and
    returns `weight * sum((dy**2 + dx**2) ** 1.25)`, where `dy` / `dx` are
    the differences between each pixel and its neighbour one step along
    the first / second spatial axis.

    # Arguments
        weight: scalar multiplier applied to the summed penalty.
    """
    def __init__(self, weight=2e2):
        self.weight = weight
        super(Total_variation, self).__init__()

    def __call__(self, x):
        predict = x.output  # output tensor of the layer passed in
        # Drop the last row and column so every remaining pixel has both
        # neighbours. `:-1` equals the former hard-coded `:255` for 256x256
        # images and also works for other sizes.
        base = predict[:, :-1, :-1, :]
        # Difference to the neighbour along the first spatial axis.
        a = K.square(base - predict[:, 1:, :-1, :])
        # Difference to the neighbour along the second spatial axis.
        # (Previously this subtracted the single pixel `[:, 255:, :1, :]`,
        # broadcast over the whole image, which is not a neighbour difference.)
        b = K.square(base - predict[:, :-1, 1:, :])
        loss = self.weight * K.sum(K.pow(a + b, 1.25))
        return loss
        
class layer_content_loss(Regularizer):
    """Content loss between two entries of a layer's output batch.

    Calling the instance with a layer reads `layer.output` and compares
    batch entry 0 with batch entry 1 (in `loss_network` the batch is the
    concatenation [transformed image, original input, style image], so
    these are the generated and the content activations). The result is
    `weight * mean((content - pred) ** 2)`.

    # Arguments
        weight: scalar multiplier applied to the mean squared difference.
    """
    def __init__(self, weight=2e2):
        super(layer_content_loss, self).__init__()
        self.weight = weight

    def __call__(self, x):
        images = x.output
        content = images[1]
        pred = images[0]
        # K.mean without an axis already reduces to a scalar, so no extra
        # K.sum is needed around it.
        return K.mean(K.square(content - pred)) * self.weight
    
class layer_style_loss(Regularizer):
    """Style loss between two entries of a layer's output batch.

    Calling the instance with a layer reads `layer.output`, builds the Gram
    matrices (via `gram_matrix`) of batch entry 0 and batch entry 2, and
    returns `weight * sum((gram_2 - gram_0) ** 2)`.

    # Arguments
        weight: scalar multiplier applied to the summed squared difference.
    """
    def __init__(self, weight=2e2):
        self.weight = weight
        super(layer_style_loss, self).__init__()

    def __call__(self, x):
        activations = x.output
        gram_style = gram_matrix(activations[2])
        gram_pred = gram_matrix(activations[0])
        gram_gap = gram_style - gram_pred
        return self.weight * K.sum(K.square(gram_gap))

# Models

## Image transformation helper functions

In [0]:
#residual block 
from keras import layers
from keras.models import Model
from keras.engine.topology import Layer

def load_image(path):
    """Load an image file as a float32 batch of one 256x256 image.

    # Arguments
        path: path of the image file to read.

    # Returns
        A float32 numpy array with a leading batch axis of size 1 added in
        front of the array produced by `img_to_array`.
    """
    # target_size is a (height, width) pair; the channel count is not part of it.
    img = load_img(path, target_size=(256, 256))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    # astype returns a new array, so its result has to be the value returned
    # (the previous bare `img.astype(...)` statement discarded it).
    return img.astype('float32')
        

def display_img(i,x,style,is_val=False):
    """Save image `x` to a PNG file in the working directory and report its name.

    The file is named `<style>_<i>.png`, or `<style>_<i>_val.png` when
    `is_val` is true.

    # Arguments
        i: integer index used in the file name.
        x: image array handed to `imsave`.
        style: style name used as the file-name prefix.
        is_val: whether to add the `_val` suffix.
    """
    name_pattern = '%s_%d_val.png' if is_val else '%s_%d.png'
    fname = name_pattern % (style, i)
    imsave(fname, x)
    print('Image saved as', fname)


#Normalize input image

class InputNormalize(Layer):
    """Weight-free layer that divides its input by 255; the shape is unchanged."""

    def __init__(self, **kwargs):
        super(InputNormalize, self).__init__(**kwargs)

    def build(self, input_shape):
        # Nothing to build: the layer has no weights.
        return None

    def call(self, x, mask=None):
        scaled = x / 255.
        return scaled

    def compute_output_shape(self, input_shape):
        # Element-wise op, so the output shape equals the input shape.
        return input_shape
    

class InputNormalizeLossNet(Layer):
    """Weight-free preprocessing layer placed in front of the loss network.

    Reverses the order of the last axis of a 4D input and subtracts 120
    from every element; the shape is unchanged.
    """

    def __init__(self, **kwargs):
        super(InputNormalizeLossNet, self).__init__(**kwargs)

    def build(self, input_shape):
        # Nothing to build: the layer has no weights.
        return None

    def call(self, x, mask=None):
        # Flip the last axis (presumably RGB -> BGR channel order — TODO confirm).
        flipped = x[:, :, :, ::-1]
        shifted = flipped - 120
        return shifted

    def compute_output_shape(self, input_shape):
        # Element-wise ops only, so the output shape equals the input shape.
        return input_shape


class InputDenormalize(Layer):
    """Weight-free layer that rescales its input to `x*150 + 255/2`; the shape is unchanged."""

    def __init__(self, **kwargs):
        super(InputDenormalize, self).__init__(**kwargs)

    def build(self, input_shape):
        # Nothing to build: the layer has no weights.
        return None

    def call(self, x):
        offset = 255/2
        return x*150 + offset

    def compute_output_shape(self, input_shape):
        # Element-wise op, so the output shape equals the input shape.
        return input_shape
    


## Image transform model

In [0]:
def residual_block(x, num):
    """Apply one residual block and return the sum of its branch and its input.

    The branch is conv(128, 3x3) -> batch norm -> relu -> conv(128, 3x3) ->
    batch norm, all with stride 1 and 'same' padding; its output is added
    element-wise to the untouched input `x`.

    # Arguments
        x: input tensor of the block.
        num: integer block index, used to give every layer a unique name.
    """
    branch = layers.Conv2D(128, (3, 3), strides=1, padding='same',
                           name='resi_conv_%d_1' % num)(x)
    branch = layers.BatchNormalization(name='resi_normal_%d_1' % num)(branch)
    branch = layers.Activation('relu', name='resi_relu_%d_1' % num)(branch)
    branch = layers.Conv2D(128, (3, 3), strides=1, padding='same',
                           name='resi_conv_%d_2' % num)(branch)
    branch = layers.BatchNormalization(name='resi_normal_%d_2' % num)(branch)
    # Skip connection: add the block input back onto the branch output.
    return layers.add([branch, x], name='resi_add_%d' % num)


def Convolution_block(x,filters,kernel_size,stride,i,activation):
    """Apply Conv2D -> BatchNormalization -> Activation to `x` and return the result.

    # Arguments
        x: input tensor.
        filters, kernel_size, stride: passed to `Conv2D` ('same' padding).
        i: integer index used in the layer names (`conv_<i>`, `normal_<i>`,
            `<activation>_<i>`).
        activation: activation name passed to `Activation`.
    """
    stages = (
        layers.Conv2D(filters, kernel_size, strides=stride, padding='same', name='conv_'+str(i)),
        layers.BatchNormalization(name='normal_%d' % i),
        layers.Activation(activation, name='%s_%d' % (activation, i)),
    )
    for stage in stages:
        x = stage(x)
    return x

def Convolution_block_T(x,filters,kernel_size,stride,i,activation):
    """Apply Conv2DTranspose -> BatchNormalization -> Activation to `x` and return the result.

    # Arguments
        x: input tensor.
        filters, kernel_size, stride: passed to `Conv2DTranspose` ('same' padding).
        i: integer index used in the layer names (`conv_<i>`, `normal_<i>`,
            `<activation>_<i>`).
        activation: activation name passed to `Activation`.
    """
    stages = (
        layers.Conv2DTranspose(filters, kernel_size, strides=stride, padding='same', name='conv_'+str(i)),
        layers.BatchNormalization(name='normal_%d' % i),
        layers.Activation(activation, name='%s_%d' % (activation, i)),
    )
    for stage in stages:
        x = stage(x)
    return x



def image_transform_model(vari_weight):
    """Build the image transformation network with a total-variation loss attached.

    Layout: input (256, 256, 3) -> InputNormalize -> three convolution
    blocks (the last two with stride 2) -> five residual blocks -> two
    transposed-convolution blocks with stride 2 -> a 9x9 convolution block
    with tanh -> InputDenormalize.

    The denormalize layer has to come last because the full, unnormalized
    image is what gets passed on to VGG16.

    The total-variation term is added as a loss on the last layer: every
    designated layer of the combined network carries one loss of its own,
    and attaching it to the layer is what folds it into the final model's
    loss.

    # Arguments
        vari_weight: weight handed to `Total_variation`.

    # Returns
        The Keras `Model` mapping `input_origin` to the denormalized output.
    """
    image_size = (256,256,3)
    input_origin = layers.Input(shape=image_size,name="input_origin")
    net = InputNormalize()(input_origin)

    # Encoder: (filters, kernel_size, stride, layer index).
    encoder_plan = (
        (32, (9, 9), (1, 1), 1),
        (64, (3, 3), (2, 2), 2),
        (128, (3, 3), (2, 2), 3),
    )
    for n_filters, kernel, step, idx in encoder_plan:
        net = Convolution_block(x=net, filters=n_filters, kernel_size=kernel,
                                stride=step, i=idx, activation='relu')

    # Five residual blocks, numbered 1..5.
    for block_no in range(1, 6):
        net = residual_block(net, block_no)

    # Decoder: (filters, layer index); both use a 3x3 kernel and stride 2.
    for n_filters, idx in ((64, 4), (32, 5)):
        net = Convolution_block_T(x=net, filters=n_filters, kernel_size=(3, 3),
                                  stride=(2, 2), i=idx, activation='relu')

    # Final 3-channel convolution with tanh, then rescale.
    net = Convolution_block(x=net, filters=3, kernel_size=(9, 9),
                            stride=(1, 1), i=6, activation='tanh')
    output = InputDenormalize()(net)

    model = Model([input_origin],[output])

    # Attach the total-variation regularizer to the last layer of the model.
    last_layer = model.layers[-1]
    last_layer.add_loss(Total_variation(vari_weight)(last_layer))
    return model

## Dummy style network

In [0]:
def style_network():
    """Build a pass-through model whose only purpose is to provide an input for the style image.

    # Returns
        A Keras `Model` with a (256, 256, 3) input named "DUMMY" and an
        identity `Lambda` layer as output.
    """
    dummy_in = layers.Input((256, 256, 3),name="DUMMY")
    identity = layers.Lambda(lambda t: t)
    return Model(dummy_in, identity(dummy_in))


def dummy_loss(y_true, y_pred ):
    """Loss function that ignores both arguments and returns a backend variable holding 0.0."""
    zero = K.variable(0.0)
    return zero
        


## Loss network

In [0]:
def loss_network(x_trans,x_input,style_input,content_weight,style_weight):
    """Stack a frozen VGG16 on top of the given tensors and attach style/content losses.

    The three tensors are concatenated along the batch axis in the order
    [transformed image, original input, style image], so the regularizers
    can pick them out of each layer output by batch index (0, 1, 2). The
    original input serves as the content target.

    # Arguments
        x_trans: output tensor of the image transformation network.
        x_input: input tensor of the image transformation network.
        style_input: input tensor carrying the style image.
        content_weight: weight handed to `layer_content_loss`.
        style_weight: weight handed to `layer_style_loss`.

    # Returns
        The Keras `Model` returned by `VGG16(input_tensor=..., include_top=False)`.
    """
    x = concatenate([x_trans,x_input,style_input], axis=0)
    # Preprocessing for the loss network is done by this layer.
    x = InputNormalizeLossNet()(x)

    model = VGG16(input_tensor=x, include_top=False)
    # Without the top, VGG16 adds 13 conv + 5 pooling layers, i.e. the last
    # 18 layers of the model; index them by name.
    out_layers = {layer.name: layer for layer in model.layers[-18:]}

    #style loss
    STYLE_LAYER = ['block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3']

    for layer_name in STYLE_LAYER:
        layer = out_layers[layer_name]
        style_loss = layer_style_loss(style_weight)(layer)
        layer.add_loss(style_loss)

    #content loss
    CONTENT_LAYER = 'block3_conv3'
    content_layer = out_layers[CONTENT_LAYER]
    content_loss = layer_content_loss(content_weight)(content_layer)
    content_layer.add_loss(content_loss)

    # Freeze the last 19 layers: the 18 VGG16 layers plus the one in front
    # of them (presumably the InputNormalizeLossNet layer, which has no weights).
    for lay in model.layers[-19:]:
        lay.trainable = False
    return model

# Build model

## Put the paths to the style image and the desired content image here

In [None]:
# Load the style image and a content image used to try out the trained network.
# Both paths are relative to the notebook's working directory.
style = load_image("../data/style/son_dau.jpg")
img = load_image("../data/content/bird.jpg") #test_img

In [0]:
# Input size shared by the networks; loss_network() also reads this global.
image_size = (256,256,3)


# Weights of the content and style terms; 1e-3 below is the total-variation weight.
content_weight = 1
style_weight = 4
transform_model =  image_transform_model(1e-3)
# NOTE(review): the next two assignments rebind the names `style_network` and
# `loss_network` from the builder functions to the built models, so this cell
# cannot be run a second time without re-running the cells that define them.
style_network = style_network() 
loss_network = loss_network(transform_model.output,transform_model.input
                            ,style_network.input,
                            content_weight,style_weight)
# Placeholder target passed to fit; dummy_loss ignores it.
dummy_y = np.zeros((1,256,256,3))


## Fit with real training data

In [0]:
# Adam optimizer with a learning rate of 0.001.
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

# dummy_loss always returns 0; the style, content and total-variation terms were
# attached to individual layers with add_loss when the networks were built.
loss_network.compile(optimizer = adam,loss=dummy_loss)

In [0]:
# Print the layer-by-layer summary of the combined model.
loss_network.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_origin (InputLayer)       (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
input_normalize_1 (InputNormali (None, 256, 256, 3)  0           input_origin[0][0]               
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 256, 256, 32) 7808        input_normalize_1[0][0]          
__________________________________________________________________________________________________
normal_1 (BatchNormalization)   (None, 256, 256, 32) 128         conv_1[0][0]                     
__________________________________________________________________________________________________
relu_1 (Ac

In [0]:
import os
from tqdm import tqdm

# Collect the path of every file in the unpacked COCO directory.
# os.listdir returns entries in arbitrary order, so sort them to make the
# training order reproducible between runs and machines.
# To smoke-test on a handful of images, slice `paths` (e.g. `paths[:10]`).
content_imgs = None  # not used by the visible cells; kept so the name stays defined
paths = [os.path.join('test2015', filename)
         for filename in tqdm(sorted(os.listdir('test2015')))]
file_count = len(paths)
        
def gen_img(paths,style):
    """Endlessly yield training batches for the loss network.

    Cycles over `paths` forever; each step loads one image with
    `load_image` and yields `([image, style], dummy_y)`, where `dummy_y` is
    the module-level placeholder target.

    # Arguments
        paths: sequence of image file paths.
        style: style image array passed along unchanged with every batch.

    # Raises
        ValueError: if `paths` is empty (raised when the generator is first
            advanced); otherwise the outer loop would spin forever without
            yielding anything.
    """
    if len(paths) == 0:
        raise ValueError('gen_img needs at least one image path')
    while True:
        for path in paths:
            # load_image builds a fresh array on every call, so no defensive
            # copies are needed before handing it to the model.
            yield ([load_image(path), style], dummy_y)

100%|██████████| 81434/81434 [00:00<00:00, 1420020.50it/s]


In [0]:
# Train for 2 epochs of 81000 generator steps each (one image per step).
# NOTE(review): the listing above found 81434 files, so an epoch of 81000 steps
# does not line up exactly with one pass over the data — confirm this is intended.
loss_network.fit_generator(generator=gen_img(paths,style), steps_per_epoch=81000,epochs=2, verbose=1)

Epoch 1/2
Epoch 2/2

In [0]:
# Stylise the test image and display it. The network output is not bounded to
# [0, 255] (InputDenormalize maps the tanh output to x*150 + 255/2), so clip
# before casting: a bare astype('uint8') lets out-of-range values wrap around.
Image.fromarray(np.clip(transform_model.predict(img)[0], 0, 255).astype('uint8'))

In [0]:
# Stylise one validation image and save it as "starry night_2.png".
# NOTE(review): hard-coded absolute path — only valid on the original machine.
img = load_image("/data/ttran/val2017/000000084170.jpg")
display_img(2,transform_model.predict(img)[0], "starry night")