# M2177.003100 Deep Learning <br> Final Project: Text to Image Synthesis (Tensorflow)

Copyright (C) Data Science & AI Laboratory, Seoul National University. This material is for educational uses only. Some contents are based on the material provided by other paper/book authors and may be copyrighted by them. 

**To understand this work, please read the provided PPT file carefully.**

**Note**: certain details are missing or ambiguous on purpose, in order to test your knowledge on the related materials. However, if you really feel that something essential is missing and cannot proceed to the next step, then contact the teaching staff with clear description of your problem.

### Submitting your work:
<font color=red>**DO NOT clear the training process output**</font> so that TAs can grade both your code and results.  
**The TA will set the config file to 'eval_birds.yml' when evaluating the code on the 'hidden test dataset'. Please make sure that your code can generate the data needed to measure the inception score and R-precision on the 'hidden test dataset'.**

## 1. Load datasets
The Birds dataset will be downloaded automatically if it is not located in the *data* directory. <br>

In [2]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os, nltk

from miscc.config import cfg, cfg_from_file
import pprint
import datetime
import dateutil.tz
import numpy as np
import scipy
import time
import random

from utils.data_utils import CUBDataset
from utils.loss import cosine_similarity

from utils.data_utils import *

#################################################
# DO NOT CHANGE 
from utils.model import CNN_ENCODER, RNN_ENCODER, GENERATOR, DISCRIMINATOR
#################################################

%matplotlib inline

In [3]:
# Select the config file: 'cfg/train_birds.yml' for training,
# 'cfg/eval_birds.yml' for evaluation.
cfg_from_file('cfg/train_birds.yml') # eval_birds.yml

print('Using config:')
pprint.pprint(cfg)

# Take the GPU from the config instead of a machine-specific literal, so the
# notebook runs unchanged on a host that does not have a device with index 3.
os.environ['CUDA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)

# Timestamped sample directory name, e.g. sample/<dataset>_<config>_<YYYY_mm_dd_HH_MM_SS>.
now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
output_dir = 'sample/%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

Using config:
{'BATCH_SIZE': 32,
 'CHECKPOINT_DIR': './checkpoint',
 'CHECKPOINT_NAME': 'model.ckpt',
 'CNN': {'EMBEDDING_DIM': 0, 'H_DIM': 0},
 'CONFIG_NAME': 'text-to-image',
 'CUDA': False,
 'DATASET_NAME': 'birds',
 'DATA_DIR': 'data/birds',
 'EMBEDDING_TYPE': 'cnn-rnn',
 'GAN': {'B_ATTENTION': False,
         'B_CONDITION': False,
         'B_DCGAN': False,
         'CONDITION_DIM': 0,
         'DF_DIM': 64,
         'EMBEDDING_DIM': 0,
         'GF_DIM': 128,
         'R_NUM': 0,
         'Z_DIM': 512},
 'GPU_ID': '0',
 'IMAGE_SIZE': 256,
 'NUM_BATCH_FOR_TEST': 0,
 'RANDOM_SEED': 0,
 'RNN': {'EMBEDDING_DIM': 0,
         'H_DIM': 256,
         'TYPE': '',
         'VOCAB_SIZE': 8000,
         'WORD_EMBEDDING_DIM': 256},
 'R_PRECISION_DIR': './evaluation',
 'R_PRECISION_FILE': 'r_precision.npz',
 'R_PRECISION_FILE_HIDDEN': 'r_precision_hidden.npz',
 'TEST': {'B_EXAMPLE': False,
          'GENERATED_HIDDEN_TEST_IMAGES': './evaluation/generated_images_hidden',
          'GENERATED_TE

In [4]:
# Load both splits of the Birds dataset from cfg.DATA_DIR.
train_dataset = CUBDataset(cfg.DATA_DIR, split='train')
test_dataset = CUBDataset(cfg.DATA_DIR, split='test')

# Print a short summary of what was loaded. Each line names the split it
# refers to, so the train and test values cannot be confused in the output.
print(f'\ntrain data directory:\n{train_dataset.split_dir}')
print(f'test data directory:\n{test_dataset.split_dir}\n')

print(f'# of train filenames:{train_dataset.filenames.shape}')
print(f'# of test filenames:{test_dataset.filenames.shape}\n')

print(f'example of filename of train image:{train_dataset.filenames[0]}')
print(f'example of filename of test image:{test_dataset.filenames[0]}\n')

print(f'example of train caption and its ids:\n{train_dataset.captions[0]}\n{train_dataset.captions_ids[0]}\n')
print(f'example of test caption and its ids:\n{test_dataset.captions[0]}\n{test_dataset.captions_ids[0]}\n')

print(f'# of train captions:{np.asarray(train_dataset.captions).shape}')
print(f'# of test captions:{np.asarray(test_dataset.captions).shape}\n')

print(f'# of train caption ids:{np.asarray(train_dataset.captions_ids).shape}')
print(f'# of test caption ids:{np.asarray(test_dataset.captions_ids).shape}\n')

print(f'# of train images:{train_dataset.images.shape}')
print(f'# of test images:{test_dataset.images.shape}\n')

self.current_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf

self.data_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds/CUB_200_2011

Dataset already exists
self.image_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds/CUB_200_2011/images

Load from:  data/birds/captions.pickle
self.current_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf

self.data_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds/CUB_200_2011

Dataset already exists
self.image_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-p

## 2. Define models and go to train/evaluate

In [5]:
###======================== DEFINE VARIABLES ===================================###
# Sizes taken from the loaded config.
batch_size, image_size, z_dim = cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.GAN.Z_DIM

# Adam settings: base learning rate and first-moment decay.
lr = 0.0002
beta1 = 0.5
# Learning-rate schedule parameters (decay factor and period in epochs).
lr_decay = 0.5
decay_every = 100

# Start from an empty graph so this notebook can be re-run from this cell
# without colliding with networks built by an earlier run.
tf.reset_default_graph()

# Re-import utils.model so edits to that file are picked up without a kernel
# restart, then rebind the four network classes to the freshly loaded versions.
from importlib import reload
import utils.model as model
model = reload(model)
RNN_ENCODER, GENERATOR, DISCRIMINATOR, CNN_ENCODER = (
    model.RNN_ENCODER,
    model.GENERATOR,
    model.DISCRIMINATOR,
    model.CNN_ENCODER,
)

In [6]:
###======================== DEFINE PLACEHOLDERS ===================================###
# All inputs have a fixed leading batch dimension of cfg.BATCH_SIZE.
_image_shape = [cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 3]   # square RGB images
_caption_shape = [cfg.BATCH_SIZE, None]                               # token ids, variable length

# Matching image / caption pair.
t_real_image = tf.placeholder(dtype=tf.float32, shape=_image_shape, name='real_image')
t_real_caption = tf.placeholder(dtype=tf.int64, shape=_caption_shape, name='real_caption_input')
# Mismatching image / caption used as negatives.
t_wrong_image = tf.placeholder(dtype=tf.float32, shape=_image_shape, name='wrong_image')
t_wrong_caption = tf.placeholder(dtype=tf.int64, shape=_caption_shape, name='wrong_caption_input')
# Generator noise vector.
t_z = tf.placeholder(dtype=tf.float32, shape=[cfg.BATCH_SIZE, cfg.GAN.Z_DIM], name='z_noise')

In [7]:
# Build each network once in inference mode (is_training=False) with reuse=False.
# These are the first instantiations in the graph; the later cells build further
# instances with reuse=True (presumably sharing these variables - confirm in utils.model).
# The evaluation functions below fetch rnn_encoder.outputs, generator.outputs and
# cnn_encoder.outputs from these inference-mode instances.
rnn_encoder = RNN_ENCODER(t_real_caption, cfg.BATCH_SIZE, is_training=False, reuse=False)
generator = GENERATOR(t_z, rnn_encoder.outputs, is_training=False, reuse=False)
discriminator = DISCRIMINATOR(generator.outputs, rnn_encoder.outputs, is_training=False, reuse=False)
cnn_encoder = CNN_ENCODER(t_real_image, is_training=False, reuse=False)

In [8]:
### Define image and text mapping
# Training-mode encoders (reuse=True) for the matching and the mismatching inputs.
net_cnn = CNN_ENCODER(t_real_image, is_training=True, reuse=True)
x = net_cnn.outputs                                                                      # matching image embedding
v = RNN_ENCODER(t_real_caption, cfg.BATCH_SIZE, is_training=True, reuse=True).outputs    # matching caption embedding
x_w = CNN_ENCODER(t_wrong_image, is_training=True, reuse=True).outputs                   # mismatching image embedding
v_w = RNN_ENCODER(t_wrong_caption, cfg.BATCH_SIZE, is_training=True, reuse=True).outputs # mismatching caption embedding

alpha = 0.2 # margin alpha


def _margin_hinge(sim_pos, sim_neg, margin):
    """Batch mean of max(0, margin - sim_pos + sim_neg)."""
    return tf.reduce_mean(tf.maximum(0., margin - sim_pos + sim_neg))


# Two hinge terms: the matching pair (x, v) must score at least `alpha` higher
# than (x, wrong caption) and than (wrong image, v).
rnn_loss = (
    _margin_hinge(cosine_similarity(x, v), cosine_similarity(x, v_w), alpha)
    + _margin_hinge(cosine_similarity(x, v), cosine_similarity(x_w, v), alpha)
)

In [9]:
### Define generative model
# The caption encoder is used in inference mode here; generator and
# discriminator are built in training mode. All instances use reuse=True.
net_rnn = RNN_ENCODER(t_real_caption, cfg.BATCH_SIZE, is_training=False, reuse=True)
net_fake_image = GENERATOR(t_z, net_rnn.outputs, is_training=True, reuse=True)

# Discriminator on (generated image, matching caption).
net_disc_fake = DISCRIMINATOR(net_fake_image.outputs, net_rnn.outputs, is_training=True, reuse=True)
# Discriminator on (real image, matching caption).
net_disc_real = DISCRIMINATOR(t_real_image, net_rnn.outputs, is_training=True, reuse=True)
# Discriminator on (real image, mismatching caption).
_wrong_caption_embedding = RNN_ENCODER(t_wrong_caption, cfg.BATCH_SIZE, is_training=False, reuse=True).outputs
net_disc_mismatch = DISCRIMINATOR(t_real_image, _wrong_caption_embedding, is_training=True, reuse=True)

disc_fake_logits = net_disc_fake.logits
disc_real_logits = net_disc_real.logits
disc_mismatch_logits = net_disc_mismatch.logits

_sigmoid_xent = tf.nn.sigmoid_cross_entropy_with_logits

# Discriminator targets: real+matching -> 1, real+mismatching -> 0, fake -> 0.
d_loss1 = tf.reduce_mean(
    _sigmoid_xent(logits=disc_real_logits, labels=tf.ones_like(disc_real_logits), name='d1'))
d_loss2 = tf.reduce_mean(
    _sigmoid_xent(logits=disc_mismatch_logits, labels=tf.zeros_like(disc_mismatch_logits), name='d2'))
d_loss3 = tf.reduce_mean(
    _sigmoid_xent(logits=disc_fake_logits, labels=tf.zeros_like(disc_fake_logits), name='d3'))
# The two "negative" terms are averaged so they weigh as much as the positive term.
d_loss = d_loss1 + 0.5 * (d_loss2 + d_loss3)

# Generator target: the discriminator should label fakes as 1.
g_loss = tf.reduce_mean(
    _sigmoid_xent(logits=disc_fake_logits, labels=tf.ones_like(disc_fake_logits), name='g'))

In [10]:
### Define optimizers
def _trainable_in(tag):
    """Trainable variables whose name contains `tag`."""
    return [variable for variable in tf.trainable_variables() if tag in variable.name]


def _update_ops_in(tag):
    """Ops from the UPDATE_OPS collection whose name contains `tag`."""
    return [op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if tag in op.name]


# Variables are split per network by a substring of their name.
rnn_vars = _trainable_in('rnnencoder')
g_vars = _trainable_in('generator')
d_vars = _trainable_in('discriminator')
cnn_vars = _trainable_in('cnnencoder')

# Matching UPDATE_OPS entries, run before the corresponding optimizer step.
update_ops_D = _update_ops_in('discriminator')
update_ops_G = _update_ops_in('generator')
update_ops_CNN = _update_ops_in('cnnencoder')

# The learning rate is a non-trainable variable so it can be changed during training.
with tf.variable_scope('learning_rate'):
    lr_v = tf.Variable(lr, trainable=False)

with tf.control_dependencies(update_ops_D):
    d_optim = tf.train.AdamOptimizer(lr_v, beta1=beta1).minimize(d_loss, var_list=d_vars)

with tf.control_dependencies(update_ops_G):
    g_optim = tf.train.AdamOptimizer(lr_v, beta1=beta1).minimize(g_loss, var_list=g_vars)

# The text and image encoders are trained jointly on rnn_loss, with the
# gradients clipped to a global norm of 10.
with tf.control_dependencies(update_ops_CNN):
    _encoder_vars = rnn_vars + cnn_vars
    grads, _ = tf.clip_by_global_norm(tf.gradients(rnn_loss, _encoder_vars), 10)
    optimizer = tf.train.AdamOptimizer(lr_v, beta1=beta1)
    rnn_optim = optimizer.apply_gradients(zip(grads, _encoder_vars))

In [None]:
### Train
# Trains the text/image encoders (first fifth of the epochs only), the
# discriminator and the generator, one batch after the other, and writes a
# checkpoint every 10 epochs plus one after the final epoch.
sess = tf.Session(config=tf.ConfigProto())
init = tf.global_variables_initializer()
sess.run(init)

# Only the five most recent checkpoints are kept on disk.
saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)
checkpoint_dir = cfg.CHECKPOINT_DIR
# Saver.save() does not create a missing parent directory; make sure it exists
# up front so the first save cannot fail after epochs of training.
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_path = os.path.join(checkpoint_dir, cfg.CHECKPOINT_NAME)

# Build the learning-rate update once. Calling tf.assign() inside the loop
# would add a new op to the graph at every decay step.
t_new_lr = tf.placeholder(tf.float32, shape=[], name='new_learning_rate')
update_lr = tf.assign(lr_v, t_new_lr)

n_epoch = cfg.TRAIN.MAX_EPOCH
n_images_train = len(train_dataset.images)
n_batch_epoch = n_images_train // batch_size
train_captions = np.array(train_dataset.captions_ids)
train_images = np.array(train_dataset.images)
n_captions_train = len(train_captions)
n_captions_per_image = cfg.TEXT.CAPTIONS_PER_IMAGE
# The encoders are only updated during the first fifth of training.
n_epoch_rnn = n_epoch // 5

for epoch in range(n_epoch):
    #################################################
    # TODO: Implement text to image synthesis
    start_time = time.time()

    if epoch != 0 and (epoch % decay_every == 0):
        # Step decay: lr * lr_decay ** (number of completed decay periods).
        new_lr_decay = lr_decay ** (epoch // decay_every)
        sess.run(update_lr, feed_dict={t_new_lr: lr * new_lr_decay})
        log = " ** new learning rate: %f" % (lr * new_lr_decay)
        print(log)

    elif epoch == 0:
        log = " ** init lr: %f  decay_every_epoch: %d, lr_decay: %f" % (lr, decay_every, lr_decay)
        print(log)

    for step in range(n_batch_epoch):
        step_time = time.time()

        ## get matched text & image
        # Caption index -> image index by integer division; this assumes the
        # captions are stored image by image, n_captions_per_image each.
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_real_caption = train_captions[idexs]
        b_real_images = train_images[np.asarray(idexs) // n_captions_per_image]

        ## get wrong caption & wrong image
        # Drawn independently at random, so they do not correspond to the matched pair above.
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_wrong_caption = train_captions[idexs]
        idexs2 = get_random_int(min=0, max=n_images_train-1, number=batch_size)
        b_wrong_images = train_images[idexs2]

        ## get noise
        b_z = np.random.normal(loc=0.0, scale=1.0, size=(batch_size, z_dim)).astype(np.float32)

        b_real_images = threading_data(b_real_images, prepro_img, mode='train')   # [0, 255] --> [-1, 1] + augmentation
        b_wrong_images = threading_data(b_wrong_images, prepro_img, mode='train')

        ## update RNN
        if epoch < n_epoch_rnn:
            errRNN, _ = sess.run([rnn_loss, rnn_optim], feed_dict={
                                            t_real_image : b_real_images,
                                            t_wrong_image : b_wrong_images,
                                            t_real_caption : b_real_caption,
                                            t_wrong_caption : b_wrong_caption})
        else:
            errRNN = 0

        ## updates D
        errD, _ = sess.run([d_loss, d_optim], feed_dict={
                        t_real_image : b_real_images,
                        t_wrong_caption : b_wrong_caption,
                        t_real_caption : b_real_caption,
                        t_z : b_z})
        ## updates G
        errG, _ = sess.run([g_loss, g_optim], feed_dict={
                        t_real_caption : b_real_caption,
                        t_z : b_z})

        print("Epoch: [%2d/%2d] [%4d/%4d] time: %4.4fs, d_loss: %.8f, g_loss: %.8f, rnn_loss: %.8f" \
                    % (epoch, n_epoch, step, n_batch_epoch, time.time() - step_time, errD, errG, errRNN))

    print(" ** Epoch %d took %fs" % (epoch, time.time()-start_time))

    # Save every 10th epoch, and always after the last one so the final weights
    # are not lost (and so short runs with MAX_EPOCH <= 10 still leave a checkpoint).
    is_last_epoch = (epoch == n_epoch - 1)
    if ((epoch != 0) and (epoch % 10) == 0) or is_last_epoch:
        #################################################
        # save checkpoints
        saver.save(sess, checkpoint_path, global_step=epoch)
        print('The checkpoint has been created.')

 ** init lr: 0.000200  decay_every_epoch: 100, lr_decay: 0.500000
Epoch: [ 0/600] [   0/ 276] time: 63.1723s, d_loss: 6.23484230, g_loss: 39.32595444, rnn_loss: 0.40280259
Epoch: [ 0/600] [   1/ 276] time: 6.0927s, d_loss: 17.23238945, g_loss: 11.64991283, rnn_loss: 0.38357860
Epoch: [ 0/600] [   2/ 276] time: 3.7340s, d_loss: 6.36725235, g_loss: 0.27039823, rnn_loss: 0.40155798
Epoch: [ 0/600] [   3/ 276] time: 7.3573s, d_loss: 10.66509056, g_loss: 15.84979153, rnn_loss: 0.42965314
Epoch: [ 0/600] [   4/ 276] time: 3.6836s, d_loss: 4.12254572, g_loss: 23.20690918, rnn_loss: 0.34749711
Epoch: [ 0/600] [   5/ 276] time: 7.1800s, d_loss: 5.92113686, g_loss: 15.14709949, rnn_loss: 0.38436699
Epoch: [ 0/600] [   6/ 276] time: 4.1041s, d_loss: 3.60993147, g_loss: 5.07561350, rnn_loss: 0.43422377
Epoch: [ 0/600] [   7/ 276] time: 7.0240s, d_loss: 6.93212175, g_loss: 27.30155945, rnn_loss: 0.42405000
Epoch: [ 0/600] [   8/ 276] time: 4.2256s, d_loss: 4.57340956, g_loss: 15.40741634, rnn_loss:

Epoch: [ 0/600] [  78/ 276] time: 5.5845s, d_loss: 2.58349800, g_loss: 9.64740562, rnn_loss: 0.37367791
Epoch: [ 0/600] [  79/ 276] time: 5.7236s, d_loss: 3.73621202, g_loss: 5.84757996, rnn_loss: 0.35291892
Epoch: [ 0/600] [  80/ 276] time: 6.0420s, d_loss: 2.27605391, g_loss: 7.13578415, rnn_loss: 0.41712159
Epoch: [ 0/600] [  81/ 276] time: 5.3392s, d_loss: 3.03734398, g_loss: 6.15043831, rnn_loss: 0.41445935
Epoch: [ 0/600] [  82/ 276] time: 6.0953s, d_loss: 2.76614571, g_loss: 8.06743908, rnn_loss: 0.41888702
Epoch: [ 0/600] [  83/ 276] time: 5.3929s, d_loss: 2.06157708, g_loss: 7.65023899, rnn_loss: 0.36965406
Epoch: [ 0/600] [  84/ 276] time: 6.1096s, d_loss: 2.72530508, g_loss: 8.16736984, rnn_loss: 0.38652378
Epoch: [ 0/600] [  85/ 276] time: 5.1447s, d_loss: 2.39750481, g_loss: 9.42800808, rnn_loss: 0.42467290
Epoch: [ 0/600] [  86/ 276] time: 5.6534s, d_loss: 2.35646534, g_loss: 8.01881599, rnn_loss: 0.41260284
Epoch: [ 0/600] [  87/ 276] time: 4.7323s, d_loss: 3.04952931, g

Epoch: [ 0/600] [ 157/ 276] time: 5.9420s, d_loss: 2.31515121, g_loss: 2.12479353, rnn_loss: 0.32485321
Epoch: [ 0/600] [ 158/ 276] time: 5.1188s, d_loss: 2.86995721, g_loss: 5.89485264, rnn_loss: 0.38428402
Epoch: [ 0/600] [ 159/ 276] time: 5.4569s, d_loss: 2.00934982, g_loss: 5.71240091, rnn_loss: 0.34296393
Epoch: [ 0/600] [ 160/ 276] time: 5.3025s, d_loss: 3.18020320, g_loss: 1.13519013, rnn_loss: 0.41942060
Epoch: [ 0/600] [ 161/ 276] time: 4.9227s, d_loss: 2.72794867, g_loss: 5.38164520, rnn_loss: 0.34972164
Epoch: [ 0/600] [ 162/ 276] time: 3.6922s, d_loss: 1.88686323, g_loss: 5.01856422, rnn_loss: 0.40973467
Epoch: [ 0/600] [ 163/ 276] time: 6.1710s, d_loss: 1.82138300, g_loss: 3.77082682, rnn_loss: 0.41383132
Epoch: [ 0/600] [ 164/ 276] time: 5.9691s, d_loss: 2.01513839, g_loss: 5.56316376, rnn_loss: 0.37942716
Epoch: [ 0/600] [ 165/ 276] time: 4.6388s, d_loss: 2.39445162, g_loss: 2.91132379, rnn_loss: 0.35905597
Epoch: [ 0/600] [ 166/ 276] time: 5.6247s, d_loss: 1.77917659, g

## 3. Evaluation metric

In [24]:
def generate_r_precision_data():
    """Compute and save the embeddings needed to score R-precision on the test split.

    For every full batch of test captions the inference-mode networks
    (`rnn_encoder`, `generator`, `cnn_encoder`) are run to obtain

    * the text embedding of each caption,
    * the image embedding of the image generated from that caption, and
    * the text embeddings of `cfg.WRONG_CAPTION` mismatching captions.

    The three arrays are written to `cfg.R_PRECISION_DIR/cfg.R_PRECISION_FILE`
    under the keys `true_cnn`, `true_rnn` and `wrong_rnn`. Captions that do not
    fill a whole batch are not evaluated.

    Side effects:
        Overwrites `cfg.CHECKPOINT_DIR` and `cfg.CHECKPOINT_NAME` (see the note
        in the body) and replaces any existing output file.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids), (-1, cfg.TEXT.WORDS_NUM))
    captions_ids_wrong = np.reshape(test_dataset.random_wrong_captions(), (-1, cfg.WRONG_CAPTION, cfg.TEXT.WORDS_NUM))

    # load the trained checkpoint
    # NOTE(review): the checkpoint is hard-coded here and written back into the
    # global cfg, overriding whatever the YAML config specified.
    # generate_inception_score_data() reads cfg.CHECKPOINT_DIR / cfg.CHECKPOINT_NAME,
    # so it sees these values when it is run after this function.
    cfg.CHECKPOINT_DIR = 'checkpoint'
    cfg.CHECKPOINT_NAME = 'model.ckpt-430'
    ckpt_path = os.path.join(cfg.CHECKPOINT_DIR, cfg.CHECKPOINT_NAME)

    n_caption_test = len(caption_ids)
    num_batches = n_caption_test // cfg.BATCH_SIZE

    true_cnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    true_rnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    wrong_rnn_features = np.zeros((num_batches, cfg.WRONG_CAPTION, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)

    # The context manager closes the session (and releases its resources) even
    # if restoring or evaluating raises.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        loader = tf.train.Saver(var_list=tf.global_variables())
        loader.restore(sess, ckpt_path)
        print("Restored model parameters from {}".format(ckpt_path))

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            test_cap = caption_ids[batch]

            z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)

            # Text embedding and generated image come from the same feeds, so
            # fetch them in one run instead of encoding the captions twice.
            rnn_features, gen = sess.run([rnn_encoder.outputs, generator.outputs],
                                         feed_dict={t_real_caption: test_cap, t_z: z})
            # The generated image is fed through the image encoder.
            cnn_features = sess.run(cnn_encoder.outputs, feed_dict={t_real_image: gen})

            true_cnn_features[i] = cnn_features
            true_rnn_features[i] = rnn_features

            # The slice does not depend on the wrong-caption index; take it once per batch.
            wrong_cap = captions_ids_wrong[batch]
            for per_wrong_caption in range(cfg.WRONG_CAPTION):
                wrong_rnn_features[i, per_wrong_caption] = sess.run(
                    rnn_encoder.outputs,
                    feed_dict={t_real_caption: wrong_cap[:, per_wrong_caption]})

    # np.savez replaces an existing file, so no explicit removal is needed;
    # the directory, however, must exist.
    os.makedirs(cfg.R_PRECISION_DIR, exist_ok=True)
    np.savez(os.path.join(cfg.R_PRECISION_DIR, cfg.R_PRECISION_FILE), true_cnn=true_cnn_features, true_rnn=true_rnn_features,
             wrong_rnn=wrong_rnn_features)

In [25]:
def generate_inception_score_data():
    """Generate one image per test caption and save them as PNG files.

    For every full batch of test images and each of the
    `cfg.TEXT.CAPTIONS_PER_IMAGE` captions of an image, the inference-mode
    `generator` is run with fresh Gaussian noise and the result is written to
    `cfg.TEST.GENERATED_TEST_IMAGES/<test filename>_<caption index>.png`.
    Images that do not fill a whole batch are not processed.

    The weights are restored from `cfg.CHECKPOINT_DIR/cfg.CHECKPOINT_NAME`. If
    no checkpoint exists under that exact name, the most recent checkpoint in
    `cfg.CHECKPOINT_DIR` is used instead, so the function does not depend on
    another cell having adjusted `cfg` beforehand.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids),
                             (-1, cfg.TEXT.CAPTIONS_PER_IMAGE, cfg.TEXT.WORDS_NUM))

    checkpoint_dir = cfg.CHECKPOINT_DIR
    ckpt_path = None
    if checkpoint_dir is not None:
        ckpt_path = os.path.join(cfg.CHECKPOINT_DIR, cfg.CHECKPOINT_NAME)
        if not tf.train.checkpoint_exists(ckpt_path):
            # Configured name not found (e.g. it lacks the '-<step>' suffix):
            # fall back to the newest checkpoint recorded in the directory.
            # If there is none, keep the configured path and let restore() report it.
            latest = tf.train.latest_checkpoint(checkpoint_dir)
            if latest is not None:
                print("Checkpoint {} not found, using latest checkpoint {}".format(ckpt_path, latest))
                ckpt_path = latest

    n_caption_test = len(caption_ids)
    num_batches = n_caption_test // cfg.BATCH_SIZE

    # The context manager closes the session even if generation or saving fails.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        if ckpt_path is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            # File names are the same for every caption of the batch.
            test_directory = test_dataset.filenames[batch]

            # Create the per-class output folders (first path component of the
            # file name), including the output root itself if it is missing.
            for filename in test_directory:
                os.makedirs(os.path.join(cfg.TEST.GENERATED_TEST_IMAGES, filename.split('/')[0]), exist_ok=True)

            for per_caption in range(cfg.TEXT.CAPTIONS_PER_IMAGE):
                test_cap = caption_ids[batch, per_caption]

                z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)
                gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})

                for j in range(cfg.BATCH_SIZE):
                    # NOTE(review): scipy.misc.imsave is deprecated since SciPy 1.0 and
                    # removed in 1.2 (SciPy recommends imageio.imwrite). It is kept here
                    # because the notebook imports no other image writer; pin SciPy < 1.2
                    # or switch writers when upgrading.
                    scipy.misc.imsave(os.path.join(cfg.TEST.GENERATED_TEST_IMAGES, test_directory[j] + '_{}.png'.format(per_caption)), gen[j])

In [26]:
# Writes the R-precision feature file (cfg.R_PRECISION_FILE inside cfg.R_PRECISION_DIR).
# This call also sets cfg.CHECKPOINT_DIR / cfg.CHECKPOINT_NAME, which
# generate_inception_score_data() reads afterwards.
generate_r_precision_data()

INFO:tensorflow:Restoring parameters from checkpoint/model.ckpt-430
Restored model parameters from checkpoint/model.ckpt-430


In [27]:
# Writes the generated test images as PNG files under cfg.TEST.GENERATED_TEST_IMAGES.
generate_inception_score_data()

INFO:tensorflow:Restoring parameters from checkpoint/model.ckpt-430
Restored model parameters from checkpoint/model.ckpt-430


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


## 4. Measure Inception score and R-precision of the given test dataset

After setting the config file to 'eval_birds.yml' and running 'generate_inception_score_data()' and 'generate_r_precision_data()', the images synthesized from the given captions and the image/caption feature sets should be saved inside the 'evaluation' folder: in 'evaluation/generated_images/..' and as 'evaluation/r_precision.npz', respectively.

**Then, go to the 'evaluation' folder and run each 'inception_score.ipynb' and 'r_precision.ipynb' file in order to measure inception score and r-precision score.**