# M2177.003100 Deep Learning <br> Final Project: Text to Image Synthesis (Tensorflow)

Copyright (C) Data Science & AI Laboratory, Seoul National University. This material is for educational uses only. Some contents are based on the material provided by other paper/book authors and may be copyrighted by them. 

**To understand this work, please read the provided PPT file carefully.**

**Note**: certain details are missing or ambiguous on purpose, in order to test your knowledge of the related materials. However, if you really feel that something essential is missing and you cannot proceed to the next step, then contact the teaching staff with a clear description of your problem.

### Submitting your work:
<font color=red>**DO NOT clear the training process output**</font> so that TAs can grade both your code and results.  
**The TA will set the config file to 'eval_birds.yml' when evaluating the code on the hidden test dataset. Please make sure that your code can generate the data needed to measure the inception score and R-precision on the hidden test dataset.**

## 1. Load datasets
The Birds dataset will be downloaded automatically if it is not located in the *data* directory. <br>

In [2]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os, nltk

from miscc.config import cfg, cfg_from_file
import pprint
import datetime
import dateutil.tz
import numpy as np
import scipy
import time
import random

from utils.data_utils import CUBDataset
from utils.loss import cosine_similarity

from utils.data_utils import *

#################################################
# DO NOT CHANGE 
from utils.model import CNN_ENCODER, RNN_ENCODER, GENERATOR, DISCRIMINATOR
#################################################

%matplotlib inline

In [3]:
# Set a config file as 'train_birds.yml' in training, as 'eval_birds.yml' for evaluation
cfg_from_file('cfg/train_birds.yml') # eval_birds.yml

print('Using config:')
pprint.pprint(cfg)

# Take the visible GPU from the loaded config rather than a hard-coded,
# machine-specific index, so the notebook also runs on hosts with a different
# GPU layout. str() keeps the environment value a string whatever type the
# config file provides.
os.environ['CUDA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)

# Timestamped output directory name: sample/<dataset>_<config>_<YYYY_MM_DD_HH_MM_SS>
now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
output_dir = 'sample/%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

Using config:
{'BATCH_SIZE': 32,
 'CHECKPOINT_DIR': './checkpoint',
 'CHECKPOINT_NAME': 'model.ckpt',
 'CNN': {'EMBEDDING_DIM': 0, 'H_DIM': 0},
 'CONFIG_NAME': 'text-to-image',
 'CUDA': False,
 'DATASET_NAME': 'birds',
 'DATA_DIR': 'data/birds',
 'EMBEDDING_TYPE': 'cnn-rnn',
 'GAN': {'B_ATTENTION': False,
         'B_CONDITION': False,
         'B_DCGAN': False,
         'CONDITION_DIM': 0,
         'DF_DIM': 0,
         'EMBEDDING_DIM': 0,
         'GF_DIM': 0,
         'R_NUM': 0,
         'Z_DIM': 512},
 'GPU_ID': '0',
 'IMAGE_SIZE': 256,
 'NUM_BATCH_FOR_TEST': 0,
 'RANDOM_SEED': 0,
 'RNN': {'EMBEDDING_DIM': 0,
         'H_DIM': 0,
         'TYPE': '',
         'VOCAB_SIZE': 0,
         'WORD_EMBEDDING_DIM': 0},
 'R_PRECISION_DIR': './evaluation',
 'R_PRECISION_FILE': 'r_precision.npz',
 'R_PRECISION_FILE_HIDDEN': 'r_precision_hidden.npz',
 'TEST': {'B_EXAMPLE': False,
          'GENERATED_HIDDEN_TEST_IMAGES': './evaluation/generated_images_hidden',
          'GENERATED_TEST_IMAGES'

  yaml_cfg = edict(yaml.load(f))


In [4]:
# Build the train and test splits from the configured data directory.
train_dataset = CUBDataset(cfg.DATA_DIR, split='train')
test_dataset = CUBDataset(cfg.DATA_DIR, split='test')

# Print a short summary of both splits (directories, counts, one example each).
print(f'\ntrain data directory:\n{train_dataset.split_dir}')
print(f'test data directory:\n{test_dataset.split_dir}\n')

print(f'# of train filenames:{train_dataset.filenames.shape}')
print(f'# of test filenames:{test_dataset.filenames.shape}\n')

print(f'example of filename of train image:{train_dataset.filenames[0]}')
# This line reports the *test* split; it was previously mislabelled "valid image".
print(f'example of filename of test image:{test_dataset.filenames[0]}\n')

# First the train example, then the test example.
print(f'example of caption and its ids:\n{train_dataset.captions[0]}\n{train_dataset.captions_ids[0]}\n')
print(f'example of caption and its ids:\n{test_dataset.captions[0]}\n{test_dataset.captions_ids[0]}\n')

print(f'# of train captions:{np.asarray(train_dataset.captions).shape}')
print(f'# of test captions:{np.asarray(test_dataset.captions).shape}\n')

print(f'# of train caption ids:{np.asarray(train_dataset.captions_ids).shape}')
print(f'# of test caption ids:{np.asarray(test_dataset.captions_ids).shape}\n')

print(f'# of train images:{train_dataset.images.shape}')
print(f'# of test images:{test_dataset.images.shape}\n')

self.current_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf

self.data_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds/CUB_200_2011

Dataset already exists
self.image_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds/CUB_200_2011/images

Load from:  data/birds/captions.pickle
self.current_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf

self.data_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-project-deep-learning-19-tf/data/birds/CUB_200_2011

Dataset already exists
self.image_dir:
/home/duclv/homework/deep-learning-2019/final_project/final-p

## 2. Define models and go to train/evaluate

In [5]:
###======================== DEFINE VARIABLES ===================================###
# Optimisation hyper-parameters used by the optimizer and training cells below.
lr = 0.0002          # initial learning rate stored in the `lr_v` variable
lr_decay = 0.5       # multiplicative decay factor applied to `lr`
decay_every = 100    # number of epochs between two learning-rate decays
beta1 = 0.5          # beta1 passed to every AdamOptimizer
# Shortcuts for values taken from the loaded config.
batch_size = cfg.BATCH_SIZE
image_size = cfg.IMAGE_SIZE
z_dim = cfg.GAN.Z_DIM

# Start from an empty default graph so that re-running the cells below does not
# pile new ops/variables on top of a previous build.
tf.reset_default_graph()
# Reload utils.model and rebind the four network classes, so edits made to that
# module are picked up without restarting the kernel.
from importlib import reload
import utils.model as model
model = reload(model)
RNN_ENCODER = model.RNN_ENCODER
GENERATOR = model.GENERATOR
DISCRIMINATOR = model.DISCRIMINATOR
CNN_ENCODER = model.CNN_ENCODER

In [6]:
###======================== DEFINE PLACEHOLDER ===================================###
# All placeholders have a fixed leading dimension of cfg.BATCH_SIZE.
# Matching image: [batch, IMAGE_SIZE, IMAGE_SIZE, 3] float32.
t_real_image = tf.placeholder('float32', [cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 3], name = 'real_image')
# Matching caption as int64 ids; the second dimension is left unspecified.
t_real_caption = tf.placeholder(dtype=tf.int64, shape=[cfg.BATCH_SIZE , None], name='real_caption_input')
# Mismatching ("wrong") image and caption, fed with independently sampled items.
t_wrong_image = tf.placeholder('float32', [cfg.BATCH_SIZE ,cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 3], name = 'wrong_image')
t_wrong_caption = tf.placeholder(dtype=tf.int64, shape=[cfg.BATCH_SIZE , None], name='wrong_caption_input')
# Noise vector for the generator: [batch, cfg.GAN.Z_DIM] float32.
t_z = tf.placeholder(tf.float32, [cfg.BATCH_SIZE , cfg.GAN.Z_DIM], name='z_noise')

In [7]:
# First instantiation of the four networks (reuse=False, is_training=False).
# Every later instantiation in this notebook passes reuse=True.
# NOTE(review): presumably this is where the shared variables are created and the
# reuse=True instances share them — confirm against utils.model.
rnn_encoder = RNN_ENCODER(t_real_caption, cfg.BATCH_SIZE, is_training=False, reuse=False)
# Generator conditioned on the noise and the caption encoding.
generator = GENERATOR(t_z, rnn_encoder.outputs, is_training=False, reuse=False)
# Discriminator applied to the generated image together with the caption encoding.
discriminator = DISCRIMINATOR(generator.outputs, rnn_encoder.outputs, is_training=False, reuse=False)
# Image encoder applied to the real image.
cnn_encoder = CNN_ENCODER(t_real_image, is_training=False, reuse=False)

In [8]:
### Define image and text mapping
# Training-mode encoders (reuse=True) for the matching pair (x, v) and for the
# mismatching image / caption (x_w, v_w).
net_cnn = CNN_ENCODER(t_real_image, is_training=True, reuse=True)
x = net_cnn.outputs
v = RNN_ENCODER(t_real_caption, cfg.BATCH_SIZE, is_training=True, reuse=True).outputs
x_w = CNN_ENCODER(t_wrong_image, is_training=True, reuse=True).outputs
v_w = RNN_ENCODER(t_wrong_caption, cfg.BATCH_SIZE, is_training=True, reuse=True).outputs

alpha = 0.2 # margin alpha
# Sum of two hinge terms with margin alpha: the matching pair (x, v) should be
# more similar than (image, wrong caption) and than (wrong image, caption).
rnn_loss = tf.reduce_mean(tf.maximum(0., alpha - cosine_similarity(x, v) + cosine_similarity(x, v_w))) + \
            tf.reduce_mean(tf.maximum(0., alpha - cosine_similarity(x, v) + cosine_similarity(x_w, v)))

In [9]:
### Define generative model
# Caption encoder in inference mode; generator and discriminators in training mode.
# All of them are built with reuse=True.
net_rnn = RNN_ENCODER(t_real_caption, cfg.BATCH_SIZE, is_training=False, reuse=True)
net_fake_image = GENERATOR(t_z, net_rnn.outputs, is_training=True, reuse=True)

# Discriminator on (generated image, matching caption).
net_disc_fake = DISCRIMINATOR(net_fake_image.outputs, net_rnn.outputs, is_training=True, reuse=True)
disc_fake_logits = net_disc_fake.logits

# Discriminator on (real image, matching caption).
net_disc_real = DISCRIMINATOR(t_real_image, net_rnn.outputs, is_training=True, reuse=True)
disc_real_logits = net_disc_real.logits

# Discriminator on (real image, wrong caption).
net_disc_mismatch = DISCRIMINATOR(t_real_image, 
                            RNN_ENCODER(t_wrong_caption, cfg.BATCH_SIZE, is_training=False, reuse=True).outputs,
                            is_training=True, reuse=True)
disc_mismatch_logits = net_disc_mismatch.logits

# Discriminator loss: real/matching pairs are labelled 1; mismatching and
# generated pairs are labelled 0 and each contributes with weight 0.5.
d_loss1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_real_logits,     labels=tf.ones_like(disc_real_logits),      name='d1'))
d_loss2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_mismatch_logits, labels=tf.zeros_like(disc_mismatch_logits), name='d2'))
d_loss3 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_fake_logits,     labels=tf.zeros_like(disc_fake_logits),     name='d3'))
d_loss = d_loss1 + (d_loss2 + d_loss3) * 0.5

# Generator loss: the generated pairs should be classified as 1 by the discriminator.
g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_fake_logits, labels=tf.ones_like(disc_fake_logits), name='g'))

In [10]:
### Define optimizers
# Split the trainable variables per network by substring match on the variable name.
rnn_vars = [var for var in tf.trainable_variables() if 'rnnencoder' in var.name]
g_vars = [var for var in tf.trainable_variables() if 'generator' in var.name]
d_vars = [var for var in tf.trainable_variables() if 'discriminator' in var.name]
cnn_vars = [var for var in tf.trainable_variables() if 'cnnencoder' in var.name]

# Ops registered in the UPDATE_OPS collection, again split by name.
# NOTE(review): presumably batch-norm moving-average updates — confirm in utils.model.
update_ops_D = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'discriminator' in var.name]
update_ops_G = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'generator' in var.name]
update_ops_CNN = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'cnnencoder' in var.name]

# Disabled debugging code (kept as a bare string literal): prints the update-op names.
'''print('----------Update_ops_D--------')
for var in update_ops_D:
    print(var.name)
print('----------Update_ops_G--------')
for var in update_ops_G:
    print(var.name)
print('----------Update_ops_CNN--------')
for var in update_ops_CNN:
    print(var.name)'''

# The learning rate lives in a non-trainable variable so the training loop can
# assign a new value to it when it decays the rate.
with tf.variable_scope('learning_rate'):
    lr_v = tf.Variable(lr, trainable=False)

# Each train op is created under a control dependency on the matching update ops.
with tf.control_dependencies(update_ops_D):
    d_optim = tf.train.AdamOptimizer(lr_v, beta1=beta1).minimize(d_loss, var_list=d_vars)

with tf.control_dependencies(update_ops_G):
    g_optim = tf.train.AdamOptimizer(lr_v, beta1=beta1).minimize(g_loss, var_list=g_vars)

# The matching loss updates the text and image encoders jointly; its gradients
# are clipped to a global norm of 10 before being applied.
with tf.control_dependencies(update_ops_CNN):
    grads, _ = tf.clip_by_global_norm(tf.gradients(rnn_loss, rnn_vars + cnn_vars), 10)
    optimizer = tf.train.AdamOptimizer(lr_v, beta1=beta1)
    rnn_optim = optimizer.apply_gradients(zip(grads, rnn_vars + cnn_vars))

In [11]:
import threading
import scipy.ndimage as ndi
from skimage import transform
from skimage import exposure
import skimage

# Data Augmentation reference: https://github.com/tensorlayer/tensorlayer/tree/master/tensorlayer
def threading_data(data=None, fn=None, **kwargs):
    """Gather the elements of ``data`` into a NumPy array, one thread per element.

    NOTE(review): the ``fn(item, **kwargs)`` call is disabled in this version,
    so ``fn`` and ``kwargs`` are accepted but never used and every element is
    stored unchanged. Confirm that this pass-through is intended.

    Parameters
    ----------
    data : indexable sequence supporting ``len()`` and integer indexing.
    fn : callable, currently ignored.
    **kwargs : currently ignored.

    Returns
    -------
    numpy.ndarray built with ``np.asarray`` from the per-element results,
    in the same order as ``data``.
    """
    def _store(slots, position, item, options):
        # Pass-through: `options` is received but not used.
        slots[position] = item

    total = len(data)
    # Pre-sized result list; each worker writes only its own slot.
    slots = [None] * total
    workers = []
    for position in range(total):
        worker = threading.Thread(
            name='threading_and_return',
            target=_store,
            args=(slots, position, data[position], kwargs),
        )
        worker.start()
        workers.append(worker)

    # Wait for every worker before assembling the array.
    for worker in workers:
        worker.join()

    return np.asarray(slots)

def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1):
    """Apply a 2-D affine transform to every channel of an image.

    Parameters
    ----------
    x : numpy.ndarray
        3-D image whose channels lie along ``channel_index``.
    transform_matrix : numpy.ndarray
        Matrix whose upper-left 2x2 block is used as the affine matrix and
        whose first two entries of the third column are used as the offset.
    channel_index : int
        Axis of ``x`` that holds the channels.
    fill_mode : str
        Forwarded as ``mode`` to ``scipy.ndimage.affine_transform``.
    cval : float
        Forwarded as ``cval`` (fill value for ``mode='constant'``).
    order : int
        Spline interpolation order forwarded to ``affine_transform``.

    Returns
    -------
    numpy.ndarray with the same axis layout as ``x``.
    """
    # Bring the channel axis to the front so each 2-D plane is warped on its own.
    x = np.moveaxis(x, channel_index, 0)
    final_affine_matrix = transform_matrix[:2, :2]
    final_offset = transform_matrix[:2, 2]
    # Use the public scipy.ndimage.affine_transform; the
    # scipy.ndimage.interpolation namespace is deprecated.
    channel_images = [
        ndi.affine_transform(x_channel, final_affine_matrix, final_offset,
                             order=order, mode=fill_mode, cval=cval)
        for x_channel in x
    ]
    x = np.stack(channel_images, axis=0)
    # Put the channel axis back where it was.
    return np.moveaxis(x, 0, channel_index)

def transform_matrix_offset_center(matrix, x, y):
    """Wrap ``matrix`` so that it acts about the point (x/2 + 0.5, y/2 + 0.5).

    Builds a translation to that point and its inverse, and returns
    ``translate @ matrix @ translate_back`` as a 3x3 array.
    """
    shift_x = float(x) / 2 + 0.5
    shift_y = float(y) / 2 + 0.5
    to_center = np.array([[1, 0, shift_x],
                          [0, 1, shift_y],
                          [0, 0, 1]])
    to_origin = np.array([[1, 0, -shift_x],
                          [0, 1, -shift_y],
                          [0, 0, 1]])
    return np.dot(np.dot(to_center, matrix), to_origin)

def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2,
                    fill_mode='nearest', cval=0.):
    """Rotate an image about its centre.

    Parameters
    ----------
    x : numpy.ndarray image.
    rg : rotation angle in degrees; when ``is_random`` is true the angle is
        drawn uniformly from ``[-rg, rg)`` instead.
    is_random : whether to sample the angle.
    row_index, col_index, channel_index : axes of ``x`` holding rows, columns
        and channels.
    fill_mode, cval : forwarded to ``apply_transform``.

    Returns
    -------
    The rotated image as returned by ``apply_transform``.
    """
    degrees = np.random.uniform(-rg, rg) if is_random else rg
    theta = np.pi / 180 * degrees
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation_matrix = np.array([[cos_t, -sin_t, 0],
                                [sin_t, cos_t, 0],
                                [0, 0, 1]])

    # Re-centre the rotation on the image before applying it.
    rows = x.shape[row_index]
    cols = x.shape[col_index]
    centred = transform_matrix_offset_center(rotation_matrix, rows, cols)
    return apply_transform(x, centred, channel_index, fill_mode, cval)

def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
    """Crop an image to ``hrg`` rows by ``wrg`` columns.

    Parameters
    ----------
    x : numpy.ndarray
        Image; its height/width are read from ``row_index``/``col_index``.
        The slice itself is always taken on the first two axes.
    wrg, hrg : int
        Width and height of the crop; both must be strictly smaller than
        the image.
    is_random : bool
        If true the crop position is drawn uniformly over every valid
        offset; otherwise a central crop is returned.
    row_index, col_index : int
        Axes used to read the image height and width.
    channel_index : int
        Unused; kept for interface compatibility.

    Returns
    -------
    numpy.ndarray view of ``x`` with shape ``(hrg, wrg, ...)``.

    Raises
    ------
    AssertionError if the crop is not smaller than the image.
    """
    h, w = x.shape[row_index], x.shape[col_index]
    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
    if is_random:
        # Integer sampling over [0, size - crop] inclusive. The previous
        # int(uniform(0, d) - 1) truncated toward zero, which over-weighted
        # offset 0, never reached the last valid offsets and could produce -1.
        h_offset = int(np.random.randint(0, h - hrg + 1))
        w_offset = int(np.random.randint(0, w - wrg + 1))
    else:   # central crop
        h_offset = int(np.floor((h - hrg) / 2.))
        w_offset = int(np.floor((w - wrg) / 2.))
    return x[h_offset: h_offset + hrg, w_offset: w_offset + wrg]

def flip_axis(x, axis, is_random=False):
    """Reverse ``x`` along ``axis``.

    With ``is_random`` set, a value is drawn uniformly from [-1, 1); the flip
    is applied only when the draw is positive, otherwise ``x`` is returned
    untouched. Without ``is_random`` the flip is always applied.
    """
    # The draw is made only in random mode (short-circuit keeps RNG use identical).
    if is_random and not (np.random.uniform(-1, 1) > 0):
        return x
    flipped = np.asarray(x).swapaxes(axis, 0)
    flipped = flipped[::-1, ...]
    return flipped.swapaxes(0, axis)

def imresize(x, size=[100, 100], interp='bilinear', mode=None):
    """Resize a single-channel or three-channel image with ``scipy.misc.imresize``.

    Parameters
    ----------
    x : numpy.ndarray whose last axis has length 1 (greyscale) or 3 (colour).
    size, interp, mode : forwarded to ``scipy.misc.imresize``.

    Returns
    -------
    The resized image; greyscale input keeps its trailing channel axis.

    Raises
    ------
    Exception if the last axis is neither 1 nor 3.

    NOTE(review): ``scipy.misc.imresize`` is not available in recent SciPy
    releases (removed in 1.3) — confirm the installed version provides it.
    """
    channels = x.shape[-1]
    if channels == 3:
        # rgb, bgr ..
        return scipy.misc.imresize(x, size, interp=interp, mode=mode)
    if channels == 1:
        # greyscale: resize the 2-D plane, then restore the channel axis
        plane = scipy.misc.imresize(x[:,:,0], size, interp=interp, mode=mode)
        return plane[:, :, np.newaxis]
    raise Exception("Unsupported channel %d" % x.shape[-1])

def prepro_img(x, mode=None):
    """Augment and rescale one training image.

    For ``mode == 'train'`` the image is randomly flipped along axis 1,
    randomly rotated (up to 16 degrees), resized to ``image_size + 35`` per
    side, randomly cropped to ``image_size`` and then mapped with
    ``x / 127.5 - 1``. For any other mode ``x`` is returned unchanged.
    """
    if mode != 'train':
        return x

    flipped = flip_axis(x, axis=1, is_random=True)
    rotated = rotation(flipped, rg=16, is_random=True, fill_mode='nearest')
    # Upscale with a 35-pixel margin so that the random crop has room to move.
    enlarged = imresize(rotated, size=[image_size+35, image_size+35], interp='bilinear', mode=None)
    cropped = crop(enlarged, wrg=image_size, hrg=image_size, is_random=True)
    # Rescale from [0, 255] to [-1, 1].
    scaled = cropped / (255. / 2.)
    return scaled - 1.

def get_random_int(min=0, max=10, number=5):
    """Draw ``number`` random integers, each from the closed range [min, max].

    Examples
    ---------
    >>> r = get_random_int(min=0, max=10, number=5)
    ... [10, 2, 3, 3, 7]
    """
    picks = []
    for _ in range(0, number):
        picks.append(random.randint(min, max))
    return picks

In [None]:
### Train
# Create the session and initialise every global variable.
sess = tf.Session(config=tf.ConfigProto())
init = tf.global_variables_initializer()
sess.run(init)

#saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=100)
# Checkpoint writer that keeps only the 5 most recent checkpoints.
saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)
checkpoint_dir = cfg.CHECKPOINT_DIR
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
# Disabled checkpoint-restore code (kept as a bare string literal, never executed).
# NOTE(review): it refers to `ckpt_path`, which is not defined in this cell.
'''if ckpt and ckpt.model_checkpoint_path:
    loader = tf.train.Saver(var_list=tf.global_variables())
    load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Restored model parameters from {}".format(ckpt_path))
else:
    print('no checkpoints find.')'''

# Sizes used to drive the loop: one "epoch" is n_images_train / batch_size steps.
n_epoch = cfg.TRAIN.MAX_EPOCH
n_images_train = len(train_dataset.images)
n_batch_epoch = int(n_images_train / batch_size)
train_captions = np.array(train_dataset.captions_ids)
train_images = np.array(train_dataset.images)
n_captions_train = len(train_captions)
n_captions_per_image = cfg.TEXT.CAPTIONS_PER_IMAGE

for epoch in range(cfg.TRAIN.MAX_EPOCH):
    #################################################
    # TODO: Implement text to image synthesis
    start_time = time.time()

    # Step decay: every `decay_every` epochs set lr_v to lr * lr_decay ** k.
    if epoch !=0 and (epoch % decay_every == 0):
        new_lr_decay = lr_decay ** (epoch // decay_every)
        sess.run(tf.assign(lr_v, lr * new_lr_decay))
        log = " ** new learning rate: %f" % (lr * new_lr_decay)
        print(log)

    elif epoch == 0:
        log = " ** init lr: %f  decay_every_epoch: %d, lr_decay: %f" % (lr, decay_every, lr_decay)
        print(log)
    
    for step in range(n_batch_epoch):
        step_time = time.time()

        ## get matched text & image
        # Sample caption indices (with replacement); the matching image index is
        # the caption index integer-divided by the number of captions per image.
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_real_caption = train_captions[idexs]
        b_real_images = train_images[np.floor(np.asarray(idexs).astype('float')/n_captions_per_image).astype('int')]

        """ check for loading right images
        save_images(b_real_images, [ni, ni], 'train_samples/train_00.png')
        for caption in b_real_caption[:8]:
            print(IdList2sent(caption))
        exit()
        """

        ## get wrong caption & wrong image
        # Drawn independently of the matched pair, so a "wrong" item can
        # occasionally coincide with a matching one.
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_wrong_caption = train_captions[idexs]
        idexs2 = get_random_int(min=0, max=n_images_train-1, number=batch_size)
        b_wrong_images = train_images[idexs2]

        ## get noise
        b_z = np.random.normal(loc=0.0, scale=1.0, size=(batch_size, z_dim)).astype(np.float32)

        # NOTE(review): threading_data as defined in this notebook stores each item
        # unchanged (its fn call is commented out), so prepro_img is not actually
        # applied here — confirm that the images are already in the expected range.
        b_real_images = threading_data(b_real_images, prepro_img, mode='train')   # [0, 255] --> [-1, 1] + augmentation
        b_wrong_images = threading_data(b_wrong_images, prepro_img, mode='train')

        ## update RNN
        # The text/image encoders are trained only during the first 80 epochs.
        if epoch < 80:
            errRNN, _ = sess.run([rnn_loss, rnn_optim], feed_dict={
                                            t_real_image : b_real_images,
                                            t_wrong_image : b_wrong_images,
                                            t_real_caption : b_real_caption,
                                            t_wrong_caption : b_wrong_caption})
        else:
            errRNN = 0

        ## updates D
        errD, _ = sess.run([d_loss, d_optim], feed_dict={
                        t_real_image : b_real_images,
                        t_wrong_caption : b_wrong_caption,
                        t_real_caption : b_real_caption,
                        t_z : b_z})
        ## updates G
        # Uses the same captions and noise as the discriminator step.
        errG, _ = sess.run([g_loss, g_optim], feed_dict={
                        t_real_caption : b_real_caption,
                        t_z : b_z})

        print("Epoch: [%2d/%2d] [%4d/%4d] time: %4.4fs, d_loss: %.8f, g_loss: %.8f, rnn_loss: %.8f" \
                    % (epoch, n_epoch, step, n_batch_epoch, time.time() - step_time, errD, errG, errRNN))

    # `% 1` is always 0, so the epoch duration is reported after every epoch.
    if (epoch + 1) % 1 == 0:
        print(" ** Epoch %d took %fs" % (epoch, time.time()-start_time))
        '''img_gen, rnn_out = sess.run([net_g.outputs, net_rnn.outputs], feed_dict={
                                    t_real_caption : sample_sentence,
                                    t_z : sample_seed})

        save_images(img_gen, [ni, ni], 'train_samples/train_{:02d}.png'.format(epoch))'''

    # Save a checkpoint every 10 epochs (epoch 0 excluded), tagged with the epoch number.
    if (epoch != 0) and (epoch % 10) == 0:
        #################################################
        # save checkpoints
        checkpoint_path = os.path.join(cfg.CHECKPOINT_DIR, cfg.CHECKPOINT_NAME)
        saver.save(sess, checkpoint_path, global_step=epoch)
        print('The checkpoint has been created.')

 ** init lr: 0.000200  decay_every_epoch: 100, lr_decay: 0.500000
Epoch: [ 0/600] [   0/ 276] time: 55.2869s, d_loss: 4.16007280, g_loss: 0.07942918, rnn_loss: 0.39139336
Epoch: [ 0/600] [   1/ 276] time: 3.7368s, d_loss: 13.87337589, g_loss: 22.77612686, rnn_loss: 0.42251897
Epoch: [ 0/600] [   2/ 276] time: 3.7422s, d_loss: 4.09953928, g_loss: 16.37288857, rnn_loss: 0.35128343
Epoch: [ 0/600] [   3/ 276] time: 3.7483s, d_loss: 6.11808014, g_loss: 6.61497927, rnn_loss: 0.39881286
Epoch: [ 0/600] [   4/ 276] time: 3.7411s, d_loss: 5.01200771, g_loss: 30.65848160, rnn_loss: 0.41313750
Epoch: [ 0/600] [   5/ 276] time: 3.7104s, d_loss: 6.32387066, g_loss: 24.52893829, rnn_loss: 0.41684318
Epoch: [ 0/600] [   6/ 276] time: 3.7585s, d_loss: 3.65051436, g_loss: 15.54294491, rnn_loss: 0.36780831
Epoch: [ 0/600] [   7/ 276] time: 3.5741s, d_loss: 5.12466049, g_loss: 18.29709435, rnn_loss: 0.47264713
Epoch: [ 0/600] [   8/ 276] time: 3.5599s, d_loss: 4.25928974, g_loss: 13.77301407, rnn_loss: 

Epoch: [ 0/600] [  78/ 276] time: 3.5808s, d_loss: 2.80912209, g_loss: 5.63739300, rnn_loss: 0.35911918
Epoch: [ 0/600] [  79/ 276] time: 3.6412s, d_loss: 2.24622297, g_loss: 5.87053108, rnn_loss: 0.42442584
Epoch: [ 0/600] [  80/ 276] time: 3.6153s, d_loss: 2.89113855, g_loss: 4.98837280, rnn_loss: 0.33477572
Epoch: [ 0/600] [  81/ 276] time: 3.6137s, d_loss: 3.10320926, g_loss: 5.26163244, rnn_loss: 0.47184196
Epoch: [ 0/600] [  82/ 276] time: 3.5878s, d_loss: 1.97518945, g_loss: 2.08299780, rnn_loss: 0.41990560
Epoch: [ 0/600] [  83/ 276] time: 3.5959s, d_loss: 2.55480266, g_loss: 9.06345367, rnn_loss: 0.37490943
Epoch: [ 0/600] [  84/ 276] time: 3.5842s, d_loss: 2.37568498, g_loss: 4.30137825, rnn_loss: 0.38230443
Epoch: [ 0/600] [  85/ 276] time: 3.5800s, d_loss: 2.50229526, g_loss: 8.04148293, rnn_loss: 0.40785220
Epoch: [ 0/600] [  86/ 276] time: 3.5904s, d_loss: 2.36605024, g_loss: 4.94594145, rnn_loss: 0.38502944
Epoch: [ 0/600] [  87/ 276] time: 3.5803s, d_loss: 2.71006918, g

Epoch: [ 0/600] [ 157/ 276] time: 3.5796s, d_loss: 2.71002436, g_loss: 3.02970243, rnn_loss: 0.39578730
Epoch: [ 0/600] [ 158/ 276] time: 3.5779s, d_loss: 1.97470403, g_loss: 2.31319284, rnn_loss: 0.38005447
Epoch: [ 0/600] [ 159/ 276] time: 3.5835s, d_loss: 1.95334911, g_loss: 4.73461533, rnn_loss: 0.39319861
Epoch: [ 0/600] [ 160/ 276] time: 3.5895s, d_loss: 1.62461960, g_loss: 3.16406274, rnn_loss: 0.37456971
Epoch: [ 0/600] [ 161/ 276] time: 3.5781s, d_loss: 1.95011580, g_loss: 2.00341415, rnn_loss: 0.36516267
Epoch: [ 0/600] [ 162/ 276] time: 3.5830s, d_loss: 2.36315393, g_loss: 4.16792488, rnn_loss: 0.39587057
Epoch: [ 0/600] [ 163/ 276] time: 3.6112s, d_loss: 2.63088894, g_loss: 1.58857703, rnn_loss: 0.41746259
Epoch: [ 0/600] [ 164/ 276] time: 3.5793s, d_loss: 2.59066772, g_loss: 4.02055740, rnn_loss: 0.39489135
Epoch: [ 0/600] [ 165/ 276] time: 3.6149s, d_loss: 1.91531086, g_loss: 2.80771422, rnn_loss: 0.41245684
Epoch: [ 0/600] [ 166/ 276] time: 3.6016s, d_loss: 2.16514158, g

Epoch: [ 0/600] [ 236/ 276] time: 3.5863s, d_loss: 1.82462764, g_loss: 1.57256961, rnn_loss: 0.30304429
Epoch: [ 0/600] [ 237/ 276] time: 3.5959s, d_loss: 2.69437599, g_loss: 1.30602264, rnn_loss: 0.36016408
Epoch: [ 0/600] [ 238/ 276] time: 3.5811s, d_loss: 1.94722581, g_loss: 3.86980295, rnn_loss: 0.32490924
Epoch: [ 0/600] [ 239/ 276] time: 3.5934s, d_loss: 1.59851527, g_loss: 1.73156118, rnn_loss: 0.28559917
Epoch: [ 0/600] [ 240/ 276] time: 3.5757s, d_loss: 2.20682359, g_loss: 1.90149570, rnn_loss: 0.36417413
Epoch: [ 0/600] [ 241/ 276] time: 3.5565s, d_loss: 2.56060553, g_loss: 3.70670652, rnn_loss: 0.33964613
Epoch: [ 0/600] [ 242/ 276] time: 3.5537s, d_loss: 2.01462531, g_loss: 2.16558313, rnn_loss: 0.33073497
Epoch: [ 0/600] [ 243/ 276] time: 3.6083s, d_loss: 2.40315628, g_loss: 2.62518811, rnn_loss: 0.36386043
Epoch: [ 0/600] [ 244/ 276] time: 3.5890s, d_loss: 1.86878610, g_loss: 2.94165087, rnn_loss: 0.34604123
Epoch: [ 0/600] [ 245/ 276] time: 3.5805s, d_loss: 2.51315594, g

Epoch: [ 1/600] [  39/ 276] time: 3.6162s, d_loss: 1.96083903, g_loss: 0.96206719, rnn_loss: 0.34574950
Epoch: [ 1/600] [  40/ 276] time: 3.6022s, d_loss: 1.89862180, g_loss: 2.55492687, rnn_loss: 0.31253779
Epoch: [ 1/600] [  41/ 276] time: 3.5792s, d_loss: 1.67720819, g_loss: 1.87301707, rnn_loss: 0.33143771
Epoch: [ 1/600] [  42/ 276] time: 3.5698s, d_loss: 1.86976624, g_loss: 2.49510264, rnn_loss: 0.33956051
Epoch: [ 1/600] [  43/ 276] time: 3.5806s, d_loss: 2.04730392, g_loss: 2.55108929, rnn_loss: 0.28494644
Epoch: [ 1/600] [  44/ 276] time: 3.5829s, d_loss: 2.28556347, g_loss: 0.58043146, rnn_loss: 0.34361982
Epoch: [ 1/600] [  45/ 276] time: 3.5262s, d_loss: 2.26557136, g_loss: 3.02004743, rnn_loss: 0.27582192
Epoch: [ 1/600] [  46/ 276] time: 3.5800s, d_loss: 1.70706797, g_loss: 2.60828447, rnn_loss: 0.29503202
Epoch: [ 1/600] [  47/ 276] time: 3.6236s, d_loss: 1.71549439, g_loss: 0.70891058, rnn_loss: 0.28627145
Epoch: [ 1/600] [  48/ 276] time: 3.6963s, d_loss: 2.09692812, g

Epoch: [ 1/600] [ 118/ 276] time: 3.6206s, d_loss: 1.77980089, g_loss: 4.52469826, rnn_loss: 0.41709280
Epoch: [ 1/600] [ 119/ 276] time: 3.5808s, d_loss: 1.88690996, g_loss: 1.49025011, rnn_loss: 0.28900436
Epoch: [ 1/600] [ 120/ 276] time: 3.5978s, d_loss: 1.79045701, g_loss: 1.26943684, rnn_loss: 0.33236873
Epoch: [ 1/600] [ 121/ 276] time: 3.5761s, d_loss: 1.90524483, g_loss: 2.63229871, rnn_loss: 0.35082996
Epoch: [ 1/600] [ 122/ 276] time: 3.6739s, d_loss: 1.71228051, g_loss: 2.69023418, rnn_loss: 0.34405452
Epoch: [ 1/600] [ 123/ 276] time: 3.6860s, d_loss: 1.93378139, g_loss: 1.20265126, rnn_loss: 0.29242280
Epoch: [ 1/600] [ 124/ 276] time: 3.7004s, d_loss: 1.80545449, g_loss: 4.03573895, rnn_loss: 0.27136081
Epoch: [ 1/600] [ 125/ 276] time: 3.6214s, d_loss: 1.82080460, g_loss: 1.11576867, rnn_loss: 0.25657916
Epoch: [ 1/600] [ 126/ 276] time: 3.5546s, d_loss: 1.66898441, g_loss: 1.77860236, rnn_loss: 0.37004310
Epoch: [ 1/600] [ 127/ 276] time: 3.6184s, d_loss: 1.92631030, g

Epoch: [ 1/600] [ 197/ 276] time: 3.5713s, d_loss: 2.11086059, g_loss: 1.55860484, rnn_loss: 0.31264114
Epoch: [ 1/600] [ 198/ 276] time: 3.6766s, d_loss: 1.74136186, g_loss: 4.86272192, rnn_loss: 0.38751096
Epoch: [ 1/600] [ 199/ 276] time: 3.6868s, d_loss: 1.67662346, g_loss: 1.07050300, rnn_loss: 0.19680285
Epoch: [ 1/600] [ 200/ 276] time: 3.6189s, d_loss: 2.34546614, g_loss: 6.32136440, rnn_loss: 0.27501214
Epoch: [ 1/600] [ 201/ 276] time: 3.5736s, d_loss: 2.32623076, g_loss: 1.10547888, rnn_loss: 0.21107306
Epoch: [ 1/600] [ 202/ 276] time: 3.5798s, d_loss: 3.02215743, g_loss: 3.33741212, rnn_loss: 0.34812185
Epoch: [ 1/600] [ 203/ 276] time: 3.5816s, d_loss: 2.29423928, g_loss: 3.08821440, rnn_loss: 0.30550158
Epoch: [ 1/600] [ 204/ 276] time: 3.5721s, d_loss: 1.86604893, g_loss: 1.17374384, rnn_loss: 0.30627599
Epoch: [ 1/600] [ 205/ 276] time: 3.5872s, d_loss: 2.06411886, g_loss: 3.78251839, rnn_loss: 0.35577029
Epoch: [ 1/600] [ 206/ 276] time: 3.5861s, d_loss: 1.92041898, g

Epoch: [ 2/600] [   0/ 276] time: 3.5964s, d_loss: 3.04568648, g_loss: 0.68864655, rnn_loss: 0.23106042
Epoch: [ 2/600] [   1/ 276] time: 3.5870s, d_loss: 2.76746988, g_loss: 2.30887890, rnn_loss: 0.26803669
Epoch: [ 2/600] [   2/ 276] time: 3.6049s, d_loss: 1.71020389, g_loss: 4.32619715, rnn_loss: 0.28940326
Epoch: [ 2/600] [   3/ 276] time: 3.5856s, d_loss: 2.59307170, g_loss: 2.05020285, rnn_loss: 0.25390121
Epoch: [ 2/600] [   4/ 276] time: 3.5709s, d_loss: 2.93502784, g_loss: 2.43019485, rnn_loss: 0.24726202
Epoch: [ 2/600] [   5/ 276] time: 3.5546s, d_loss: 1.58987784, g_loss: 3.30708933, rnn_loss: 0.31982154
Epoch: [ 2/600] [   6/ 276] time: 3.6212s, d_loss: 1.53861904, g_loss: 4.39856625, rnn_loss: 0.28878015
Epoch: [ 2/600] [   7/ 276] time: 3.6978s, d_loss: 2.25813317, g_loss: 1.09206021, rnn_loss: 0.24735633
Epoch: [ 2/600] [   8/ 276] time: 3.6768s, d_loss: 1.75637209, g_loss: 2.56848192, rnn_loss: 0.24381992
Epoch: [ 2/600] [   9/ 276] time: 3.6192s, d_loss: 1.69949591, g

Epoch: [ 2/600] [  79/ 276] time: 3.6022s, d_loss: 1.98357558, g_loss: 4.26847553, rnn_loss: 0.29788044
Epoch: [ 2/600] [  80/ 276] time: 3.5781s, d_loss: 1.92067146, g_loss: 2.04544806, rnn_loss: 0.28290957
Epoch: [ 2/600] [  81/ 276] time: 3.5826s, d_loss: 2.06226015, g_loss: 5.57367992, rnn_loss: 0.24699037
Epoch: [ 2/600] [  82/ 276] time: 3.5740s, d_loss: 2.86266589, g_loss: 1.62593102, rnn_loss: 0.28051889
Epoch: [ 2/600] [  83/ 276] time: 3.5875s, d_loss: 2.16381192, g_loss: 4.77808142, rnn_loss: 0.25028720
Epoch: [ 2/600] [  84/ 276] time: 3.5673s, d_loss: 2.36053038, g_loss: 3.01492262, rnn_loss: 0.23323369
Epoch: [ 2/600] [  85/ 276] time: 3.5744s, d_loss: 1.80154705, g_loss: 4.23784447, rnn_loss: 0.35047266
Epoch: [ 2/600] [  86/ 276] time: 3.5848s, d_loss: 1.58233857, g_loss: 2.77896738, rnn_loss: 0.22759643
Epoch: [ 2/600] [  87/ 276] time: 3.5906s, d_loss: 1.72670317, g_loss: 3.40045500, rnn_loss: 0.19463488
Epoch: [ 2/600] [  88/ 276] time: 3.6125s, d_loss: 2.19273090, g

Epoch: [ 2/600] [ 158/ 276] time: 3.5954s, d_loss: 1.88863850, g_loss: 0.91705239, rnn_loss: 0.24948570
Epoch: [ 2/600] [ 159/ 276] time: 3.5995s, d_loss: 1.97404456, g_loss: 2.86362076, rnn_loss: 0.27285117
Epoch: [ 2/600] [ 160/ 276] time: 3.5607s, d_loss: 1.61006761, g_loss: 1.77435386, rnn_loss: 0.26288575
Epoch: [ 2/600] [ 161/ 276] time: 3.5701s, d_loss: 1.70683122, g_loss: 2.01883125, rnn_loss: 0.29751086
Epoch: [ 2/600] [ 162/ 276] time: 3.5575s, d_loss: 1.53979290, g_loss: 2.57749057, rnn_loss: 0.30835322
Epoch: [ 2/600] [ 163/ 276] time: 3.5984s, d_loss: 1.29435992, g_loss: 1.99341202, rnn_loss: 0.28712878
Epoch: [ 2/600] [ 164/ 276] time: 3.5783s, d_loss: 1.39610875, g_loss: 2.77430582, rnn_loss: 0.23592320
Epoch: [ 2/600] [ 165/ 276] time: 3.5779s, d_loss: 1.33774567, g_loss: 1.68685198, rnn_loss: 0.26825243
Epoch: [ 2/600] [ 166/ 276] time: 3.6119s, d_loss: 1.57106781, g_loss: 2.22799134, rnn_loss: 0.25150675
Epoch: [ 2/600] [ 167/ 276] time: 3.5738s, d_loss: 1.51539779, g

Epoch: [ 2/600] [ 237/ 276] time: 3.5277s, d_loss: 1.62039828, g_loss: 5.84879780, rnn_loss: 0.25654817
Epoch: [ 2/600] [ 238/ 276] time: 3.5519s, d_loss: 2.60651541, g_loss: 1.30814064, rnn_loss: 0.29676285
Epoch: [ 2/600] [ 239/ 276] time: 3.6260s, d_loss: 2.43198991, g_loss: 3.23216629, rnn_loss: 0.27271718
Epoch: [ 2/600] [ 240/ 276] time: 3.6927s, d_loss: 1.91557300, g_loss: 2.38552523, rnn_loss: 0.25710997
Epoch: [ 2/600] [ 241/ 276] time: 3.5950s, d_loss: 2.31640053, g_loss: 2.35125923, rnn_loss: 0.24394031
Epoch: [ 2/600] [ 242/ 276] time: 3.5610s, d_loss: 2.73475480, g_loss: 2.62840319, rnn_loss: 0.28745818
Epoch: [ 2/600] [ 243/ 276] time: 3.5967s, d_loss: 1.89695096, g_loss: 6.02543592, rnn_loss: 0.24228945
Epoch: [ 2/600] [ 244/ 276] time: 3.5979s, d_loss: 2.22347951, g_loss: 1.12109351, rnn_loss: 0.33630610
Epoch: [ 2/600] [ 245/ 276] time: 3.5820s, d_loss: 2.47154284, g_loss: 4.58988810, rnn_loss: 0.26641256
Epoch: [ 2/600] [ 246/ 276] time: 3.5745s, d_loss: 2.04027176, g

Epoch: [ 3/600] [  40/ 276] time: 3.6430s, d_loss: 1.70754743, g_loss: 2.88921738, rnn_loss: 0.28530866
Epoch: [ 3/600] [  41/ 276] time: 3.6720s, d_loss: 1.37009573, g_loss: 2.74233270, rnn_loss: 0.26058850
Epoch: [ 3/600] [  42/ 276] time: 3.6505s, d_loss: 1.76104057, g_loss: 2.57896900, rnn_loss: 0.19193658
Epoch: [ 3/600] [  43/ 276] time: 3.6868s, d_loss: 1.44768906, g_loss: 2.26626635, rnn_loss: 0.24686965
Epoch: [ 3/600] [  44/ 276] time: 3.6444s, d_loss: 1.58659267, g_loss: 1.43198371, rnn_loss: 0.21263027
Epoch: [ 3/600] [  45/ 276] time: 3.6355s, d_loss: 2.10792661, g_loss: 3.85982251, rnn_loss: 0.24169977
Epoch: [ 3/600] [  46/ 276] time: 3.6411s, d_loss: 1.85352814, g_loss: 1.90644538, rnn_loss: 0.28094792
Epoch: [ 3/600] [  47/ 276] time: 3.6292s, d_loss: 1.78044534, g_loss: 2.36277533, rnn_loss: 0.31760737
Epoch: [ 3/600] [  48/ 276] time: 3.6644s, d_loss: 1.73150420, g_loss: 2.74407744, rnn_loss: 0.28308809
Epoch: [ 3/600] [  49/ 276] time: 3.6149s, d_loss: 1.53237653, g

Epoch: [ 3/600] [ 119/ 276] time: 3.6578s, d_loss: 1.52948844, g_loss: 4.24436188, rnn_loss: 0.21434307
Epoch: [ 3/600] [ 120/ 276] time: 3.5985s, d_loss: 1.84262383, g_loss: 2.22442389, rnn_loss: 0.30931515
Epoch: [ 3/600] [ 121/ 276] time: 3.6191s, d_loss: 2.02144623, g_loss: 1.71442592, rnn_loss: 0.22789776
Epoch: [ 3/600] [ 122/ 276] time: 3.6189s, d_loss: 1.47390389, g_loss: 3.16477680, rnn_loss: 0.33500016
Epoch: [ 3/600] [ 123/ 276] time: 3.6387s, d_loss: 1.65839362, g_loss: 2.01883864, rnn_loss: 0.28089345
Epoch: [ 3/600] [ 124/ 276] time: 3.6588s, d_loss: 1.98680139, g_loss: 2.07632637, rnn_loss: 0.25464451
Epoch: [ 3/600] [ 125/ 276] time: 3.5945s, d_loss: 2.00697231, g_loss: 0.29961106, rnn_loss: 0.23889527
Epoch: [ 3/600] [ 126/ 276] time: 3.5781s, d_loss: 3.00575423, g_loss: 3.86879253, rnn_loss: 0.31744248
Epoch: [ 3/600] [ 127/ 276] time: 3.5834s, d_loss: 1.40102315, g_loss: 3.66035891, rnn_loss: 0.31849679
Epoch: [ 3/600] [ 128/ 276] time: 3.5791s, d_loss: 2.08111358, g

Epoch: [ 3/600] [ 198/ 276] time: 3.6839s, d_loss: 2.01387167, g_loss: 2.90536118, rnn_loss: 0.21322866
Epoch: [ 3/600] [ 199/ 276] time: 3.6697s, d_loss: 1.78638256, g_loss: 2.44796157, rnn_loss: 0.22582327
Epoch: [ 3/600] [ 200/ 276] time: 3.6669s, d_loss: 1.69197035, g_loss: 5.32280684, rnn_loss: 0.19904739
Epoch: [ 3/600] [ 201/ 276] time: 3.6499s, d_loss: 1.88533926, g_loss: 2.46545053, rnn_loss: 0.27088028
Epoch: [ 3/600] [ 202/ 276] time: 3.5838s, d_loss: 2.08519292, g_loss: 2.80172205, rnn_loss: 0.27604005
Epoch: [ 3/600] [ 203/ 276] time: 3.6976s, d_loss: 2.02457213, g_loss: 1.86334729, rnn_loss: 0.23247382
Epoch: [ 3/600] [ 204/ 276] time: 3.6503s, d_loss: 1.57051194, g_loss: 4.53280640, rnn_loss: 0.19053379
Epoch: [ 3/600] [ 205/ 276] time: 3.6790s, d_loss: 1.80631077, g_loss: 2.01729107, rnn_loss: 0.25096104
Epoch: [ 3/600] [ 206/ 276] time: 3.6755s, d_loss: 2.63839197, g_loss: 5.04967308, rnn_loss: 0.25599998
Epoch: [ 3/600] [ 207/ 276] time: 3.6793s, d_loss: 1.95015550, g

Epoch: [ 4/600] [   1/ 276] time: 3.6606s, d_loss: 2.00917292, g_loss: 2.04481673, rnn_loss: 0.22918040
Epoch: [ 4/600] [   2/ 276] time: 3.6471s, d_loss: 1.59930432, g_loss: 2.18019342, rnn_loss: 0.21796070
Epoch: [ 4/600] [   3/ 276] time: 3.6908s, d_loss: 1.38813019, g_loss: 2.44353628, rnn_loss: 0.20881844
Epoch: [ 4/600] [   4/ 276] time: 3.6641s, d_loss: 1.52812052, g_loss: 2.22775459, rnn_loss: 0.16371933
Epoch: [ 4/600] [   5/ 276] time: 3.6734s, d_loss: 1.74393320, g_loss: 2.32960653, rnn_loss: 0.19517712
Epoch: [ 4/600] [   6/ 276] time: 3.6414s, d_loss: 2.20783281, g_loss: 2.09275770, rnn_loss: 0.25006658
Epoch: [ 4/600] [   7/ 276] time: 3.6742s, d_loss: 1.75764227, g_loss: 1.96371102, rnn_loss: 0.25287676
Epoch: [ 4/600] [   8/ 276] time: 3.6606s, d_loss: 1.58372927, g_loss: 1.98371923, rnn_loss: 0.18082991
Epoch: [ 4/600] [   9/ 276] time: 3.6755s, d_loss: 1.37013221, g_loss: 4.03608179, rnn_loss: 0.18837351
Epoch: [ 4/600] [  10/ 276] time: 3.6809s, d_loss: 1.76151645, g

Epoch: [ 4/600] [  80/ 276] time: 3.6870s, d_loss: 1.64404047, g_loss: 3.47062540, rnn_loss: 0.28533250
Epoch: [ 4/600] [  81/ 276] time: 3.6998s, d_loss: 1.71732008, g_loss: 2.28580189, rnn_loss: 0.26960340
Epoch: [ 4/600] [  82/ 276] time: 3.6980s, d_loss: 1.49103689, g_loss: 3.19171762, rnn_loss: 0.22259428
Epoch: [ 4/600] [  83/ 276] time: 3.6813s, d_loss: 1.79905379, g_loss: 1.19614244, rnn_loss: 0.28895015
Epoch: [ 4/600] [  84/ 276] time: 3.6834s, d_loss: 1.81530690, g_loss: 3.07303190, rnn_loss: 0.23004524
Epoch: [ 4/600] [  85/ 276] time: 3.6899s, d_loss: 1.44053125, g_loss: 2.01943755, rnn_loss: 0.25307304
Epoch: [ 4/600] [  86/ 276] time: 3.6607s, d_loss: 1.58183551, g_loss: 0.86068350, rnn_loss: 0.23802038
Epoch: [ 4/600] [  87/ 276] time: 3.6936s, d_loss: 2.01659393, g_loss: 3.25480175, rnn_loss: 0.13267513
Epoch: [ 4/600] [  88/ 276] time: 3.6655s, d_loss: 1.38670897, g_loss: 3.20063066, rnn_loss: 0.26040566
Epoch: [ 4/600] [  89/ 276] time: 3.7378s, d_loss: 1.56173122, g

Epoch: [ 4/600] [ 159/ 276] time: 3.6631s, d_loss: 1.76764798, g_loss: 3.19575047, rnn_loss: 0.27418405
Epoch: [ 4/600] [ 160/ 276] time: 3.6726s, d_loss: 1.55657363, g_loss: 2.51043463, rnn_loss: 0.31593174
Epoch: [ 4/600] [ 161/ 276] time: 3.6707s, d_loss: 1.40302598, g_loss: 2.91324949, rnn_loss: 0.24680582
Epoch: [ 4/600] [ 162/ 276] time: 3.6735s, d_loss: 1.29680014, g_loss: 2.57565546, rnn_loss: 0.22483233
Epoch: [ 4/600] [ 163/ 276] time: 3.6427s, d_loss: 1.30812681, g_loss: 1.54705155, rnn_loss: 0.27485549
Epoch: [ 4/600] [ 164/ 276] time: 3.6964s, d_loss: 1.49270606, g_loss: 2.11495233, rnn_loss: 0.18253261
Epoch: [ 4/600] [ 165/ 276] time: 3.6611s, d_loss: 2.02099204, g_loss: 4.02898026, rnn_loss: 0.21433122
Epoch: [ 4/600] [ 166/ 276] time: 3.6690s, d_loss: 1.39756227, g_loss: 3.31538081, rnn_loss: 0.21468800
Epoch: [ 4/600] [ 167/ 276] time: 3.6782s, d_loss: 2.04945135, g_loss: 3.12633085, rnn_loss: 0.22376655
Epoch: [ 4/600] [ 168/ 276] time: 3.7053s, d_loss: 2.23007393, g

Epoch: [ 4/600] [ 238/ 276] time: 3.6585s, d_loss: 1.74460387, g_loss: 2.90209484, rnn_loss: 0.22116846
Epoch: [ 4/600] [ 239/ 276] time: 3.6854s, d_loss: 1.28111100, g_loss: 4.19725513, rnn_loss: 0.26114488
Epoch: [ 4/600] [ 240/ 276] time: 3.6747s, d_loss: 1.13795948, g_loss: 2.03374600, rnn_loss: 0.23387262
Epoch: [ 4/600] [ 241/ 276] time: 3.6730s, d_loss: 1.41651917, g_loss: 1.46575999, rnn_loss: 0.24874809
Epoch: [ 4/600] [ 242/ 276] time: 3.6896s, d_loss: 1.43244267, g_loss: 3.79788017, rnn_loss: 0.18240246
Epoch: [ 4/600] [ 243/ 276] time: 3.6679s, d_loss: 1.74749672, g_loss: 1.62136340, rnn_loss: 0.16071214
Epoch: [ 4/600] [ 244/ 276] time: 3.6887s, d_loss: 2.23493958, g_loss: 3.31245375, rnn_loss: 0.19957069
Epoch: [ 4/600] [ 245/ 276] time: 3.6450s, d_loss: 1.65694964, g_loss: 1.19481850, rnn_loss: 0.24031138
Epoch: [ 4/600] [ 246/ 276] time: 3.6644s, d_loss: 1.74908459, g_loss: 3.83898878, rnn_loss: 0.21120118
Epoch: [ 4/600] [ 247/ 276] time: 3.6680s, d_loss: 1.62680674, g

Epoch: [ 5/600] [  41/ 276] time: 3.6646s, d_loss: 2.05635691, g_loss: 3.37279081, rnn_loss: 0.24805777
Epoch: [ 5/600] [  42/ 276] time: 3.6746s, d_loss: 1.24874234, g_loss: 3.54158235, rnn_loss: 0.28113919
Epoch: [ 5/600] [  43/ 276] time: 3.6640s, d_loss: 2.07426906, g_loss: 0.73866868, rnn_loss: 0.26666525
Epoch: [ 5/600] [  44/ 276] time: 3.6964s, d_loss: 2.16251206, g_loss: 4.57473564, rnn_loss: 0.26411924
Epoch: [ 5/600] [  45/ 276] time: 3.6597s, d_loss: 1.92938578, g_loss: 2.71944284, rnn_loss: 0.22314993
Epoch: [ 5/600] [  46/ 276] time: 3.6553s, d_loss: 1.20016503, g_loss: 2.69724512, rnn_loss: 0.22218779
Epoch: [ 5/600] [  47/ 276] time: 3.6746s, d_loss: 1.39106905, g_loss: 3.28921819, rnn_loss: 0.27651811
Epoch: [ 5/600] [  48/ 276] time: 3.6726s, d_loss: 1.27112007, g_loss: 6.24888515, rnn_loss: 0.21230268
Epoch: [ 5/600] [  49/ 276] time: 3.6707s, d_loss: 2.15780926, g_loss: 1.85919070, rnn_loss: 0.25561672
Epoch: [ 5/600] [  50/ 276] time: 3.6758s, d_loss: 1.26210475, g

Epoch: [ 5/600] [ 120/ 276] time: 3.6748s, d_loss: 1.13000762, g_loss: 1.60345960, rnn_loss: 0.19482881
Epoch: [ 5/600] [ 121/ 276] time: 3.6547s, d_loss: 1.91658163, g_loss: 0.81291926, rnn_loss: 0.19460040
Epoch: [ 5/600] [ 122/ 276] time: 3.6585s, d_loss: 2.00401425, g_loss: 3.12449169, rnn_loss: 0.21113406
Epoch: [ 5/600] [ 123/ 276] time: 3.6813s, d_loss: 1.60862768, g_loss: 2.58248091, rnn_loss: 0.28237301
Epoch: [ 5/600] [ 124/ 276] time: 3.6767s, d_loss: 1.66603839, g_loss: 2.01445532, rnn_loss: 0.18119743
Epoch: [ 5/600] [ 125/ 276] time: 3.6428s, d_loss: 1.58609867, g_loss: 1.83359551, rnn_loss: 0.27545899
Epoch: [ 5/600] [ 126/ 276] time: 3.6719s, d_loss: 1.82046771, g_loss: 1.21618819, rnn_loss: 0.17163655
Epoch: [ 5/600] [ 127/ 276] time: 3.6631s, d_loss: 1.42045557, g_loss: 2.28654861, rnn_loss: 0.23458888
Epoch: [ 5/600] [ 128/ 276] time: 3.6846s, d_loss: 1.50462127, g_loss: 1.50297427, rnn_loss: 0.27133316
Epoch: [ 5/600] [ 129/ 276] time: 3.6946s, d_loss: 1.96892297, g

Epoch: [ 5/600] [ 199/ 276] time: 3.6817s, d_loss: 1.37212110, g_loss: 1.80596840, rnn_loss: 0.19166481
Epoch: [ 5/600] [ 200/ 276] time: 3.6814s, d_loss: 1.26565540, g_loss: 2.09917116, rnn_loss: 0.18159667
Epoch: [ 5/600] [ 201/ 276] time: 3.6692s, d_loss: 1.59192777, g_loss: 6.05351686, rnn_loss: 0.25355130
Epoch: [ 5/600] [ 202/ 276] time: 3.6711s, d_loss: 1.73063827, g_loss: 2.58403492, rnn_loss: 0.19060743
Epoch: [ 5/600] [ 203/ 276] time: 3.6606s, d_loss: 1.15829587, g_loss: 0.97505903, rnn_loss: 0.19327694
Epoch: [ 5/600] [ 204/ 276] time: 3.6741s, d_loss: 2.20708776, g_loss: 5.64163017, rnn_loss: 0.21264307
Epoch: [ 5/600] [ 205/ 276] time: 3.6279s, d_loss: 2.64376640, g_loss: 0.79031521, rnn_loss: 0.16473663
Epoch: [ 5/600] [ 206/ 276] time: 3.7002s, d_loss: 2.43280840, g_loss: 4.44460630, rnn_loss: 0.14200076
Epoch: [ 5/600] [ 207/ 276] time: 3.7437s, d_loss: 1.72275138, g_loss: 3.20444202, rnn_loss: 0.20259249
Epoch: [ 5/600] [ 208/ 276] time: 3.7368s, d_loss: 1.14147925, g

Epoch: [ 6/600] [   2/ 276] time: 3.6557s, d_loss: 2.10781026, g_loss: 0.97857261, rnn_loss: 0.15621056
Epoch: [ 6/600] [   3/ 276] time: 3.7175s, d_loss: 1.16717863, g_loss: 2.23250103, rnn_loss: 0.26285240
Epoch: [ 6/600] [   4/ 276] time: 3.7046s, d_loss: 1.49388838, g_loss: 1.43918550, rnn_loss: 0.25241691
Epoch: [ 6/600] [   5/ 276] time: 3.6635s, d_loss: 1.33357000, g_loss: 2.11336207, rnn_loss: 0.22751522
Epoch: [ 6/600] [   6/ 276] time: 3.6116s, d_loss: 1.23592234, g_loss: 2.69141054, rnn_loss: 0.20215982
Epoch: [ 6/600] [   7/ 276] time: 3.6960s, d_loss: 2.10457206, g_loss: 0.72991800, rnn_loss: 0.22857577
Epoch: [ 6/600] [   8/ 276] time: 3.6471s, d_loss: 2.29201937, g_loss: 2.54590440, rnn_loss: 0.15309815
Epoch: [ 6/600] [   9/ 276] time: 3.6928s, d_loss: 1.63238549, g_loss: 1.64711607, rnn_loss: 0.17044851
Epoch: [ 6/600] [  10/ 276] time: 3.6853s, d_loss: 1.41685486, g_loss: 1.67708373, rnn_loss: 0.17140847
Epoch: [ 6/600] [  11/ 276] time: 3.7003s, d_loss: 1.95565629, g

Epoch: [ 6/600] [  81/ 276] time: 3.6727s, d_loss: 1.34596860, g_loss: 3.79482627, rnn_loss: 0.23593238
Epoch: [ 6/600] [  82/ 276] time: 3.6581s, d_loss: 1.41569686, g_loss: 4.07638025, rnn_loss: 0.26762235
Epoch: [ 6/600] [  83/ 276] time: 3.5979s, d_loss: 1.24391007, g_loss: 4.13093376, rnn_loss: 0.21003214
Epoch: [ 6/600] [  84/ 276] time: 3.6962s, d_loss: 1.36314416, g_loss: 1.78309011, rnn_loss: 0.21630709
Epoch: [ 6/600] [  85/ 276] time: 3.6861s, d_loss: 2.18751192, g_loss: 3.62447596, rnn_loss: 0.28516376
Epoch: [ 6/600] [  86/ 276] time: 3.6582s, d_loss: 1.48125386, g_loss: 3.67200565, rnn_loss: 0.20821829
Epoch: [ 6/600] [  87/ 276] time: 3.6965s, d_loss: 1.24694479, g_loss: 2.20475197, rnn_loss: 0.18887460
Epoch: [ 6/600] [  88/ 276] time: 3.6504s, d_loss: 1.34567332, g_loss: 2.59973097, rnn_loss: 0.25956708
Epoch: [ 6/600] [  89/ 276] time: 3.6846s, d_loss: 1.62170565, g_loss: 2.45573711, rnn_loss: 0.26810080
Epoch: [ 6/600] [  90/ 276] time: 3.6831s, d_loss: 1.61859965, g

Epoch: [ 6/600] [ 160/ 276] time: 4.3096s, d_loss: 1.09394050, g_loss: 2.32935429, rnn_loss: 0.13693675
Epoch: [ 6/600] [ 161/ 276] time: 4.7101s, d_loss: 1.32444561, g_loss: 1.55749059, rnn_loss: 0.25017893
Epoch: [ 6/600] [ 162/ 276] time: 6.0492s, d_loss: 1.15857494, g_loss: 2.63941860, rnn_loss: 0.24315409
Epoch: [ 6/600] [ 163/ 276] time: 5.3590s, d_loss: 1.20244992, g_loss: 3.99541306, rnn_loss: 0.28399634
Epoch: [ 6/600] [ 164/ 276] time: 5.5685s, d_loss: 1.61351550, g_loss: 1.40825784, rnn_loss: 0.20187563
Epoch: [ 6/600] [ 165/ 276] time: 5.0829s, d_loss: 1.04195642, g_loss: 2.36597109, rnn_loss: 0.21366146
Epoch: [ 6/600] [ 166/ 276] time: 5.8590s, d_loss: 1.28887486, g_loss: 3.01991367, rnn_loss: 0.24897936
Epoch: [ 6/600] [ 167/ 276] time: 5.7116s, d_loss: 1.55538690, g_loss: 1.50768983, rnn_loss: 0.19144715
Epoch: [ 6/600] [ 168/ 276] time: 5.2253s, d_loss: 1.59040689, g_loss: 2.31103468, rnn_loss: 0.18887365
Epoch: [ 6/600] [ 169/ 276] time: 5.7440s, d_loss: 0.98719317, g

Epoch: [ 6/600] [ 239/ 276] time: 5.2048s, d_loss: 1.72685838, g_loss: 2.03677797, rnn_loss: 0.18654297
Epoch: [ 6/600] [ 240/ 276] time: 5.7819s, d_loss: 2.10562897, g_loss: 5.40113926, rnn_loss: 0.15031430
Epoch: [ 6/600] [ 241/ 276] time: 3.5231s, d_loss: 1.90565276, g_loss: 1.40638113, rnn_loss: 0.19590975
Epoch: [ 6/600] [ 242/ 276] time: 3.5168s, d_loss: 1.83232152, g_loss: 5.55493069, rnn_loss: 0.16417634
Epoch: [ 6/600] [ 243/ 276] time: 7.1479s, d_loss: 2.94339204, g_loss: 0.79721236, rnn_loss: 0.17108802
Epoch: [ 6/600] [ 244/ 276] time: 3.7289s, d_loss: 2.09053516, g_loss: 2.30018306, rnn_loss: 0.16482808
Epoch: [ 6/600] [ 245/ 276] time: 7.1455s, d_loss: 1.31772923, g_loss: 3.58380842, rnn_loss: 0.20642920
Epoch: [ 6/600] [ 246/ 276] time: 3.7313s, d_loss: 1.84160924, g_loss: 1.90412474, rnn_loss: 0.23838413
Epoch: [ 6/600] [ 247/ 276] time: 6.6129s, d_loss: 1.79041958, g_loss: 1.36226678, rnn_loss: 0.22942588
Epoch: [ 6/600] [ 248/ 276] time: 3.8749s, d_loss: 1.30412555, g

Epoch: [ 7/600] [  42/ 276] time: 5.8829s, d_loss: 1.70596981, g_loss: 2.92970848, rnn_loss: 0.16606227
Epoch: [ 7/600] [  43/ 276] time: 5.5545s, d_loss: 1.57171452, g_loss: 1.50850093, rnn_loss: 0.18280491
Epoch: [ 7/600] [  44/ 276] time: 5.3701s, d_loss: 1.35405302, g_loss: 1.72425783, rnn_loss: 0.20033640
Epoch: [ 7/600] [  45/ 276] time: 4.1269s, d_loss: 1.22355974, g_loss: 2.15711260, rnn_loss: 0.23911008
Epoch: [ 7/600] [  46/ 276] time: 4.5366s, d_loss: 1.33801579, g_loss: 1.81830978, rnn_loss: 0.21293060
Epoch: [ 7/600] [  47/ 276] time: 3.9382s, d_loss: 1.40654540, g_loss: 1.36766315, rnn_loss: 0.18206844
Epoch: [ 7/600] [  48/ 276] time: 4.1516s, d_loss: 1.26507568, g_loss: 3.52746677, rnn_loss: 0.26968041
Epoch: [ 7/600] [  49/ 276] time: 3.5532s, d_loss: 1.36705804, g_loss: 2.66185880, rnn_loss: 0.24799074
Epoch: [ 7/600] [  50/ 276] time: 3.6468s, d_loss: 1.60176826, g_loss: 0.91410148, rnn_loss: 0.22792298
Epoch: [ 7/600] [  51/ 276] time: 7.2925s, d_loss: 1.23909831, g

Epoch: [ 7/600] [ 121/ 276] time: 3.9690s, d_loss: 1.41818166, g_loss: 4.49922371, rnn_loss: 0.16928652
Epoch: [ 7/600] [ 122/ 276] time: 6.7241s, d_loss: 1.02809811, g_loss: 4.60924816, rnn_loss: 0.18840846
Epoch: [ 7/600] [ 123/ 276] time: 5.6619s, d_loss: 1.23654997, g_loss: 2.15287995, rnn_loss: 0.16358405
Epoch: [ 7/600] [ 124/ 276] time: 4.8720s, d_loss: 1.50852728, g_loss: 4.10394955, rnn_loss: 0.16917309
Epoch: [ 7/600] [ 125/ 276] time: 5.7716s, d_loss: 0.97308266, g_loss: 3.47506881, rnn_loss: 0.17038813
Epoch: [ 7/600] [ 126/ 276] time: 5.2480s, d_loss: 1.11090016, g_loss: 2.62948370, rnn_loss: 0.24677266
Epoch: [ 7/600] [ 127/ 276] time: 5.6449s, d_loss: 1.41278231, g_loss: 4.00973129, rnn_loss: 0.18896480
Epoch: [ 7/600] [ 128/ 276] time: 4.6128s, d_loss: 1.44031096, g_loss: 1.97261596, rnn_loss: 0.15756361
Epoch: [ 7/600] [ 129/ 276] time: 4.5841s, d_loss: 1.39546561, g_loss: 2.16090298, rnn_loss: 0.23637238
Epoch: [ 7/600] [ 130/ 276] time: 3.5221s, d_loss: 1.18122053, g

Epoch: [ 7/600] [ 200/ 276] time: 3.6973s, d_loss: 1.58006322, g_loss: 0.87952256, rnn_loss: 0.20055839
Epoch: [ 7/600] [ 201/ 276] time: 6.8798s, d_loss: 1.28012252, g_loss: 2.82862663, rnn_loss: 0.15765420
Epoch: [ 7/600] [ 202/ 276] time: 3.9320s, d_loss: 0.91271067, g_loss: 3.05593872, rnn_loss: 0.11913800
Epoch: [ 7/600] [ 203/ 276] time: 6.7265s, d_loss: 1.04675674, g_loss: 1.63355613, rnn_loss: 0.25645104
Epoch: [ 7/600] [ 204/ 276] time: 3.7000s, d_loss: 1.75182867, g_loss: 0.68622828, rnn_loss: 0.13564086
Epoch: [ 7/600] [ 205/ 276] time: 6.5717s, d_loss: 1.88415015, g_loss: 4.62045097, rnn_loss: 0.22009626
Epoch: [ 7/600] [ 206/ 276] time: 4.1569s, d_loss: 2.43136597, g_loss: 1.58299780, rnn_loss: 0.24775025
Epoch: [ 7/600] [ 207/ 276] time: 7.1378s, d_loss: 1.37673461, g_loss: 1.68105245, rnn_loss: 0.23869115
Epoch: [ 7/600] [ 208/ 276] time: 4.2408s, d_loss: 1.20255959, g_loss: 4.33334875, rnn_loss: 0.15312168
Epoch: [ 7/600] [ 209/ 276] time: 3.4947s, d_loss: 1.62693906, g

Epoch: [ 8/600] [   3/ 276] time: 5.3288s, d_loss: 0.99020088, g_loss: 2.82469034, rnn_loss: 0.21084729
Epoch: [ 8/600] [   4/ 276] time: 5.5536s, d_loss: 1.48921406, g_loss: 2.17799640, rnn_loss: 0.18925416
Epoch: [ 8/600] [   5/ 276] time: 4.7139s, d_loss: 1.51720512, g_loss: 0.79212797, rnn_loss: 0.25170082
Epoch: [ 8/600] [   6/ 276] time: 6.4613s, d_loss: 1.26051199, g_loss: 2.23050356, rnn_loss: 0.19912067
Epoch: [ 8/600] [   7/ 276] time: 4.6335s, d_loss: 1.33482277, g_loss: 1.52231884, rnn_loss: 0.24642673
Epoch: [ 8/600] [   8/ 276] time: 6.3295s, d_loss: 1.26564372, g_loss: 1.65149045, rnn_loss: 0.20058647
Epoch: [ 8/600] [   9/ 276] time: 5.8836s, d_loss: 1.39306271, g_loss: 2.62598825, rnn_loss: 0.27711484
Epoch: [ 8/600] [  10/ 276] time: 5.1058s, d_loss: 1.19326568, g_loss: 2.32397413, rnn_loss: 0.17292370
Epoch: [ 8/600] [  11/ 276] time: 5.6889s, d_loss: 1.20981526, g_loss: 3.77074933, rnn_loss: 0.22094548
Epoch: [ 8/600] [  12/ 276] time: 5.1492s, d_loss: 1.54772270, g

Epoch: [ 8/600] [  82/ 276] time: 4.1149s, d_loss: 1.00302458, g_loss: 2.72710228, rnn_loss: 0.22334883
Epoch: [ 8/600] [  83/ 276] time: 5.8480s, d_loss: 1.57552624, g_loss: 2.99288249, rnn_loss: 0.20993571
Epoch: [ 8/600] [  84/ 276] time: 3.6117s, d_loss: 1.53131151, g_loss: 0.88148350, rnn_loss: 0.14014331
Epoch: [ 8/600] [  85/ 276] time: 6.3284s, d_loss: 2.46832204, g_loss: 3.42512751, rnn_loss: 0.19344732
Epoch: [ 8/600] [  86/ 276] time: 4.0699s, d_loss: 1.28277636, g_loss: 2.32897329, rnn_loss: 0.19921796
Epoch: [ 8/600] [  87/ 276] time: 5.2142s, d_loss: 0.93257344, g_loss: 2.17476416, rnn_loss: 0.28360206
Epoch: [ 8/600] [  88/ 276] time: 4.2576s, d_loss: 1.22891593, g_loss: 2.61578107, rnn_loss: 0.22425175
Epoch: [ 8/600] [  89/ 276] time: 4.8258s, d_loss: 0.86697125, g_loss: 2.28077126, rnn_loss: 0.21091908
Epoch: [ 8/600] [  90/ 276] time: 4.3578s, d_loss: 1.03841519, g_loss: 1.56072235, rnn_loss: 0.27832195
Epoch: [ 8/600] [  91/ 276] time: 5.9290s, d_loss: 1.43139195, g

Epoch: [ 8/600] [ 161/ 276] time: 6.0676s, d_loss: 1.25713706, g_loss: 1.86090541, rnn_loss: 0.24698447
Epoch: [ 8/600] [ 162/ 276] time: 3.6856s, d_loss: 1.37408507, g_loss: 2.41895962, rnn_loss: 0.22080399
Epoch: [ 8/600] [ 163/ 276] time: 6.1081s, d_loss: 0.96049893, g_loss: 2.01659846, rnn_loss: 0.15570152
Epoch: [ 8/600] [ 164/ 276] time: 3.6275s, d_loss: 1.16055083, g_loss: 2.45525813, rnn_loss: 0.18615803
Epoch: [ 8/600] [ 165/ 276] time: 7.1927s, d_loss: 1.10683894, g_loss: 3.32752371, rnn_loss: 0.22002760
Epoch: [ 8/600] [ 166/ 276] time: 3.6850s, d_loss: 1.05875838, g_loss: 1.64037240, rnn_loss: 0.10370796
Epoch: [ 8/600] [ 167/ 276] time: 5.6224s, d_loss: 2.55429912, g_loss: 9.78082848, rnn_loss: 0.14731777
Epoch: [ 8/600] [ 168/ 276] time: 5.0474s, d_loss: 4.01158047, g_loss: 3.01826859, rnn_loss: 0.25886863
Epoch: [ 8/600] [ 169/ 276] time: 6.6169s, d_loss: 1.34412217, g_loss: 1.17286968, rnn_loss: 0.19496025
Epoch: [ 8/600] [ 170/ 276] time: 4.1417s, d_loss: 2.43384600, g

## 3. Evaluation metric

In [25]:
def generate_r_precision_data():
    """Compute and save the feature arrays used to measure R-precision.

    For every full batch of test captions this function:
      * encodes the true captions with ``rnn_encoder``,
      * generates images from those captions (plus Gaussian noise ``z``) with
        ``generator`` and encodes the generated images with ``cnn_encoder``,
      * encodes ``cfg.WRONG_CAPTION`` mismatching captions per sample with
        ``rnn_encoder``.

    The results are written with ``np.savez`` to
    ``os.path.join(cfg.R_PRECISION_DIR, cfg.R_PRECISION_FILE)`` under the keys:
      * ``true_cnn``  -- shape (num_batches, BATCH_SIZE, EMBEDDING_DIM)
      * ``true_rnn``  -- shape (num_batches, BATCH_SIZE, EMBEDDING_DIM)
      * ``wrong_rnn`` -- shape (num_batches, WRONG_CAPTION, BATCH_SIZE, EMBEDDING_DIM)

    Relies on notebook-level globals defined in earlier cells: ``cfg``,
    ``test_dataset``, the placeholders ``t_real_caption`` / ``t_real_image`` /
    ``t_z`` and the networks ``rnn_encoder`` / ``generator`` / ``cnn_encoder``.

    Returns:
        None. The only result is the file written to disk.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids), (-1, cfg.TEXT.WORDS_NUM))
    captions_ids_wrong = np.reshape(test_dataset.random_wrong_captions(), (-1, cfg.WRONG_CAPTION, cfg.TEXT.WORDS_NUM))

    n_caption_test = len(caption_ids)
    # Integer division: captions left over after the last full batch are not processed.
    num_batches = n_caption_test // cfg.BATCH_SIZE

    true_cnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    true_rnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    wrong_rnn_features = np.zeros((num_batches, cfg.WRONG_CAPTION, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)

    # The session is a context manager so that it is always closed (and its
    # resources released), even if restoring or a forward pass raises.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        # load the trained checkpoint
        checkpoint_dir = cfg.CHECKPOINT_DIR
        if checkpoint_dir is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            ckpt_path = os.path.join(cfg.CHECKPOINT_DIR, cfg.CHECKPOINT_NAME)
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            # Without a checkpoint the features below come from freshly
            # initialized (untrained) weights.
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            test_cap = caption_ids[batch]
            # Sliced once per batch instead of once per wrong caption.
            wrong_caps = captions_ids_wrong[batch]

            z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)

            rnn_features = sess.run(rnn_encoder.outputs, feed_dict={t_real_caption: test_cap})
            gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})
            # The image encoder is fed the *generated* images, not real ones.
            cnn_features = sess.run(cnn_encoder.outputs, feed_dict={t_real_image: gen})

            true_cnn_features[i] = cnn_features
            true_rnn_features[i] = rnn_features

            for per_wrong_caption in range(cfg.WRONG_CAPTION):
                wrong_rnn_features[i, per_wrong_caption] = sess.run(
                    rnn_encoder.outputs,
                    feed_dict={t_real_caption: wrong_caps[:, per_wrong_caption]})

    out_dir = cfg.R_PRECISION_DIR
    out_path = os.path.join(out_dir, cfg.R_PRECISION_FILE)

    # Make sure the target directory exists so the save below cannot fail
    # after all features have already been computed.
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # if exists, remove the existing file first (best effort: a missing file is fine)
    try:
        os.remove(out_path)
    except OSError:
        pass
    np.savez(out_path, true_cnn=true_cnn_features, true_rnn=true_rnn_features,
             wrong_rnn=wrong_rnn_features)

In [27]:
def generate_inception_score_data():
    """Generate one image per test caption and save it as a PNG file.

    The test captions are grouped as (image, caption-per-image, word). For
    every full batch of images and every caption index, the generator is run
    on the captions plus Gaussian noise ``z`` and each generated image is
    written to
    ``<cfg.TEST.GENERATED_TEST_IMAGES>/<test_dataset.filenames[k]>_<caption index>.png``.

    Relies on notebook-level globals defined in earlier cells: ``cfg``,
    ``test_dataset``, the placeholders ``t_real_caption`` / ``t_z`` and the
    ``generator`` network.

    Returns:
        None. The only result is the set of image files written to disk.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids),
                             (-1, cfg.TEXT.CAPTIONS_PER_IMAGE, cfg.TEXT.WORDS_NUM))

    # After the reshape, the first axis indexes images (not single captions).
    n_caption_test = len(caption_ids)
    # Integer division: images left over after the last full batch are not processed.
    num_batches = n_caption_test // cfg.BATCH_SIZE

    # The session is a context manager so that it is always closed (and its
    # resources released), even if restoring, generation or saving raises.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        checkpoint_dir = cfg.CHECKPOINT_DIR
        if checkpoint_dir is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            ckpt_path = os.path.join(cfg.CHECKPOINT_DIR, cfg.CHECKPOINT_NAME)
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            # Without a checkpoint the images below come from freshly
            # initialized (untrained) weights.
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            # The file names do not depend on the caption index, so slice them once per batch.
            test_directory = test_dataset.filenames[batch]

            for per_caption in range(cfg.TEXT.CAPTIONS_PER_IMAGE):
                test_cap = caption_ids[batch, per_caption]

                z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)
                gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})

                for j in range(cfg.BATCH_SIZE):
                    out_path = os.path.join(cfg.TEST.GENERATED_TEST_IMAGES,
                                            test_directory[j] + '_{}.png'.format(per_caption))

                    # Create the image's parent directory, including any missing
                    # ancestors (os.mkdir fails when the output root is absent).
                    out_dir = os.path.dirname(out_path)
                    if out_dir and not os.path.isdir(out_dir):
                        os.makedirs(out_dir)

                    # NOTE(review): scipy.misc.imsave is deprecated since SciPy 1.0 and
                    # removed in 1.2. It is kept because the suggested replacement
                    # (imageio.imwrite) is a new dependency and would presumably need an
                    # explicit conversion of the generator output to uint8 -- confirm
                    # before switching.
                    scipy.misc.imsave(out_path, gen[j])

In [26]:
# Restore the checkpoint and write the R-precision feature file
# (cfg.R_PRECISION_DIR / cfg.R_PRECISION_FILE).
generate_r_precision_data()

INFO:tensorflow:Restoring parameters from ./checkpoint/model.ckpt
Restored model parameters from ./checkpoint/model.ckpt


In [28]:
# Restore the checkpoint and save the generated test images under
# cfg.TEST.GENERATED_TEST_IMAGES.
generate_inception_score_data()

INFO:tensorflow:Restoring parameters from ./checkpoint/model.ckpt
Restored model parameters from ./checkpoint/model.ckpt


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


## 4. Measure Inception score and R-precision of given test dataset

After setting the config file to 'eval_birds.yml' and running `generate_inception_score_data()` and `generate_r_precision_data()`, the images synthesized from the given captions and the set of image and caption features should be saved inside the 'evaluation' folder, specifically in 'evaluation/generated_images/..' and as 'evaluation/r_precision.npz' respectively.

**Then, go to the 'evaluation' folder and run the 'inception_score.ipynb' and 'r_precision.ipynb' notebooks to measure the Inception score and the R-precision.**