In [1]:
from discriminator.discriminator_wrapper import DiscriminatorWrapper
from coco_utils import load_coco_data_struct
import tensorflow as tf
import numpy as np
from generator.generator_wrapper import GeneratorWrapper, GeneratorSpec
from generator.generator_data import GeneratorData

In [2]:
def initialize_sess():
    """Initialise only the variables the shared session reports as uninitialised.

    Reads the module-level ``sess``. Variables that already hold a value
    (for example ones restored from a checkpoint) are left untouched, so this
    can be called again after each new network is added to the graph.
    """
    # report_uninitialized_variables() yields op names as bytes, without the ':0' suffix.
    pending_names = set(sess.run(tf.report_uninitialized_variables()))
    pending_vars = []
    for variable in tf.global_variables():
        op_name = variable.name.split(':')[0].encode('ascii')
        if op_name in pending_names:
            pending_vars.append(variable)
    # Operation.run() needs a default session; tf.InteractiveSession installs itself as one.
    tf.variables_initializer(pending_vars).run()
    
def reset_sess():
    """Discard the current graph and session and start a fresh interactive session.

    Rebinds the module-level ``sess`` to a new ``tf.InteractiveSession``. Any
    tensors, ops or wrappers built on the previous graph must not be reused
    afterwards.
    """
    global sess
    # Close first: TensorFlow documents that calling tf.reset_default_graph()
    # while a Session/InteractiveSession is still active is undefined behaviour.
    sess.close()
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

def get_mean_reward(rewards):
    """Return the mean of each row's final (last nonzero) reward.

    Args:
        rewards: 2-D array-like of per-step rewards, one row per sequence.
            Zero entries are treated as "no reward at this step".

    Returns:
        The mean over rows of the last nonzero entry in each row. Rows that
        contain no nonzero entry are ignored. If there is no nonzero entry at
        all, 0.0 is returned instead of NaN.
    """
    np_rewards = np.asarray(rewards)
    if not np_rewards.any():
        # Nothing to average; avoids np.mean([]) -> NaN plus a RuntimeWarning.
        return 0.0
    rows, columns = np.nonzero(np_rewards)
    # np.nonzero walks the array in row-major order, so the last nonzero entry
    # of a row is the position where the row index is about to change, plus
    # the very last position (which closes the final row).
    is_row_end = np.append(rows[1:] != rows[:-1], True)
    final_rewards = np_rewards[rows[is_row_end], columns[is_row_end]]
    return np.mean(final_rewards)
    

# Module-level session shared by initialize_sess()/reset_sess() and passed explicitly to every later cell.
sess = tf.InteractiveSession()

### Load data

In [3]:
# Load the COCO vocabulary and the train/val splits; the summary printed below lists what each one holds.
vocab_data, train_data, val_data = load_coco_data_struct()
# Data object handed to gen.train(); it also supplies `word_embedding` for the GeneratorSpec.
gendata = GeneratorData()


Loaded vocab data.
Embedding <class 'numpy.ndarray'> (1004, 304) float64
Word to index <class 'dict'> 1004
Index to word <class 'list'> 1004

Loaded train data.
Captions <class 'numpy.ndarray'> (400135, 17) int32
Image indices <class 'numpy.ndarray'> (400135,) int32
Image features <class 'numpy.ndarray'> (82783, 4096) float32
Image urls <class 'numpy.ndarray'> (82783,) <U63

Loaded val data.
Captions <class 'numpy.ndarray'> (195954, 17) int32
Image indices <class 'numpy.ndarray'> (195954,) int32
Image features <class 'numpy.ndarray'> (40504, 4096) float32
Image urls <class 'numpy.ndarray'> (40504,) <U63


### Pretrain
Run this section only when the networks need to be pretrained again. It saves the pretrained model, which can then be loaded to start training immediately.

In [4]:
# Iteration counts for the two pre-training stages. Both are also baked into
# the saved model's name, so the Load Model section needs the same values.
disc_iterations = 3  # passed to disc.pre_train(iter_num=...)
mle_iterations = 3  # passed to gen.train(num_iterations=..., training_type='MLE')

In [5]:
# Generator hyper-parameters, one keyword per line so individual values are easy to tweak and diff.
gen_spec = GeneratorSpec(
    # Dimensions
    input_dim=512,
    hidden_dim=512,
    output_dim=1004,
    rnn_activation=None,
    image_feature_dim=4096,
    n_seq_steps=16,
    # Embedding initial value, taken from the loaded generator data
    embedding_init=tf.constant(gendata.word_embedding, dtype=tf.float32),
    # Baseline settings
    n_baseline_layers=2,
    baseline_hidden_dim=64,
    # Learning rates and batch size
    mle_learning_rate=5e-3,
    pg_learning_rate=5e-4,
    baseline_learning_rate=5e-3,
    batch_size=100,
)

In [6]:
# Build the discriminator, then initialise any variables still reported as uninitialised.
disc = DiscriminatorWrapper(train_data, val_data, vocab_data)
initialize_sess()

# Pre-train the discriminator for `disc_iterations` iterations with a batch size of 1000.
train_loss, val_loss = disc.pre_train(sess, iter_num=disc_iterations, batch_size=1000)

iter 0, loss: 5.122125230627717e-07


In [7]:
# Build the generator; it is given the discriminator's assign_reward method as its second argument.
gen = GeneratorWrapper(gen_spec, disc.assign_reward)
# Only the generator's new variables are uninitialised here, so the pre-trained discriminator is kept.
initialize_sess()

# MLE pre-training of the generator for `mle_iterations` iterations.
cross_entropy, accuracy = gen.train(sess, gendata, num_iterations=mle_iterations, training_type='MLE')

iter 0, cross-entropy: 110.73542022705078, accuracy: 0.0


In [8]:
# Saves the model into a models/ folder which is excluded from git using the .gitignore
# The name encodes both pre-training iteration counts; the Load Model section rebuilds
# the same name from `mle_iterations` and `disc_iterations`.
gen.save(sess, "pretrained-mle%d-disc%d" % (mle_iterations, disc_iterations))

In [9]:
# Throw away the pre-training graph and session; `sess` is rebound to a fresh InteractiveSession.
reset_sess()

### Load Model
Load an existing model that contains both a discriminator and a generator (policy gradient) network. If the saved model already contains a policy gradient LSTM, set is_PG to true.

In [6]:
# Checkpoint location; the name must match what the Pretrain section passed to gen.save().
dir_name="models"
model_name = "pretrained-mle%d-disc%d" % (mle_iterations, disc_iterations)

# Load discriminator
disc = DiscriminatorWrapper(train_data, val_data, vocab_data, load_session=sess,
                            saved_model_name=model_name, model_base_dir=dir_name)

# Load generator
# The third positional argument is presumably the `is_PG` flag described in the text above - TODO confirm.
# NOTE(review): gen_spec.embedding_init is a tf.constant created before reset_sess() replaced the
# default graph; confirm gen_spec is still usable when this cell runs after the Pretrain section
# in the same kernel (re-running the gen_spec cell first would rebuild it on the current graph).
gen = GeneratorWrapper(gen_spec, disc.assign_reward, True)

# Initialise only variables still reported as uninitialised after the load above.
initialize_sess()

INFO:tensorflow:Restoring parameters from models/pretrained-mle3-disc3


### Train
Alternate between running the two cells below or enclose their contents in a loop. The former is probably wiser since sometimes either side will require fewer or more iterations.

In [None]:
pg_iterations = 5  # stop after this many accepted (above-threshold) batches
good_seq_req = 3  # ...or after this many accepted batches in a row
good_seq = 0
# Index of the current generator/discriminator alternation round. It raises the
# acceptance threshold by 0.001 per round. It was previously never defined, so
# the threshold check below raised NameError; bump it by hand (or make it the
# loop variable) when repeating the two training cells.
cycle = 0
captions = []
probs = []
indexes = []
rewards = []

# NOTE(review): nothing bounds the number of rejected batches, so this loop
# only ends once enough batches clear the threshold.
while pg_iterations > 0 and good_seq < good_seq_req:
    caption, prob, index, reward = gen.train(sess, gendata.shuffle())
    mean_reward = get_mean_reward(reward)
    if mean_reward > 0.8 + cycle * 0.001:
        # Keep this batch's samples for the discriminator's online training.
        captions.extend(caption)
        probs.extend(prob)
        indexes.extend(index)
        rewards.extend(reward)
        pg_iterations -= 1
        good_seq += 1
    else:
        # A rejected batch breaks the streak of consecutive accepted batches.
        good_seq = 0

In [None]:
# Kept separate from `disc_iterations`: that name is used to build the
# pre-trained checkpoint name ("pretrained-mle%d-disc%d"), so overwriting it
# here would make a later run of the Load Model cell look for the wrong file.
online_disc_iterations = 120
# Generated samples collected by the policy-gradient cell.
img_idxs = np.array(indexes)
online_all_loss, online_val_loss = disc.online_train(sess, iter_num=online_disc_iterations,
                                                     img_idxs=img_idxs,
                                                     caption_sentences=captions)

# Examine the reward the updated discriminator assigns to the first collected caption.
disc.assign_reward(sess, img_idxs[0:1], captions[0:1], image_idx_from_training=True, to_examine=True)

### Save Model

In [12]:
# `lstm` is not defined anywhere in this notebook; use the same save call as the
# Pretrain section, whose checkpoint is what the discriminator is later restored from.
gen.save(sess, "full-discriminator-generator")