In [1]:
from discriminator.discriminator_wrapper import DiscriminatorWrapper
from coco_utils import load_coco_data_struct
import tensorflow as tf
import numpy as np
from lstm import MaxLikelihoodLSTM, PolicyGradientLSTM
from data import PGData, COCOData

In [2]:
def initialize_sess():
    """Initialize only the variables that are still uninitialized in `sess`.

    Variables that already hold values (for example ones that were trained or
    restored earlier) are left untouched, so this can be called repeatedly as
    new parts of the graph are built.
    """
    uninitialized_names = set(sess.run(tf.report_uninitialized_variables()))

    def is_uninitialized(variable):
        # Variable names look like "scope/name:0"; strip the output suffix and
        # compare as ASCII bytes against the names reported by TensorFlow.
        bare_name = variable.name.split(':')[0]
        return bare_name.encode('ascii') in uninitialized_names

    pending = list(filter(is_uninitialized, tf.global_variables()))
    # Operation.run() with no arguments executes in the default session.
    tf.variables_initializer(pending).run()
    
def reset_sess():
    """Discard the current session and graph, then start a fresh InteractiveSession.

    The session is closed *before* the default graph is reset: TensorFlow
    documents calling tf.reset_default_graph() while a Session or
    InteractiveSession is still active as undefined behavior.

    Rebinds the notebook-level `sess` to the new session. Any tensors,
    operations or model objects built on the old graph must not be reused
    afterwards.
    """
    global sess
    sess.close()
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

# Notebook-wide session; initialize_sess() reads it and reset_sess() replaces it.
sess = tf.InteractiveSession()

### Load data

In [3]:
# Raw COCO structures; the DiscriminatorWrapper is constructed from these three.
vocab_data, train_data, val_data = load_coco_data_struct()
# Data object used by the maximum-likelihood LSTM (its word embedding and training batches).
mledata = COCOData()
# Data object used by the policy-gradient LSTM (its word embedding and training batches).
pgdata = PGData()


Loaded vocab data.
Embedding <class 'numpy.ndarray'> (1004, 304) float64
Word to index <class 'dict'> 1004
Index to word <class 'list'> 1004

Loaded train data.
Captions <class 'numpy.ndarray'> (400135, 17) int32
Image indices <class 'numpy.ndarray'> (400135,) int32
Image features <class 'numpy.ndarray'> (82783, 4096) float32
Image urls <class 'numpy.ndarray'> (82783,) <U63

Loaded val data.
Captions <class 'numpy.ndarray'> (195954, 17) int32
Image indices <class 'numpy.ndarray'> (195954,) int32
Image features <class 'numpy.ndarray'> (40504, 4096) float32
Image urls <class 'numpy.ndarray'> (40504,) <U63


### Pretrain
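Only run this section to pretrain the network again. This section saves the model, which can then be loaded to start training immediately. Note that the Load Model section builds its model name from `disc_iterations` and `mle_iterations`, so the cell that defines them must be run even when the rest of this section is skipped.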

In [4]:
# Number of discriminator pre-training iterations (passed as iter_num to disc.pre_train).
disc_iterations = 2
# Number of maximum-likelihood LSTM training iterations (passed as max_iterations to lstm.train).
mle_iterations = 3
# Both values are also formatted into the name of the saved / loaded pretrained model.

In [5]:
# Build the discriminator, then initialize any variables that are still uninitialized.
disc = DiscriminatorWrapper(train_data, val_data, vocab_data)
initialize_sess()

# Pre-train the discriminator for disc_iterations iterations.
train_loss, val_loss = disc.pre_train(sess, iter_num=disc_iterations, batch_size=1000)

iter 0, loss: -2.7806818252429366e-05


In [6]:
# Build the maximum-likelihood LSTM on top of the (constant) word embedding from mledata.
lstm = MaxLikelihoodLSTM(tf.constant(mledata.word_embedding, dtype=tf.float32), learning_rate=5e-3, batch_size=100)
lstm.build_model()
# Initialize only the variables added by the LSTM; the pre-trained discriminator variables
# are already initialized and are therefore left as they are.
initialize_sess()

# Train for mle_iterations iterations.
cross_entropy, accuracy = lstm.train(sess, mledata, max_iterations=mle_iterations)

iter 0, cross-entropy: 110.80088806152344, accuracy: 0.0017376195173710585


In [7]:
# Saves the model into a models/ folder which is excluded from git using the .gitignore.
# The name encodes both iteration counts; the Load Model cell rebuilds the same name
# from mle_iterations and disc_iterations to locate this checkpoint.
lstm.save_model(sess, "pretrained-mle%d-disc%d" % (mle_iterations, disc_iterations))

In [8]:
# Throw away the pretraining graph and session so the saved model can be loaded into a clean graph.
reset_sess()

### Load Model
Load an existing model that contains both a discriminator and a generator (policy gradient) network. If the saved model already contained a policy gradient LSTM, set `is_PG` to `True`.

In [9]:
# Location and name of the checkpoint to load; the name must match the one used when saving.
dir_name="models"
model_name = "pretrained-mle%d-disc%d" % (mle_iterations, disc_iterations)
# False here because the checkpoint saved by the Pretrain section came from a MaxLikelihoodLSTM.
is_PG = False

# Load discriminator
disc = DiscriminatorWrapper(train_data, val_data, vocab_data, load_session=sess,
                            saved_model_name=model_name, model_base_dir=dir_name)

# Load generator
# The discriminator's assign_reward is handed to the generator as its reward function.
lstm = PolicyGradientLSTM(tf.constant(pgdata.word_embedding, dtype=tf.float32), learning_rate=5e-4, batch_size=100,
                          reward_func=disc.assign_reward)
lstm.load_model(sess, dir_name + "/" + model_name, is_PG=is_PG, restore_session=False)

# Initialize whatever variables are still uninitialized after the load.
initialize_sess()

INFO:tensorflow:Restoring parameters from models/pretrained-mle3-disc2


### Train
Alternate between running the two cells below, or enclose their contents in a loop. The former is probably wiser, since the generator and the discriminator will sometimes need different numbers of iterations.

In [10]:
# Number of policy-gradient training iterations to run for the generator.
pg_iterations = 10

# Samples collected from the late iterations; the next cell passes `captions` and
# `indexes` to the discriminator's online training.
captions = []
probs = []
indexes = []
rewards = []

for i in range(pg_iterations):
    caption, prob, index, reward = lstm.train(sess, pgdata)
    pgdata.shuffle()
    # Mean of the last column of the reward array
    # (presumably the final-step reward of each sample -- TODO confirm).
    print(np.mean(np.array(reward)[:, -1]))
    print("------------")
    # Keep samples only from the late iterations. The final iteration is always
    # kept: the threshold test alone never fires when pg_iterations <= 4, which
    # would leave the lists empty for the discriminator update that follows.
    if i > 0.75 * pg_iterations or i == pg_iterations - 1:
        captions.extend(caption)
        probs.extend(prob)
        indexes.extend(index)
        rewards.extend(reward)

toilets black children men man a on man and <UNK> on on hanging building in and
0.153965
------------
a chicken white the to the in a with holding the a that the <UNK> <UNK>
0.154456
------------
man <UNK> next a <UNK> <UNK> a of the <END>
0.115347
------------
white many train is there and game <UNK> in a <UNK> the the down city
0.146845
------------
lush a <UNK> people and small a to to train the a small white
0.162294
------------
beautiful <UNK> <UNK> giraffe in some to on <UNK> of in with group <UNK> of dog
0.175958
------------
many one board an <UNK> <UNK> <UNK> in game <UNK> to parked with and and of
0.187937
------------
many trucks <UNK> of small <UNK> the on in <UNK> with a and <UNK>
0.188531
------------
standing <UNK> <UNK> <UNK> is <UNK> <UNK> city <UNK> <UNK> are baseball trees sky down game
0.256971
------------
market <UNK> <UNK> <UNK> <UNK> signs <UNK> chicken <UNK> young adult <UNK> game <UNK> zebra
0.263541
------------


In [11]:
# Iterations of online discriminator training on the freshly generated captions.
# Deliberately NOT named `disc_iterations`: that variable holds the pretraining
# iteration count and is formatted into the pretrained model's name, so
# overwriting it here would make the save / load cells target the wrong
# checkpoint if they are re-run afterwards.
online_disc_iterations = 5

online_all_loss, online_val_loss = disc.online_train(sess, iter_num=online_disc_iterations, img_idxs=np.array(indexes),
                                                     caption_sentences=captions)

iter 0, loss: 0.0011422252282500267


### Save Model

In [12]:
# Save the current session state under a fixed name.
# NOTE(review): presumably written to the same models/ folder as the pretrained
# checkpoint and covering both generator and discriminator -- confirm in save_model.
lstm.save_model(sess, "full-discriminator-generator")