Commit: TF 11

zsdonghao committed Nov 9, 2016
1 parent ac414f4 commit d8f2b2a
Showing 32 changed files with 9,024 additions and 312 deletions.
7 changes: 6 additions & 1 deletion .gitignore
@@ -1,5 +1,10 @@
*.DS_Store
.ckpt
.ckpt-*
.DS_Store
._.DS_Store

data/mscoco/*
data/*ckpt
*ckpt
*ckpt*
model
2 changes: 2 additions & 0 deletions README.md
@@ -3,6 +3,8 @@
We re-implemented the complicated [Google's Image Captioning](https://github.com/tensorflow/models/tree/master/im2txt) model with simple TensorLayer code.

### 1. Prepare MSCOCO data and Inception model
Before you run the TensorLayer scripts, you need to follow Google's [setup guide](https://github.com/tensorflow/models/tree/master/im2txt). Then set the model, checkpoint (ckpt), and data directories in the `*.py` scripts; a sketch of such settings is given after the list below.

- Create a ``data`` folder.
- Download and preprocess the MSCOCO data: [click here](https://github.com/zsdonghao/models/tree/master/im2txt#prepare-the-training-data)
- Download the Inception_V3 checkpoint: [click here](https://github.com/zsdonghao/models/tree/master/im2txt#download-the-inception-v3-checkpoint)
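
As noted above, the model, checkpoint, and data directories are set inside the `*.py` scripts. A minimal, hypothetical sketch of the kind of paths to set (the variable names here are illustrative, not necessarily the ones used in the repository):

```python
import os

# Hypothetical path settings -- adjust the names to match the actual
# variables in the *.py scripts and your local layout.
DATA_DIR = "data/mscoco"                          # preprocessed MSCOCO TFRecord files
INCEPTION_CHECKPOINT = "data/inception_v3.ckpt"   # downloaded Inception_V3 checkpoint
MODEL_DIR = "model"                               # where training checkpoints are saved

for path in (DATA_DIR, MODEL_DIR):
    if not os.path.isdir(path):
        os.makedirs(path)
```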
31 changes: 17 additions & 14 deletions buildmodel.py
@@ -138,14 +138,15 @@ def image_summary(name, image):
  # Resize image.
  assert (resize_height > 0) == (resize_width > 0)
  if resize_height:
    # image = tf.image.resize_images(image,
    #                                size=[resize_height, resize_width],
    #                                method=tf.image.ResizeMethod.BILINEAR)

    image = tf.image.resize_images(image,  # DH Modify
                                   new_height=resize_height,
                                   new_width=resize_width,
                                   method=tf.image.ResizeMethod.BILINEAR)
    try:
      image = tf.image.resize_images(image,
                                     size=[resize_height, resize_width],
                                     method=tf.image.ResizeMethod.BILINEAR)
    except:
      image = tf.image.resize_images(image,  # for TF 0.10
                                     new_height=resize_height,
                                     new_width=resize_width,
                                     method=tf.image.ResizeMethod.BILINEAR)

  # Crop to final dimensions.
  if is_training:
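
The try/except above keeps the resize call working on both APIs: TF 0.11 and later take a single `size=[height, width]` argument, while TF 0.10 uses separate `new_height`/`new_width` keywords. A more explicit alternative, sketched here under the assumption that branching on `tf.__version__` is acceptable, selects the signature by version instead of catching the failure:

```python
import tensorflow as tf
from distutils.version import LooseVersion


def resize_compat(image, resize_height, resize_width):
  """Resize an image tensor across the TF 0.10 -> 0.11 API change."""
  if LooseVersion(tf.__version__) >= LooseVersion("0.11"):
    # TF >= 0.11: height and width are passed as a single `size` list.
    return tf.image.resize_images(image,
                                  size=[resize_height, resize_width],
                                  method=tf.image.ResizeMethod.BILINEAR)
  # TF 0.10: height and width are separate keyword arguments.
  return tf.image.resize_images(image,
                                new_height=resize_height,
                                new_width=resize_width,
                                method=tf.image.ResizeMethod.BILINEAR)
```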
@@ -581,7 +582,7 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp
initial_state = None,
sequence_length = tf.ones([1]),
return_seq_2d = True, # stack denselayer after it
name = '',
name = 'embed',
)
lstm_scope.reuse_variables()

@@ -600,7 +601,7 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp
initial_state = state_tuple, # different with training
sequence_length = tf.ones([1]),
return_seq_2d = True, # stack denselayer after it
name = '',
name = 'embed',
)
network = net_seq_rnn
network.all_layers = net_image_embeddings.all_layers + network.all_layers
@@ -620,18 +621,20 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp
initial_state = None,
sequence_length = tf.ones([32]),
return_seq_2d = True, # stack denselayer after it
name = '',
name = 'embed',
)
# Then, use the hidden state, which already contains the image information, as the initial_state when feeding the sentence (a raw-TensorFlow sketch of this pattern follows below).
lstm_scope.reuse_variables()
tl.layers.set_name_reuse(True)
network = tl.layers.DynamicRNNLayer(net_seq_embeddings,
cell_fn = tf.nn.rnn_cell.BasicLSTMCell,
n_hidden = num_lstm_units,
initializer = initializer,
dropout = dropout,
initial_state = net_img_rnn.final_state, # feed in hidden state after feeding image
sequence_length = tf.reduce_sum(input_mask, 1),
return_seq_2d = True, # stack denselayer after it
name = '',
name = 'embed',
)
network.all_layers = net_image_embeddings.all_layers + network.all_layers
network.all_params = net_image_embeddings.all_params + network.all_params
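
The two `DynamicRNNLayer` calls above share one set of LSTM weights: the image embedding is pushed through the LSTM first, and its final state is then passed as `initial_state` when the word embeddings are fed. A minimal raw-TensorFlow sketch of the same idea, mirroring the structure of Google's original im2txt model (the placeholder shapes and unit counts are illustrative, not the project's actual settings):

```python
import tensorflow as tf

num_lstm_units = 512
embedding_size = 512

image_embeddings = tf.placeholder(tf.float32, [None, embedding_size])
seq_embeddings = tf.placeholder(tf.float32, [None, None, embedding_size])
seq_lengths = tf.placeholder(tf.int32, [None])

cell = tf.nn.rnn_cell.BasicLSTMCell(num_lstm_units)
with tf.variable_scope("lstm") as lstm_scope:
  # Step 1: feed the image embedding once to produce an LSTM state
  # that encodes the image.
  batch_size = tf.shape(image_embeddings)[0]
  zero_state = cell.zero_state(batch_size, tf.float32)
  _, image_state = cell(image_embeddings, zero_state)

  # Step 2: reuse the same LSTM weights for the caption words, starting
  # from the image state instead of the zero state. Passing scope=lstm_scope
  # keeps the variable names identical so reuse actually applies.
  lstm_scope.reuse_variables()
  lstm_outputs, _ = tf.nn.dynamic_rnn(cell,
                                      seq_embeddings,
                                      sequence_length=seq_lengths,
                                      initial_state=image_state,
                                      scope=lstm_scope)
```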
@@ -640,7 +643,7 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp
network = tl.layers.DenseLayer(network, n_units=vocab_size, act=tf.identity, W_init=initializer, name="logits") # TL
logits = network.outputs

network.print_layers()
# network.print_layers()

if mode == "inference":
softmax = tf.nn.softmax(logits, name="softmax")
@@ -660,7 +663,7 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp
total_loss = total_loss
target_cross_entropy_losses = losses # Used in evaluation.
target_cross_entropy_loss_weights = weights # Used in evaluation.
return total_loss, target_cross_entropy_losses, target_cross_entropy_loss_weights
return total_loss, target_cross_entropy_losses, target_cross_entropy_loss_weights, network



4 changes: 4 additions & 0 deletions data/README.md
@@ -0,0 +1,4 @@
### Download and Preprocess the MSCOCO Data
[click here](https://github.com/zsdonghao/models/tree/master/im2txt#prepare-the-training-data)
### Download the Inception_V3 Checkpoint
[click here](https://github.com/zsdonghao/models/tree/master/im2txt#download-the-inception-v3-checkpoint)