In [2]:
    from os import listdir
    from pickle import dump
    from pickle import load
    from keras.applications.vgg16 import VGG16
    from keras.preprocessing.image import load_img
    from keras.preprocessing.image import img_to_array
    from keras.applications.vgg16 import preprocess_input
    from keras.models import Model
    from keras.models import load_model
    from numpy import array
    from numpy import argmax
    import string
    from numpy import array
    from keras.preprocessing.text import Tokenizer
    from keras.preprocessing.sequence import pad_sequences
    from keras.utils import to_categorical
    from keras.utils import plot_model
    from keras.layers import Input
    from keras.layers import Dense
    from keras.layers import LSTM
    from keras.layers import Embedding
    from keras.layers import Dropout
    from keras.layers.merge import add
    from keras.callbacks import ModelCheckpoint
    
  

    
    def load_doc(filename):
        """Read a text file and return its entire contents as one string.

        Args:
            filename: Path of the file to read. It is opened in text mode
                with the platform's default encoding.

        Returns:
            The complete contents of the file as a ``str``.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # The context manager closes the handle even when read() raises;
        # a manual open()/read()/close() sequence would leak it in that case.
        with open(filename, 'r') as file:
            return file.read()

    
    def load_set(filename):
        """Collect the unique identifiers listed in a file, one per line.

        Each non-empty line is cut at its first '.' and the part before it is
        kept as the identifier (so ``abc.jpg`` becomes ``abc``).

        Args:
            filename: Path of the listing file, read through ``load_doc``.

        Returns:
            A ``set`` of identifier strings.
        """
        rows = load_doc(filename).split('\n')
        # Zero-length lines carry no identifier and are ignored.
        return {row.split('.')[0] for row in rows if len(row) >= 1}

    
    def load_clean_descriptions(filename, dataset):
        """Load the descriptions of the selected images, wrapped in sequence markers.

        Every line of the file is split on whitespace: the first token is taken
        as the image identifier and the remaining tokens as the description.
        Descriptions whose identifier is in ``dataset`` are kept and wrapped as
        ``'startseq <description> endseq'``.

        Args:
            filename: Path of the descriptions file, read through ``load_doc``.
            dataset: Container of image identifiers to keep (membership is
                tested with ``in``).

        Returns:
            A ``dict`` mapping each kept image identifier to the list of its
            wrapped description strings, in file order.
        """
        doc = load_doc(filename)
        descriptions = dict()
        for line in doc.split('\n'):
            tokens = line.split()
            # A blank or whitespace-only line (for instance the one produced
            # by a trailing newline) yields no tokens; skip it rather than
            # fail with IndexError on tokens[0].
            if not tokens:
                continue
            image_id, image_desc = tokens[0], tokens[1:]
            if image_id in dataset:
                desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
                # setdefault creates the per-image list on first sight.
                descriptions.setdefault(image_id, list()).append(desc)
        return descriptions

    
    def load_photo_features(filename, dataset):
        """Load pickled photo features and keep only the requested entries.

        Args:
            filename: Path of a pickle file holding a mapping that can be
                indexed by the identifiers in ``dataset``.
            dataset: Iterable of identifiers whose features should be returned.

        Returns:
            A ``dict`` mapping each identifier in ``dataset`` to its entry in
            the pickled mapping.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If an identifier in ``dataset`` is missing from the
                pickled mapping.
        """
        # NOTE: unpickling can execute arbitrary code; only load files that
        # come from a trusted source.
        # The context manager guarantees the handle is closed, which the
        # bare load(open(...)) form did not.
        with open(filename, 'rb') as handle:
            all_features = load(handle)
        features = {k: all_features[k] for k in dataset}
        return features

    
    def to_lines(descriptions):
        """Flatten a mapping of description lists into one flat list.

        Args:
            descriptions: ``dict`` mapping a key to a list of description
                strings.

        Returns:
            A new list containing every description, grouped by key in the
            mapping's iteration order.
        """
        all_desc = list()
        # extend() appends each group directly; a list comprehension used only
        # for its side effect would build and discard a list of None values.
        for desc_list in descriptions.values():
            all_desc.extend(desc_list)
        return all_desc

    
    def create_tokenizer(descriptions):
        """Build a ``Tokenizer`` fitted on every description in the mapping.

        Args:
            descriptions: ``dict`` mapping a key to a list of description
                strings; flattened with ``to_lines`` before fitting.

        Returns:
            The ``Tokenizer`` instance after ``fit_on_texts`` has been called
            on all descriptions.
        """
        fitted = Tokenizer()
        fitted.fit_on_texts(to_lines(descriptions))
        return fitted

    
    def max_length(descriptions):
        """Return the word count of the longest description.

        Words are counted by splitting each description on whitespace.

        Args:
            descriptions: ``dict`` mapping a key to a list of description
                strings; flattened with ``to_lines``.

        Returns:
            The largest number of whitespace-separated words found in any
            single description.

        Raises:
            ValueError: If there are no descriptions at all.
        """
        word_counts = [len(caption.split()) for caption in to_lines(descriptions)]
        return max(word_counts)

    
    def data_generator(descriptions, photos, tokenizer, max_length, vocab_size):
        """Endlessly yield one batch of training samples per image.

        The generator never terminates: after the last image it starts over
        from the first entry of ``descriptions``.

        Args:
            descriptions: ``dict`` mapping an image key to its list of
                descriptions.
            photos: Mapping from image key to a feature container; only its
                first element (``photos[key][0]``) is used.
            tokenizer: Forwarded unchanged to ``create_sequences``.
            max_length: Forwarded unchanged to ``create_sequences``.
            vocab_size: Forwarded unchanged to ``create_sequences``.

        Yields:
            A tuple ``([in_img, in_seq], out_word)`` built from the three
            values returned by ``create_sequences`` for one image.
        """
        # NOTE(review): create_sequences is not defined in the visible part of
        # this notebook - confirm it is defined before this generator is used.
        while True:
            for image_key, captions in descriptions.items():
                image_vector = photos[image_key][0]
                samples = create_sequences(tokenizer, max_length, captions, image_vector, vocab_size)
                in_img, in_seq, out_word = samples
                yield [in_img, in_seq], out_word
                
  
    
    
    def word_for_id(integer, tokenizer):
        """Look up the word that the tokenizer maps to a given integer index.

        Args:
            integer: Index to search for among the values of
                ``tokenizer.word_index``.
            tokenizer: Object exposing a ``word_index`` mapping of word to
                index.

        Returns:
            The first word whose index equals ``integer``, or ``None`` when no
            word has that index.
        """
        matching_words = (
            token
            for token, token_id in tokenizer.word_index.items()
            if token_id == integer
        )
        # next() with a default stops at the first hit and yields None on a miss.
        return next(matching_words, None)

    
    def generate_desc(model, tokenizer, photo, max_length):
        """Generate a description for a photo one word at a time.

        Starting from ``'startseq'``, the text produced so far is encoded and
        padded, the model predicts from it together with the photo, and the
        word at the arg-max index of the prediction is appended. Generation
        stops after ``max_length`` steps, when the predicted index has no
        word, or once ``'endseq'`` has been appended.

        Args:
            model: Object whose ``predict([photo, sequence], verbose=0)``
                returns an array-like prediction.
            tokenizer: Tokenizer used to encode the text and to map the
                predicted index back to a word.
            photo: Photo input passed to the model unchanged.
            max_length: Maximum number of words to generate; also the padding
                length of the encoded text.

        Returns:
            The generated text, beginning with ``'startseq'``.
        """
        caption = 'startseq'
        for _ in range(max_length):
            encoded = tokenizer.texts_to_sequences([caption])[0]
            padded = pad_sequences([encoded], maxlen=max_length)
            prediction = model.predict([photo, padded], verbose=0)
            next_word = word_for_id(argmax(prediction), tokenizer)
            # An index with no word in the vocabulary ends generation.
            if next_word is None:
                break
            caption = caption + ' ' + next_word
            # The end marker is kept in the returned text.
            if next_word == 'endseq':
                break
        return caption

    
    def extract_features(filename):
        """Compute the VGG16 feature output for one image file.

        A fresh VGG16 network is created on every call and re-wired so that
        its second-to-last layer is the output. The image is loaded at
        224x224, converted to an array, given a leading axis of size 1,
        passed through ``preprocess_input`` and fed to the network.

        Args:
            filename: Path of the image file to load.

        Returns:
            The result of ``predict`` on the single-image batch.
        """
        vgg = VGG16()
        # Use the output of the layer just before the final one.
        feature_model = Model(inputs=vgg.inputs, outputs=vgg.layers[-2].output)
        pixels = img_to_array(load_img(filename, target_size=(224, 224)))
        # Prepend a size-1 axis in front of the array's three axes.
        axis_sizes = [pixels.shape[axis] for axis in range(3)]
        batch = preprocess_input(pixels.reshape((1, *axis_sizes)))
        return feature_model.predict(batch, verbose=0)

In [3]:
    # Load the training identifiers, their descriptions and photo features,
    # then derive the tokenizer, vocabulary size and longest description.
    filename = 'Flickr_8k.trainImages.txt'
    train = load_set(filename)
    print('Dataset: %d' % len(train))
    train_descriptions = load_clean_descriptions('descriptions.txt', train)
    print('Descriptions: train=%d' % len(train_descriptions))
    train_features = load_photo_features('features.pkl', train)
    print('Photos: train=%d' % len(train_features))
    tokenizer = create_tokenizer(train_descriptions)
    # +1 because the size is one more than the number of entries in word_index.
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)
    # Keep the result under its own name: assigning it to `max_length` would
    # replace the max_length() helper with an int, so running this cell a
    # second time would fail with "TypeError: 'int' object is not callable".
    max_desc_length = max_length(train_descriptions)
    print('Description Length: %d' % max_desc_length)

Dataset: 6000
Descriptions: train=6000
Photos: train=6000
Vocabulary Size: 3848
Description Length: 30


In [7]:
    # Caption a single example image with a saved tokenizer and model.
    # NOTE: unpickling can execute arbitrary code; only load a tokenizer.pkl
    # that comes from a trusted source.
    # The context manager closes the file, which load(open(...)) never did.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    # Hard-coded sequence length; presumably it must equal the description
    # length the saved model was trained with - TODO confirm. It is stored
    # under its own name so the max_length() helper is not overwritten.
    max_desc_length = 30
    model = load_model('model_5.h5')
    photo = extract_features('example5.jpg')
    description = generate_desc(model, tokenizer, photo, max_desc_length)
    print(description)

startseq brown dog is running in the water endseq
