In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
import json
import pickle
from keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing import image
from keras.models import Model, load_model
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from keras.layers import Input, Dense, Dropout, Embedding, LSTM
from keras.layers.merge import add

In [4]:
# Trained captioning model, restored from a saved Keras HDF5 file.
# predict_caption() queries it once per generated word.
model = load_model(filepath="model_weights/model_19.h5")

In [5]:
# Full ResNet50 with ImageNet weights; it is only used as the starting
# point for the feature extractor built from it.
model_temp = ResNet50(
    weights="imagenet",
    input_shape=(224, 224, 3),
)

In [6]:
# Feature extractor: same input as ResNet50, but the output is taken from
# the second-to-last layer instead of the final layer.
model_resnet = Model(
    inputs=model_temp.input,
    outputs=model_temp.layers[-2].output,
)

In [7]:
def preprocessing_img(img):
    """Load an image file and prepare it as a one-image batch for ResNet50.

    Args:
        img: Path of the image file, handed to ``image.load_img``.

    Returns:
        The result of ResNet50's ``preprocess_input`` applied to the image
        array after it was resized to 224x224 and given a leading batch axis.
    """
    pil_image = image.load_img(img, target_size=(224, 224))
    # A leading axis is added so the single image forms a batch of one.
    batch = np.expand_dims(image.img_to_array(pil_image), axis=0)
    # Normalisation
    return preprocess_input(batch)

In [13]:
def encode_image(img):
    """Turn an image file into the feature vector the caption model consumes.

    Args:
        img: Path of the image file; forwarded to ``preprocessing_img``.

    Returns:
        The ``model_resnet`` prediction for the image, reshaped to
        ``(1, 2048)``.
    """
    batch = preprocessing_img(img)
    features = model_resnet.predict(batch)
    # Force an explicit (1, 2048) row so downstream code sees a fixed shape.
    return features.reshape(1, 2048)

In [14]:
# Encode the demo photo once; the result is reused as input to predict_caption().
enc = encode_image(img="photo.jpg")

In [15]:
# Sanity check: encode_image() reshapes its result to a single row of 2048 values.
enc.shape

(1, 2048)

In [18]:
# Length every partial caption is padded to, and the maximum number of
# decoding steps in predict_caption().
# NOTE(review): presumably this must equal the caption length the loaded
# model was trained with — confirm against the training notebook.
max_len = 35

In [25]:
# Vocabulary lookup tables used by predict_caption().
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# load pickle files that come from a trusted source.

# word -> integer index
with open("saved/word_to_idx.pkl", "rb") as word_to_idx_file:
    word_to_idx = pickle.load(word_to_idx_file)

# integer index -> word
with open("saved/idx_to_word.pkl", "rb") as idx_to_word_file:
    idx_to_word = pickle.load(idx_to_word_file)

In [26]:
def predict_caption(photo):
    """Generate a caption for an encoded image by greedy word-by-word decoding.

    Starting from the "startseq" marker, the partial caption is converted to
    word indices, padded to ``max_len`` and passed to ``model`` together with
    ``photo``. The highest-scoring word is appended and the process repeats
    until "endseq" is predicted or ``max_len`` steps have been taken.

    Args:
        photo: Image features passed as the first model input (in this
            notebook, the output of ``encode_image``).

    Returns:
        The generated caption as a space-separated string, without the
        "startseq" and "endseq" markers.

    Raises:
        KeyError: If the predicted index is not a key of ``idx_to_word``.
    """
    in_text = "startseq"
    for _ in range(max_len):
        # Words missing from the vocabulary are skipped rather than failing.
        sequence = [word_to_idx[w] for w in in_text.split() if w in word_to_idx]
        sequence = pad_sequences([sequence], maxlen=max_len, padding='post')

        ypred = model.predict([photo, sequence])
        # Greedy sampling: always take the word with the highest score.
        word = idx_to_word[ypred.argmax()]

        # Stop before appending the end marker. This way the caption never
        # contains "endseq", and when the step limit is hit without an end
        # marker the last real word is kept instead of being sliced off.
        if word == "endseq":
            break
        in_text += ' ' + word

    # Drop only the leading "startseq" marker.
    return ' '.join(in_text.split()[1:])

In [28]:
# Caption the demo photo from its encoded feature vector.
predict_caption(enc)

'young girl with nose hair and jean shirt tied'