In [1]:
import string
import numpy as np
import os
from PIL import Image
from tensorflow.keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow.keras.layers as ly
from tensorflow.keras.models import Model, load_model

In [2]:
# Dataset locations, relative to the notebook's working directory.
# Built with os.path.join so the separator matches the host OS instead of
# being hard-coded to the Windows backslash (the result is unchanged on Windows).
text_path = os.path.join("data", "flickr8k", "text", "Flickr8k.token.txt")
imgs_path = os.path.join("data", "flickr8k", "img")

In [5]:
# Parse the caption file into `descriptions`: {image id: [caption, ...]}.
# Each non-blank line is split on its first tab into "<image id>#<n>" and
# the caption text; the "#<n>" suffix is dropped so that all captions of the
# same image end up under one key.
with open(text_path, "r") as f:
    doc = f.read()
doc = doc.split("\n")

descriptions = {}
for line in doc:
    # Skip blank lines (including the empty string produced by a trailing
    # newline) instead of blindly dropping the last element, which would lose
    # a real caption when the file does not end with a newline.
    if not line.strip():
        continue
    # maxsplit=1 keeps any further tabs inside the caption text.
    img, caption = line.split("\t", 1)
    # Cut at the last '#' rather than slicing off two characters, so a
    # multi-digit caption index does not leak into the key.
    img_id = img.rsplit("#", 1)[0]
    descriptions.setdefault(img_id, []).append(caption)

In [27]:
# Normalise every caption: hyphens become spaces, words are lower-cased,
# punctuation is stripped, one-character leftovers and tokens containing
# non-letters are dropped, and the surviving words are re-joined with spaces.
#
# NOTE: `captions` and `clear_descriptions` are the same dict object as
# `descriptions`, so the cleaning happens in place and `descriptions` holds
# the cleaned text afterwards as well.
captions = descriptions
tab = str.maketrans("", "", string.punctuation)
for img, caps in captions.items():
    for i, img_caption in enumerate(caps):
        # str.replace returns a new string; the result must be re-assigned,
        # otherwise the hyphen is later deleted as punctuation and the two
        # halves of a hyphenated word are fused together.
        img_caption = img_caption.replace("-", " ")
        desc = img_caption.split()

        # lowercasing
        desc = [word.lower() for word in desc]
        # removing punctuation
        desc = [word.translate(tab) for word in desc]
        # remove hanging 's and a
        desc = [word for word in desc if len(word) > 1]
        # remove tokens with numbers in them (keep purely alphabetic tokens)
        desc = [word for word in desc if word.isalpha()]
        # convert back to a space-separated string so that a later
        # .split() recovers the individual words
        img_captions = " ".join(desc)
        captions[img][i] = img_captions
clear_descriptions = captions

In [28]:
# Collect every distinct whitespace-separated token that appears in any
# cleaned caption.
vocabulary = set()
for caption_list in clear_descriptions.values():
    for caption_text in caption_list:
        vocabulary.update(caption_text.split())

In [29]:
# Dump the captions to descriptions.txt, one "<image id>\t<caption>" per line.
# The full list of lines is built first and written exactly once; writing
# inside the per-image loop would re-emit the growing buffer on every
# iteration and duplicate earlier captions in the file.
lines = []
for key, desc_list in descriptions.items():
    for desc in desc_list:
        lines.append(key + "\t" + desc)
data = "\n".join(lines)

with open("descriptions.txt", "w") as f:
    f.write(data)

In [31]:
# Run every file in `imgs_path` through Xception (classification head removed,
# global average pooling) and store the resulting feature array in
# `features`, keyed by the file name.
model = Xception(include_top=False, pooling="avg")
features = {}
for img in os.listdir(imgs_path):
    # os.path.join instead of a hard-coded backslash keeps this portable.
    filename = os.path.join(imgs_path, img)
    # The context manager closes the underlying file handle for each image;
    # convert("RGB") guarantees three channels even for grayscale/RGBA files.
    with Image.open(filename) as pil_image:
        image = pil_image.convert("RGB").resize((299, 299))
    # Add the leading batch axis: (299, 299, 3) -> (1, 299, 299, 3).
    image = np.expand_dims(image, axis=0)

    # Scale 8-bit pixel values from [0, 255] to [-1, 1].
    image = image / 127.5
    image = image - 1.0

    # verbose=0 suppresses the per-call progress bar, which would otherwise
    # be printed once for every image.
    feature = model.predict(image, verbose=0)
    features[img] = feature

In [33]:
import pickle

# Persist the extracted features to features.p. The `with` block guarantees
# the file is flushed and closed even if pickling fails part-way through.
with open("features.p", "wb") as f:
    pickle.dump(features, f)