In [1]:
import string
import numpy as np
from PIL import Image
import os
from pickle import dump, load
import numpy as np
from keras.applications.xception import Xception, preprocess_input
from keras.preprocessing.image import load_img, img_to_array
from keras_preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Add
from keras.models import Model, load_model
from keras.layers import Input, Dense, LSTM, Embedding, Dropout
# small library for seeing the progress of loops.
from tqdm.notebook import tqdm
tqdm.pandas()

In [2]:
def load_doc(filename):
    # Opening the file as read only
    file = open(filename, 'r')
    text = file.read()
    file.close()
    return text

def all_img_captions(filename):
    file = load_doc(filename)
    captions = file.split('\n')
    descriptions ={}
    for caption in captions[:-1]:
        img, caption = caption.split('\t')
        if img[:-2] not in descriptions:
            descriptions[img[:-2]] = [ caption ]
        else:
            descriptions[img[:-2]].append(caption)
    return descriptions



In [3]:
def cleaning_text(captions):
    table = str.maketrans('','',string.punctuation)
    for img,caps in captions.items():
        for i,img_caption in enumerate(caps):
            img_caption.replace("-"," ")
            desc = img_caption.split()
            #converts to lowercase
            desc = [word.lower() for word in desc]
            #remove punctuation from each token
            desc = [word.translate(table) for word in desc]
            #remove hanging 's and a 
            desc = [word for word in desc if(len(word)>1)]
            #remove tokens with numbers in them
            desc = [word for word in desc if(word.isalpha())]
            #convert back to string
            img_caption = ' '.join(desc)
            captions[img][i]= img_caption
    return captions



In [4]:
def text_vocabulary(descriptions):
    # build vocabulary of all unique words
    vocab = set()
    for key in descriptions.keys():
        [vocab.update(d.split()) for d in descriptions[key]]
    return vocab


In [8]:
def save_description(description,filename):
    lines=list()
    for img,capt in description.items():
        for c in capt:
            lines.append(img+'\t'+c)
    data='\n'.join(lines)
    file=open(filename,'w')
    file.write(data)
    file.close()

In [20]:
dataset_images='Flickr8k_Dataset/Flicker8k_Dataset'
dataset_text='D:\project_new\image_caption\Flickr8k_text\Flickr8k.token.txt'

In [23]:
print(dataset_text)

D:\project_new\image_caption\Flickr8k_text\Flickr8k.token.txt


In [25]:
descriptions=all_img_captions(dataset_text)

In [27]:
descriptions

{'1000268201_693b08cb0e.jpg': ['A child in a pink dress is climbing up a set of stairs in an entry way .',
  'A girl going into a wooden building .',
  'A little girl climbing into a wooden playhouse .',
  'A little girl climbing the stairs to her playhouse .',
  'A little girl in a pink dress going into a wooden cabin .'],
 '1001773457_577c3a7d70.jpg': ['A black dog and a spotted dog are fighting',
  'A black dog and a tri-colored dog playing with each other on the road .',
  'A black dog and a white dog with brown spots are staring at each other in the street .',
  'Two dogs of different breeds looking at each other on the road .',
  'Two dogs on pavement moving toward each other .'],
 '1002674143_1b742ab4b8.jpg': ['A little girl covered in paint sits in front of a painted rainbow with her hands in a bowl .',
  'A little girl is sitting in front of a large painted rainbow .',
  'A small girl in the grass plays with fingerpaints in front of a white canvas with a rainbow on it .',
  'T

In [28]:
print(len(descriptions))

8092


In [31]:
clean_descriptions=cleaning_text(descriptions)

In [32]:
clean_descriptions

{'1000268201_693b08cb0e.jpg': ['child in pink dress is climbing up set of stairs in an entry way',
  'girl going into wooden building',
  'little girl climbing into wooden playhouse',
  'little girl climbing the stairs to her playhouse',
  'little girl in pink dress going into wooden cabin'],
 '1001773457_577c3a7d70.jpg': ['black dog and spotted dog are fighting',
  'black dog and tricolored dog playing with each other on the road',
  'black dog and white dog with brown spots are staring at each other in the street',
  'two dogs of different breeds looking at each other on the road',
  'two dogs on pavement moving toward each other'],
 '1002674143_1b742ab4b8.jpg': ['little girl covered in paint sits in front of painted rainbow with her hands in bowl',
  'little girl is sitting in front of large painted rainbow',
  'small girl in the grass plays with fingerpaints in front of white canvas with rainbow on it',
  'there is girl with pigtails sitting in front of rainbow painting',
  'young 

In [33]:
vocabulary=text_vocabulary(clean_descriptions)
vocabulary

{'furred',
 'shoveling',
 'egde',
 'walkng',
 'ate',
 'notices',
 'reviewing',
 'abs',
 'rafters',
 'joggers',
 'offf',
 'tread',
 'revealing',
 'headbands',
 'pointer',
 'dribbling',
 'liked',
 'wounded',
 'defense',
 'accepting',
 'fringe',
 'sparse',
 'guarded',
 'lilies',
 'waterspout',
 'crouched',
 'cruising',
 'sleeve',
 'protecting',
 'motorcyclists',
 'dump',
 'waiter',
 'liking',
 'cubicle',
 'railway',
 'beret',
 'aerobatics',
 'aquestrian',
 'ladie',
 'blondhaired',
 'burbur',
 'suburban',
 'spiritual',
 'squeezing',
 'bikes',
 'encircling',
 'whose',
 'traversing',
 'passage',
 'troll',
 'fold',
 'hear',
 'born',
 'retaining',
 'tangled',
 'hair',
 'beating',
 'rushing',
 'lease',
 'pushes',
 'bridge',
 'giong',
 'interacts',
 'shopping',
 'physical',
 'helmet',
 'seeds',
 'choppy',
 'shaft',
 'waterhole',
 'toddler',
 'snowboarder',
 'duck',
 'snap',
 'pinkbottomed',
 'collecting',
 'cries',
 'comforting',
 'establishment',
 'writing',
 'barrior',
 'waterskiing',
 'shoes'

In [34]:
print(len(vocabulary))

8763


In [35]:
save_description(clean_descriptions,'descriptions.txt')

In [46]:
def extract_features(directory):
        model = Xception( include_top=False, pooling='avg' )
        features = {}
        for img in tqdm(os.listdir(directory)):
            filename = directory + "/" + img
            image = Image.open(filename)
            image = image.resize((299,299))
            image = np.expand_dims(image, axis=0)
            #image = preprocess_input(image)
            image = image/127.5
            image = image - 1.0
            feature = model.predict(image)
            features[img] = feature
        return features
#2048 feature vector
features = extract_features(dataset_images)
dump(features, open("features.p","wb"))

  0%|          | 0/8091 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [48]:
features=load(open('features.p','rb'))

In [49]:
features

{'1000268201_693b08cb0e.jpg': array([[0.47339684, 0.01732643, 0.07333998, ..., 0.0855904 , 0.02102295,
         0.23766544]], dtype=float32),
 '1001773457_577c3a7d70.jpg': array([[0.00158211, 0.11113487, 0.00037397, ..., 0.26503602, 0.35279822,
         0.05871649]], dtype=float32),
 '1002674143_1b742ab4b8.jpg': array([[0.        , 0.02488983, 0.01554059, ..., 0.        , 0.        ,
         0.10192626]], dtype=float32),
 '1003163366_44323f5815.jpg': array([[0.14568879, 0.00272414, 0.27776527, ..., 0.17018232, 0.11957303,
         0.09414067]], dtype=float32),
 '1007129816_e794419615.jpg': array([[0.        , 0.12443952, 0.7391621 , ..., 0.00390435, 0.00997149,
         0.5017237 ]], dtype=float32),
 '1007320043_627395c3d8.jpg': array([[0.04136695, 0.        , 0.0127447 , ..., 0.00944686, 0.64201635,
         0.04792112]], dtype=float32),
 '1009434119_febe49276a.jpg': array([[0.        , 0.        , 0.02624729, ..., 0.30528584, 0.23091821,
         0.14191067]], dtype=float32),
 '1012

In [51]:
size=len(features)
size

8091