<a href="https://colab.research.google.com/github/vishalnadagiri/AutoCaption/blob/main/AutoCaptionCNN_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from numpy import array
from pickle import load
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical, plot_model
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Embedding, Dropout
from keras.layers.merge import add
from keras.callbacks import ModelCheckpoint
from os import listdir
from pickle import dump
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.image import load_img, img_to_array
import pandas as pd
import numpy as np
import glob
import re

# Image data preparation and Image feature extraction

In [None]:
def extract_features(img_files):
    model = VGG16()
    model = Model(inputs=model.inputs, outputs=model.layers[-2].output)

    features = dict()
    for name in img_files:
        # filename = directory + '/' + name
        image = load_img(name, target_size=(224, 224))
        image = img_to_array(image)
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        image = preprocess_input(image)
        feature = model.predict(image, verbose=0)

        image_id = name.split('/')[-1]

        features[image_id] = feature
    # print('>%s' % name)
    return features

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
csv_path = f"/content/drive/MyDrive/AutoCaption/extracted/dataset.xlsx"
imag_path = f"/content/drive/MyDrive/AutoCaption/extracted/images/images/" 
df = pd.read_excel(csv_path)
img_files = (glob.glob(imag_path+'*'))

In [7]:
features = extract_features(img_files)
print('Extracted Features: %d' % len(features))
# save to file
dump(features, open('features.pkl', 'wb'))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Extracted Features: 500


# Tetxt Data prepparation

In [8]:
df.Description[0]

'This stylish foil print kurta from janasya is made of poly crepe and comes in an attractive peach color. It features 3/4 sleeve,round neck,a-line and it is calf length kurta that is suitable for casual occasions. Team it with matching leggings for a chic look.'

In [9]:
features.keys()

dict_keys(['pic_0.jpg', 'pic_1.jpg', 'pic_2.jpg', 'pic_3.jpg', 'pic_4.jpg', 'pic_5.jpg', 'pic_7.jpg', 'pic_6.jpg', 'pic_8.jpg', 'pic_10.jpg', 'pic_9.jpg', 'pic_12.jpg', 'pic_11.jpg', 'pic_13.jpg', 'pic_15.jpg', 'pic_14.jpg', 'pic_17.jpg', 'pic_16.jpg', 'pic_19.jpg', 'pic_20.jpg', 'pic_18.jpg', 'pic_21.jpg', 'pic_22.jpg', 'pic_23.jpg', 'pic_24.jpg', 'pic_25.jpg', 'pic_26.jpg', 'pic_27.jpg', 'pic_31.jpg', 'pic_29.jpg', 'pic_28.jpg', 'pic_30.jpg', 'pic_32.jpg', 'pic_33.jpg', 'pic_35.jpg', 'pic_34.jpg', 'pic_36.jpg', 'pic_37.jpg', 'pic_38.jpg', 'pic_39.jpg', 'pic_40.jpg', 'pic_41.jpg', 'pic_42.jpg', 'pic_43.jpg', 'pic_44.jpg', 'pic_46.jpg', 'pic_45.jpg', 'pic_47.jpg', 'pic_48.jpg', 'pic_50.jpg', 'pic_49.jpg', 'pic_51.jpg', 'pic_53.jpg', 'pic_52.jpg', 'pic_54.jpg', 'pic_56.jpg', 'pic_57.jpg', 'pic_55.jpg', 'pic_59.jpg', 'pic_58.jpg', 'pic_60.jpg', 'pic_61.jpg', 'pic_63.jpg', 'pic_62.jpg', 'pic_66.jpg', 'pic_64.jpg', 'pic_65.jpg', 'pic_68.jpg', 'pic_67.jpg', 'pic_71.jpg', 'pic_69.jpg', 'pic_

In [10]:
df.columns

Index(['Title', 'Description', 'Material', 'Pattern', 'Neckline',
       'Image_Path'],
      dtype='object')

In [11]:
df.Image_Path[0]

'/images/pic_0.jpg'

In [12]:
df.Image_Path = df.Image_Path.apply(lambda x: x.split(f'/')[-1])

In [13]:
df.Image_Path[0]

'pic_0.jpg'

- Convert all words to lowercase.
- Remove all punctuation.
- Remove all words that are one character or less in length (e.g. ‘a’).
- Remove all words with numbers in them.

In [14]:
for des,im in zip(df.Description[:5],df.Image_Path[:5]):
    print(im ,des)
    print()
    print()

pic_0.jpg This stylish foil print kurta from janasya is made of poly crepe and comes in an attractive peach color. It features 3/4 sleeve,round neck,a-line and it is calf length kurta that is suitable for casual occasions. Team it with matching leggings for a chic look.


pic_1.jpg This check pattern top by Work Label is crafted in cotton. Featuring a bias check at the yoke and straight check pattern in bottom half, a smart round Neckline, 3/4th sleeves, this mid hip length top offers a stylish & comfortable fit. Style this top with a trouser/skirt and medium high heels for chic look at work. This top can also be styled with a pair of stud earrings and a pair your regular sneakers to attain a casual look.


pic_2.jpg Featuring elegant printed details, this off white top and skirt set from Jaipur Kurti makes a statement addition to your casual wardrobe. Style this set with a pair of high heels and statement accessories to complete the look.


pic_3.jpg Add an extra dose of style to your

In [15]:
#create a dictionary as key: image name and value: description
descriptions = {k : v for k,v in zip(df.Image_Path,df.Description) }

In [16]:
import string

def clean_descriptions(descriptions):
    '''
        Clean the text 
        1. split desc in into words
        2. make all char lower case 
        3. remove punctuations
        4. remove single chars
        5. remove non-numeric chars
    '''
    table = str.maketrans('', '', string.punctuation)
    for key, desc in descriptions.items():
        desc = desc.split()
        desc = [word.lower() for word in desc]
        desc = [w.translate(table) for w in desc]
        desc = [word for word in desc if len(word)>1]
        desc = [word for word in desc if word.isalpha()]
        descriptions[key] =  'startseq ' + ' '.join(desc) + ' endseq'


clean_descriptions(descriptions)

In [17]:
descriptions['pic_0.jpg']

'startseq this stylish foil print kurta from janasya is made of poly crepe and comes in an attractive peach color it features sleeveround neckaline and it is calf length kurta that is suitable for casual occasions team it with matching leggings for chic look endseq'

In [18]:
def to_vocabulary(descriptions):

    all_desc = set()
    for key,desc in descriptions.items():
        # for d in descriptions[key]:
        all_desc.update(desc.split(' '))
    return all_desc


vocabulary = to_vocabulary(descriptions)
print('Vocabulary Size: %d' % len(vocabulary))

Vocabulary Size: 1587


In [19]:
len(descriptions)

500

In [20]:
features.keys()

dict_keys(['pic_0.jpg', 'pic_1.jpg', 'pic_2.jpg', 'pic_3.jpg', 'pic_4.jpg', 'pic_5.jpg', 'pic_7.jpg', 'pic_6.jpg', 'pic_8.jpg', 'pic_10.jpg', 'pic_9.jpg', 'pic_12.jpg', 'pic_11.jpg', 'pic_13.jpg', 'pic_15.jpg', 'pic_14.jpg', 'pic_17.jpg', 'pic_16.jpg', 'pic_19.jpg', 'pic_20.jpg', 'pic_18.jpg', 'pic_21.jpg', 'pic_22.jpg', 'pic_23.jpg', 'pic_24.jpg', 'pic_25.jpg', 'pic_26.jpg', 'pic_27.jpg', 'pic_31.jpg', 'pic_29.jpg', 'pic_28.jpg', 'pic_30.jpg', 'pic_32.jpg', 'pic_33.jpg', 'pic_35.jpg', 'pic_34.jpg', 'pic_36.jpg', 'pic_37.jpg', 'pic_38.jpg', 'pic_39.jpg', 'pic_40.jpg', 'pic_41.jpg', 'pic_42.jpg', 'pic_43.jpg', 'pic_44.jpg', 'pic_46.jpg', 'pic_45.jpg', 'pic_47.jpg', 'pic_48.jpg', 'pic_50.jpg', 'pic_49.jpg', 'pic_51.jpg', 'pic_53.jpg', 'pic_52.jpg', 'pic_54.jpg', 'pic_56.jpg', 'pic_57.jpg', 'pic_55.jpg', 'pic_59.jpg', 'pic_58.jpg', 'pic_60.jpg', 'pic_61.jpg', 'pic_63.jpg', 'pic_62.jpg', 'pic_66.jpg', 'pic_64.jpg', 'pic_65.jpg', 'pic_68.jpg', 'pic_67.jpg', 'pic_71.jpg', 'pic_69.jpg', 'pic_

In [21]:
descriptions.keys()

dict_keys(['pic_0.jpg', 'pic_1.jpg', 'pic_2.jpg', 'pic_3.jpg', 'pic_4.jpg', 'pic_5.jpg', 'pic_6.jpg', 'pic_7.jpg', 'pic_8.jpg', 'pic_9.jpg', 'pic_10.jpg', 'pic_11.jpg', 'pic_12.jpg', 'pic_13.jpg', 'pic_14.jpg', 'pic_15.jpg', 'pic_16.jpg', 'pic_17.jpg', 'pic_18.jpg', 'pic_19.jpg', 'pic_20.jpg', 'pic_21.jpg', 'pic_22.jpg', 'pic_23.jpg', 'pic_24.jpg', 'pic_25.jpg', 'pic_26.jpg', 'pic_27.jpg', 'pic_28.jpg', 'pic_29.jpg', 'pic_30.jpg', 'pic_31.jpg', 'pic_32.jpg', 'pic_33.jpg', 'pic_34.jpg', 'pic_35.jpg', 'pic_36.jpg', 'pic_37.jpg', 'pic_38.jpg', 'pic_39.jpg', 'pic_40.jpg', 'pic_41.jpg', 'pic_42.jpg', 'pic_43.jpg', 'pic_44.jpg', 'pic_45.jpg', 'pic_46.jpg', 'pic_47.jpg', 'pic_48.jpg', 'pic_49.jpg', 'pic_50.jpg', 'pic_51.jpg', 'pic_52.jpg', 'pic_53.jpg', 'pic_54.jpg', 'pic_55.jpg', 'pic_56.jpg', 'pic_57.jpg', 'pic_58.jpg', 'pic_59.jpg', 'pic_60.jpg', 'pic_61.jpg', 'pic_62.jpg', 'pic_63.jpg', 'pic_64.jpg', 'pic_65.jpg', 'pic_66.jpg', 'pic_67.jpg', 'pic_68.jpg', 'pic_69.jpg', 'pic_70.jpg', 'pic_

In [22]:
train_desc =dict()
test_desc = dict()
for key in list(features.keys())[:400]:
    if key in descriptions.keys():
        train_desc.update({key:descriptions[key]})

for key in list(features.keys())[400:]:
    if key in descriptions.keys():
        test_desc.update({key:descriptions[key]})

In [23]:
len(descriptions),len(features),len(train_desc),len(test_desc)

(500, 500, 400, 100)

In [24]:
list(features.keys())[:2]

['pic_0.jpg', 'pic_1.jpg']

In [25]:
for key in train_desc.keys():
    if key in test_desc.keys():
        print(key)
else:
     print('all pic are unique in train and test')

all pic are unique in train and test


In [26]:
train_fea =dict()
test_fea = dict()
for key in train_desc.keys():
    if key in features.keys():
        train_fea.update({key:features[key]})

for key in test_desc.keys():
    if key in features.keys():
        test_fea.update({key:features[key]})

In [27]:
for key in train_fea.keys():
    if key in test_fea.keys():
        print(key)
else:
     print('all pic are unique in train and test')

all pic are unique in train and test


In [28]:
train_desc['pic_0.jpg']

'startseq this stylish foil print kurta from janasya is made of poly crepe and comes in an attractive peach color it features sleeveround neckaline and it is calf length kurta that is suitable for casual occasions team it with matching leggings for chic look endseq'

In [29]:
#convert desc to list and tokennize it
def to_lines(descriptions):
    all_desc = list()
    for key in descriptions.keys():
        all_desc.append(descriptions[key])
    return all_desc
 
# fit a tokenizer given caption descriptions
def create_tokenizer(descriptions):
	lines = to_lines(descriptions)
	tokenizer = Tokenizer()
	tokenizer.fit_on_texts(lines)
	return tokenizer
 
# prepare tokenizer
tokenizer = create_tokenizer(train_desc)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)

Vocabulary Size: 1443


In [30]:
def create_sequences(tokenizer, max_length, descriptions, photos, vocab_size):#tokenizer, max_length, train_desc, train_fea, vocab_size
	X1, X2, y = list(), list(), list()
	'''X1 = image features X2=desc seq y=next word'''
	for key, desc_list in descriptions.items():
		for desc in desc_list:
			# encode the sequence
			seq = tokenizer.texts_to_sequences([desc])[0]
			# split one sequence into multiple X,y pairs
			for i in range(1, len(seq)):
				# split into input and output pair
				in_seq, out_seq = seq[:i], seq[i]
				# pad input sequence
				in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
				
				out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]

				X1.append(photos[key][0])
				X2.append(in_seq)
				y.append(out_seq)
	return array(X1), array(X2), array(y)

In [31]:
def max_length(descriptions):
	lines = to_lines(descriptions)
	return max(len(d.split()) for d in lines)
 
max_len=max_length(train_desc)

In [32]:
max_len

83

In [34]:
train_fea['pic_0.jpg'].shape

(1, 4096)

In [35]:
def define_model(vocab_size, max_length):
	# feature extractor model
	inputs1 = Input(shape=(4096,))
	fe1 = Dropout(0.5)(inputs1)
	fe2 = Dense(256, activation='relu')(fe1)
	# sequence model
	inputs2 = Input(shape=(max_length,))
	se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
	se2 = Dropout(0.5)(se1)
	se3 = LSTM(256)(se2)
	# decoder model
	decoder1 = add([fe2, se3])
	decoder2 = Dense(256, activation='relu')(decoder1)
	outputs = Dense(vocab_size, activation='softmax')(decoder2)
	# tie it together [image, seq] [word]
	model = Model(inputs=[inputs1, inputs2], outputs=outputs)
	model.compile(loss='categorical_crossentropy', optimizer='adam')
	# summarize model
	print(model.summary())
	plot_model(model, to_file='model.png', show_shapes=True)
	return model

In [38]:
X1train, X2train, ytrain = create_sequences(tokenizer, max_length, train_desc, train_fea, vocab_size)

In [39]:
X1train.shape

(0,)

In [None]:
X1test, X2test, ytest = create_sequences(tokenizer, max_length, test_desc, test_fea, vocab_size)

In [None]:
model = define_model(vocab_size, max_length)

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 83)]         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 4096)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 83, 256)      369408      input_3[0][0]                    
__________________________________________________________________________________________________
dropout (Dropout)               (None, 4096)         0           input_2[0][0]                    
____________________________________________________________________________________________

In [None]:
#monitor the minimum loss on the validation dataset and save the model to a file
filepath = 'model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

In [None]:
model.fit([X1train, X2train], ytrain, epochs=20, verbose=2, callbacks=[checkpoint], validation_data=([X1test, X2test], ytest))

Epoch 1/20


ValueError: ignored