In [0]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import os
import time
import json
from glob import glob
from PIL import Image
import pickle
import random

In [0]:
# Directory holding the source images; image file names are appended to it to build full paths.
image_path = '/ImageCap/imagesB/flickr30k-images/'
# Directory where the extracted per-image feature arrays are written.
feat_path = '/ImageCap/image_feat/'
# Pipe-delimited captions file; its 'image_name' column lists the image file names.
text_path = '/ImageCap/captions.csv'

In [0]:
# The captions file uses '|' as its field separator; skipinitialspace drops the
# blank that follows each separator.
df = pd.read_csv(
    text_path,
    delimiter='|',
    skipinitialspace=True,
)

In [0]:
# Unique image names, kept in order of first appearance in the captions file.
# dict.fromkeys de-duplicates while preserving order; list(set(...)) would return a
# different order in every kernel session (string hashing is randomized), which makes
# any later split/shuffle of these lists irreproducible.
image_name_list = list(dict.fromkeys(df['image_name']))
image_path_list = [image_path + name for name in image_name_list]
feat_path_list = [feat_path + name for name in image_name_list]
# It is important to keep images and features in different directories to avoid Colab's I/O errors.

In [0]:
# Sanity check: number of unique images referenced by the captions file.
len(image_path_list)

31783

In [0]:
def feat_extract():
  """Build a headless InceptionV3 feature extractor.

  Loads InceptionV3 with ImageNet weights and without its classification top,
  then wraps it in a new Keras model that maps the network's input to the
  output of its final remaining layer.

  Returns:
    tf.keras.Model: the truncated network. Per this notebook's notes, it yields
    an 8x8x2048 feature map for each image fed at 299x299.
  """
  backbone = tf.keras.applications.InceptionV3(weights='imagenet', include_top=False)
  final_layer = backbone.layers[-1]  # last layer left once the classifier head is dropped
  return tf.keras.Model(inputs=backbone.input, outputs=final_layer.output)

mod_fe = feat_extract()

In [0]:
mod_fe.save('/ImageCap/IV3_feat.h5') # persist the feature extractor; it is needed again at evaluation time

In [0]:
def load_image(arg):
    """Read a JPEG file and prepare it as InceptionV3 input.

    Args:
        arg: path of the image file to read.

    Returns:
        A pair (image, arg): the 3-channel image resized to 299x299 and passed
        through InceptionV3's preprocess_input, together with the unchanged
        input path so the caller can tell which file the image came from.
    """
    raw_bytes = tf.io.read_file(arg)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (299, 299))
    return tf.keras.applications.inception_v3.preprocess_input(resized), arg

In [0]:
# Features are extracted once, up front, so that the CNN forward pass does not become a
# bottleneck during later training. They are cached on disk rather than in RAM because of
# Colab's RAM limits.
os.makedirs(feat_path, exist_ok=True) # np.save does not create missing directories

image_dataset = tf.data.Dataset.from_tensor_slices(image_path_list)
# Dataset.map applies load_image to every path; AUTOTUNE lets tf.data choose the parallelism.
image_dataset = image_dataset.map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# InceptionV3 expects batched input anyway, so batching lets the forward pass be vectorized.
image_dataset = image_dataset.batch(32)

for img, path in image_dataset:
  batch_features = mod_fe(img)
  # Flatten the spatial grid: (batch, H, W, C) -> (batch, H*W, C). Using -1 avoids
  # hard-coding the 8*8 grid that InceptionV3 produces for 299x299 inputs.
  batch_features = tf.reshape(batch_features, (batch_features.shape[0], -1, batch_features.shape[3]))

  for bf, p in zip(batch_features, path):
    path_ = p.numpy().decode("utf-8") # p is a bytes tensor, so it must be decoded to a str
    path_ = os.path.join(feat_path, os.path.basename(path_)) # same file name, but under the feature directory
    np.save(path_, bf.numpy()) # np.save appends '.npy'; each file holds one 64x2048 feature matrix

In [0]:
# Verify that one feature file was written per image by counting the regular files
# in the feature directory. feat_path is reused so this check cannot drift from the
# directory the features were actually saved to; os is already imported at the top.
path = feat_path
num_files = sum(os.path.isfile(os.path.join(path, f)) for f in os.listdir(path))

num_files

31783