In [None]:
!pip install rouge
!pip
!pip install Keras-Preprocessing
import string
import numpy as np
import pandas as pd
from PIL import Image
import os
from pickle import dump, load
import numpy as np
from glob import glob
import cv2
from tensorflow.keras.utils import load_img, img_to_array
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from keras.layers import add
from keras.models import Model, load_model
from keras.layers import Input, Dense, LSTM, Embedding, Dropout
from tensorflow.keras.applications.resnet import ResNet101
import csv
from rouge import Rouge


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### For Google Colab

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
home = './MiniProject/'

In [None]:
# Loading a text file into memory
def load_doc(filename):
    file = open(filename, 'r')
    text = file.read()
    file.close()
    return text

# Loading a binary file into memory
def load_doc_bin(filename):
    file = open(filename, 'rb')
    text = load(file)
    file.close()
    return text


### Mapping image with descriptions

In [None]:
# Mapping image with descriptions
def all_img_captions(filename):
    file = load_doc(filename)
    captions = file.split('\n')

    descriptions ={}
    for caption in captions[1:]:
        #print(caption)
        if caption == "" :
          break
        #cap=caption.split(":")
        cap=caption.split(",")
        
        img = cap[0].strip()
        
        
        caption_ = cap[1].strip()
        
        if img not in descriptions:
            descriptions[img] = [caption_]
        else:
            descriptions[img].append(caption_)
    # print(descriptions)
    return descriptions

### Cleaning text in captions dictionary

In [None]:
#Changing to lower cases, removing punctuations and stop words and also numbers
#this can be optimized
def cleaning_text(captions):
    table = str.maketrans('','', string.punctuation)
    for img,caps in captions.items():
        for i,img_caption in enumerate(caps):

            img_caption.replace("-"," ")
            desc = img_caption.split()

            #convert to lower case
            desc = [word.lower() for word in desc]
            #remove punctuation
            desc = [word.translate(table) for word in desc]
            #remove 's and a    #intitution same removing stopwords in nlp
            desc = [word for word in desc if(len(word)>1)]
            #remove tokens with numbers
            desc = [word for word in desc if(word.isalpha())]

            img_caption = ' '.join(desc)
            captions[img][i]= img_caption
            
    # print(list(captions.values())[0])
    return captions

### Creating Vocabulary from descriptions

In [None]:
def text_vocabulary(descriptions):
    vocab = set()
    
    for key in descriptions.keys():
        [vocab.update(d.split()) for d in descriptions[key]]
    
    # print(vocab)
    return vocab      #returing only unique words in captions

### Saving cleaned description to a file

In [None]:
#All descriptions in one file 
def save_descriptions(descriptions, filename):
    lines = list()
    for key, desc_list in descriptions.items():
        for desc in desc_list:
            lines.append(key + ' ' + desc )
    data = "\n".join(lines)
    file = open(filename,"w")
    file.write(data)
    file.close()

In [None]:
filename = '/content/drive/MyDrive/MiniProject/captions.txt'

descriptions = all_img_captions(filename)
# print(description)/
print("Length of descriptions =" ,len(descriptions))

clean_descriptions = cleaning_text(descriptions)

vocabulary = text_vocabulary(clean_descriptions)
print("Length of vocabulary = ", len(vocabulary))

save_descriptions(clean_descriptions, '/content/drive/MyDrive/MiniProject/descriptions.txt')

#using all the functions above such as cleaning and saving it in descriptions.txt


Length of descriptions = 8091
Length of vocabulary =  8571


## RESNET MODEL

In [None]:
resnet = ResNet101(
    include_top=False,
    weights='imagenet',
    pooling='avg'
)
resnet.summary()

### Creating features of image using RESNET

In [None]:
def extract_features(directory):
        features = {}
        files = os.listdir(directory)
        for image in files:
            
            filename = os.path.join(directory, image)
            image = Image.open(filename)
            
            image = Image.open(filename)
            image = image.resize((224,224))
            image = np.expand_dims(image, axis=0)   #adding extra dimension and all of that put into resnet
            feature = resnet.predict(image)  
            features[filename] = feature
        return features

### Dumping feature vectors

In [None]:
features = extract_features('/content/drive/MyDrive/MiniProject/archive/Images')
dump(features, open( os.path.join(os.getcwd(),'drive','MyDrive','MiniProject','image_features.p'),"wb"))

In [None]:
features = load(open("features_10k.p","rb"))
len(features)

### Loading everything into memory

In [None]:
def load_clean_descriptions(filename):    
    #loading clean_descriptions 
    file = load_doc(filename) 
    descriptions = {} 
    for line in file.split("\n"):         
        words = line.split() 
        if len(words)<1 : 
            continue      
        image, image_caption = words[0], words[1:]          
        if image not in train_features.keys(): 
            continue  
        if image not in descriptions: 
            descriptions[image] = [] 
        desc = '<startseq> ' + " ".join(image_caption) + ' <endseq>' 
        descriptions[image].append(desc) 
    return descriptions 
 
def load_features(): 
    #loading all features 
    all_features = load_doc_bin('/content/drive/MyDrive/MiniProject/train_8K_6500.p')
    return all_features

In [None]:
train_features = load_features() 
features = train_features 
train_descriptions = load_clean_descriptions('/content/drive/MyDrive/MiniProject/descriptions.txt') 

print(len(features)) 

### Tokenizer to convert characters to integers

In [None]:
#converting dictionary to list of descriptions
def dict_to_list(descriptions):
    all_desc = []
    for key in descriptions.keys():     #getting all the descrptions
        [all_desc.append(d) for d in descriptions[key]]
    return all_desc

from keras.preprocessing.text import Tokenizer

def create_tokenizer(descriptions):
    desc_list = dict_to_list(descriptions)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(desc_list)
    print(tokenizer)
    return tokenizer

In [None]:
tokenizer = create_tokenizer(train_descriptions)
print(tokenizer.word_index)

with open('/content/drive/MyDrive/MiniProject/tokenizer.p', 'wb') as token_file:
    dump(tokenizer, token_file)

In [None]:
tokenizer = load_doc_bin('/content/drive/MyDrive/MiniProject/tokenizer.p')
vocab_size = len(tokenizer.word_index) + 1
vocab_size 

### Calculating Max length of descriptions

In [None]:
def max_length_func(descriptions):
    desc_list = dict_to_list(descriptions)
    max_ = 0
    for d in desc_list:
        if len(d.split()) > max_:
            max_ = len(d.split())
            # print(max_, d)
    return max_

max_length = max_length_func(descriptions)
max_length

#### Generator for training in-memory and preparing inputs for model

In [None]:
def data_generator(descriptions, features, tokenizer, max_length):
    while 1:
        for key, description_list in descriptions.items():
            if key not in features.keys():
              continue

            feature = features[key][0]
            input_image, input_sequence, output_word = create_sequences(tokenizer, max_length, description_list, feature)
            yield [[input_image, input_sequence], output_word]

def create_sequences(tokenizer, max_length, desc_list, feature):
    X1, X2, y = list(), list(), list()

    for desc in desc_list:
        # encoding the sequence
        seq = tokenizer.texts_to_sequences([desc])[0]
        # split one sequence into multiple X,y pairs
        for i in range(1, len(seq)):

            in_seq, out_seq = seq[:i], seq[i]
            in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]

            X1.append(feature)
            X2.append(in_seq)
            y.append(out_seq)
    return np.array(X1), np.array(X2), np.array(y)


[a,b],c = next(data_generator(train_descriptions, features, tokenizer, max_length))
a.shape, b.shape, c.shape

## Resnet and LSTM MODEL

In [None]:
from keras.utils.vis_utils import plot_model

def define_model(vocab_size, max_length):
    
    # features from the CNN model
    # cnn_input = Input(shape=(2048,))
    cnn_input = Input(shape=(2051,))
    layer = Dropout(0.5)(cnn_input)
    cnn_input = Dense(256, activation='relu')(layer)

    # LSTM sequence model
    lstm_input = Input(shape=(max_length,))
    layer = Embedding(vocab_size, 256, mask_zero=True)(lstm_input)
    layer = Dropout(0.5)(layer)
    lstm_input = LSTM(256)(layer)

    # Merging both models
    merged_model = add([cnn_input, lstm_input])
    merged_model = Dense(256, activation='relu')(merged_model)
    outputs = Dense(vocab_size, activation='softmax')(merged_model)
    
    model = Model(inputs=[cnn_input, lstm_input], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics = ['accuracy'])
    
    # summary of the model
    # print(model.summary())
    plot_model(model, to_file= '/content/drive/MyDrive/MiniProject/plotmodel.png', show_shapes=True)
    print(model.summary)
    return model

m1=define_model(7784,29)

In [None]:
print('Descriptions: train=', len(train_descriptions))
print('Photos: train=', len(train_features))
print('Vocabulary Size:', vocab_size)
print('Description Length: ', max_length)

from tensorflow import keras

#Loading the saved model
model = keras.models.load_model("/content/drive/MyDrive/MiniProject/model_20.h5")

epochs = 20
steps = len(train_descriptions)
# # os.mkdir(home+"models test")
for i in range(12, epochs):
    generator = data_generator(train_descriptions, train_features, tokenizer, max_length)
    model.fit(generator, epochs=1, steps_per_epoch= steps, verbose=1)
    model.save(home+"models test1/model_" + str(i) + ".h5")
    print(model)

In [None]:
def word_for_id(integer, tokenizer):
 for word, index in tokenizer.word_index.items():
     if index == integer:
         return word
 return None

In [None]:
def generate_desc(model, tokenizer, photo, max_length):
    in_text = 'startseq'
    for i in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        pred = model.predict([photo,sequence], verbose=0)
        pred = np.argmax(pred)
        word = word_for_id(pred, tokenizer)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'endseq':
            break
    return in_text

In [None]:
def generate_feature(filename):
    image = Image.open(filename)
    image = image.resize((224,224))
    image = np.expand_dims(image, axis=0)
    return resnet.predict(image)

In [None]:
f = open('/content/drive/MyDrive/MiniProject/predicted.csv','w')
writer = csv.writer(f)
writer.writerow(['Image','Caption Generated'])

from glob import glob
test_dir=glob('/content/drive/MyDrive/MiniProject/archive/TrainingImages/*.jpg')

for image in test_dir:
  description = generate_desc(model, tokenizer, generate_feature(image), max_length)
  writer.writerow([image,description])
f.close()
# print(description)

In [None]:
filename = "/content/drive/MyDrive/MiniProject/archive/Images/109823394_83fcb735e1.jpg"
description = generate_desc(model, tokenizer, generate_feature(filename), 32)
print(description)

In [None]:
actual_captions = {}
with open("/content/drive/MyDrive/MiniProject/descriptions.txt", "r") as f:
    for line in f:
        tokens = line.strip().split(" ")
        img_name, caption = tokens[0], " ".join(tokens[1:])
        if img_name not in actual_captions:
            actual_captions[img_name] = [caption]
        else:
            actual_captions[img_name].append(caption)
predicted_captions = {}
with open("/content/drive/MyDrive/MiniProject/predicted.csv", "r") as f:
    next(f)
    for line in f:
        img_name, caption = line.strip().split(",")
        img_name = img_name.split("/")[-1]  # remove file path from image name
        if img_name not in predicted_captions:
            predicted_captions[img_name] = [caption]
        else:
            predicted_captions[img_name].append(caption)
from nltk.translate.bleu_score import sentence_bleu

total_score = 0.0
num_captions = 0
for img_name, pred_captions in predicted_captions.items():
    actual_captions_for_img = actual_captions[img_name]
    #print("This is the predicted caption",pred_captions)

    for pred_caption in pred_captions:
        for actual_caption in actual_captions_for_img:
            #print("this is actual caption",actual_caption)
            # tokenize both captions into lists of words
            pred_tokens = pred_caption.strip().split(" ")
            actual_tokens = actual_caption.strip().split(" ")
            # compute BLEU score
            score = sentence_bleu([actual_tokens], pred_tokens)
            total_score += score
        num_captions += 1


print("Average BLEU score: {:.4f}".format(total_score))


In [None]:
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

In [5]:
from nltk.translate.meteor_score import meteor_score
from nltk.translate import meteor

In [None]:
import nltk
from nltk.tokenize import word_tokenize

actual_captions = {}
with open("/content/drive/MyDrive/MiniProject/descriptions.txt", "r") as f:
    for line in f:
        tokens = line.strip().split(" ")
        img_name, caption = tokens[0], " ".join(tokens[1:])
        if img_name not in actual_captions:
            actual_captions[img_name] = [caption]
        else:
            actual_captions[img_name].append(caption)

predicted_captions = {}
with open("/content/drive/MyDrive/MiniProject/predicted.csv", "r") as f:
    next(f)
    for line in f:
        img_name, caption = line.strip().split(",")
        img_name = img_name.split("/")[-1]  # remove file path from image name
        if img_name not in predicted_captions:
            predicted_captions[img_name] = [caption]
        else:
            predicted_captions[img_name].append(caption)

total_score = 0.0
num_captions = 0
for img_name, pred_captions in predicted_captions.items():
    actual_captions_for_img = actual_captions[img_name]

    for pred_caption in pred_captions:
        AT=[]
        # print("predicted tokens are ",pred_caption)
        for actual_caption in actual_captions_for_img:
            # tokenize both captions into lists of words
            pred_tokens = word_tokenize(pred_caption)
            #pred_tokens = [token.lower() for token in pred_tokens]  # convert each token to lowercase
            actual_tokens = word_tokenize(actual_caption)
            AT.append(actual_tokens)
            # print(actual_tokens)
           
            #actual_tokens = [token.lower() for token in actual_tokens]  # convert each token to lowercase
            # compute METEOR score
        score = nltk.translate.meteor_score.meteor_score(AT, pred_tokens)
        total_score += score
        num_captions += 1

print("Average METEOR score: {:.4f}")
score = total_score
score = score / 10
print(score)

In [None]:
!pip install rouge_score

from rouge_score import rouge_scorer
import pandas as pd

actual_captions = {}
with open("/content/drive/MyDrive/MiniProject/descriptions.txt", "r") as f:
    for line in f:
        tokens = line.strip().split(" ")
        img_name, caption = tokens[0], " ".join(tokens[1:])
        if img_name not in actual_captions:
            actual_captions[img_name] = [caption]
        else:
            actual_captions[img_name].append(caption)

predicted_captions = {}
with open("/content/drive/MyDrive/MiniProject/predicted.csv", "r") as f:
    next(f)
    for line in f:
        img_name, caption = line.strip().split(",")
        img_name = img_name.split("/")[-1]  # remove file path from image name
        if img_name not in predicted_captions:
            predicted_captions[img_name] = [caption]
        else:
            predicted_captions[img_name].append(caption)

total_score = 0.0
num_captions = 0

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

for img_name, pred_captions in predicted_captions.items():
    actual_captions_for_img = actual_captions[img_name]

    for pred_caption in pred_captions:
        print("predicted caption is ", pred_caption)
        
        for actual_caption in actual_captions_for_img:
            scores = scorer.score(pred_caption, actual_caption)
            rouge_1 = scores['rouge1'].fmeasure
            rouge_2 = scores['rouge2'].fmeasure
            rouge_l = scores['rougeL'].fmeasure
            
            total_score += rouge_1 + rouge_2 + rouge_l
            num_captions += 3  # We calculate three ROUGE scores for each pair of captions

print("Average ROUGE score: {:.4f}".format(total_score ))


In [None]:
print