# 03 Evaluation

Evaluate the trained image-captioning model with corpus-level BLEU-1, BLEU-2, and BLEU-3 scores.

In [None]:
import numpy as np
import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.translate.bleu_score import corpus_bleu

# Restore the pickled caption mapping, image features, and tokenizer, then the
# saved Keras model, and derive the vocabulary size and the caption-length limit.
def _read_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


mapping = _read_pickle('../data/Flickr8k_text/mapping.pkl')
features = _read_pickle('../data/Flickr8k_text/features.pkl')
tokenizer = _read_pickle('../data/Flickr8k_text/tokenizer.pkl')

model = load_model('../models/decoder/caption_model.h5')

# NOTE(review): the +1 presumably reserves index 0 for padding -- confirm
# against the training notebook.
vocab_size = len(tokenizer.word_index) + 1

# Longest caption, counted in whitespace-separated tokens, over every caption
# stored in the mapping.
max_length = max(
    len(text.split())
    for caption_group in mapping.values()
    for text in caption_group
)

### Helper Functions

In [None]:
def idx_to_word(integer, tokenizer):
    """Return the word stored under ``integer`` in ``tokenizer.index_word``.

    Args:
        integer: Index to look up.
        tokenizer: Object exposing an ``index_word`` mapping of index -> word.

    Returns:
        The matching word, or ``None`` when the index has no entry.
    """
    vocabulary = tokenizer.index_word
    return vocabulary[integer] if integer in vocabulary else None

def generate_caption(model, tokenizer, photo, max_length):
    """Greedily decode a caption for ``photo``, one word per model call.

    Decoding starts from the ``startseq`` token. On every step the text built
    so far is encoded with ``tokenizer``, padded to ``max_length``, and passed
    to ``model`` together with ``photo``; the arg-max of the prediction is
    mapped back to a word and appended.

    Args:
        model: Object whose ``predict([photo, sequence], verbose=0)`` returns
            the scores that ``np.argmax`` is applied to.
        tokenizer: Object providing ``texts_to_sequences`` and ``index_word``.
        photo: First model input, passed through unchanged.
            NOTE(review): presumably precomputed image features -- confirm.
        max_length: Padding length, and also the maximum number of decoding
            steps.

    Returns:
        The space-joined caption. It always begins with ``startseq`` and ends
        with ``endseq`` when the model produced that token before the step
        limit was reached.
    """
    words = ['startseq']
    for _ in range(max_length):
        encoded = tokenizer.texts_to_sequences([' '.join(words)])[0]
        padded = pad_sequences([encoded], maxlen=max_length)
        scores = model.predict([photo, padded], verbose=0)
        next_word = idx_to_word(np.argmax(scores), tokenizer)
        if next_word is None:
            # The predicted index has no vocabulary entry; stop decoding.
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return ' '.join(words)

### Evaluate BLEU Scores

In [None]:
# Score generated captions against the reference captions with corpus-level BLEU.

# generate_caption() always starts its output with 'startseq' and stops after
# 'endseq'. These are decoding markers, not caption words, so they are removed
# from both hypotheses and references; otherwise they are scored as n-grams
# and distort BLEU.
boundary_tokens = {'startseq', 'endseq'}

actual, predicted = [], []
# NOTE(review): hard-coded slice of the mapping keys -- confirm it matches the
# images that were held out when the model was trained.
test = list(mapping.keys())[6000:8000]
if not test:
    # An empty slice would hand corpus_bleu no data and yield a meaningless
    # score (or an error, depending on the NLTK version), so fail with a
    # clear message.
    raise ValueError(
        f"No evaluation images: mapping has {len(mapping)} entries, "
        "but the test slice starts at index 6000."
    )

for key in test:
    y_pred = generate_caption(model, tokenizer, features[key], max_length)
    references = [
        [word for word in caption.split() if word not in boundary_tokens]
        for caption in mapping[key]
    ]
    hypothesis = [word for word in y_pred.split() if word not in boundary_tokens]
    actual.append(references)
    predicted.append(hypothesis)

# Each weight tuple spreads the score uniformly over the first k n-gram orders.
# Exact thirds are used for BLEU-3 because (0.33, 0.33, 0.33) sums to 0.99
# rather than 1.
bleu1 = corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0))
bleu2 = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
bleu3 = corpus_bleu(actual, predicted, weights=(1 / 3, 1 / 3, 1 / 3, 0))

print(f"BLEU-1: {bleu1:.3f}")
print(f"BLEU-2: {bleu2:.3f}")
print(f"BLEU-3: {bleu3:.3f}")