In [None]:
!pip install tensorflow numpy opencv-python


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
import numpy as np
import os

# Load ResNet50 pre-trained on ImageNet and expose the output of its
# second-to-last layer, i.e. the network without the final classification layer.
base_model = ResNet50(weights='imagenet')
feature_extractor = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

# Backward-compatible alias. Later cells rebind the global name `model` to the
# captioning network, so extract_features() deliberately uses the dedicated
# `feature_extractor` name and keeps working after that rebinding.
model = feature_extractor


def preprocess_img(img_path):
    """Load an image file and turn it into a ResNet50-ready batch of one.

    Args:
        img_path: Path of the image file to load.

    Returns:
        A numpy array with a leading batch axis of size 1, holding the image
        resized to 224x224 and passed through ResNet50's `preprocess_input`.
    """
    img = image.load_img(img_path, target_size=(224, 224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)  # add the batch axis expected by predict()
    img = preprocess_input(img)
    return img


def extract_features(img_path):
    """Run an image through the ResNet50 feature extractor.

    Args:
        img_path: Path of the image file to encode.

    Returns:
        The array returned by `feature_extractor.predict` for the single
        preprocessed image.
    """
    img = preprocess_img(img_path)
    # Use the dedicated extractor, not the global `model`, which is reassigned
    # to the captioning network further down the notebook.
    features = feature_extractor.predict(img)
    return features


# Example: Extract features from a sample image
features = extract_features('sample.jpg')
print(features.shape)  # Should output (1, 2048)


In [None]:
## Prepare the Captions

In [None]:
import string
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Toy caption corpus used to demonstrate the text pipeline
captions = [
    "A cat is sitting on a mat",
    "A dog is playing with a ball",
]

# Preprocess captions
def preprocess_captions(captions):
    """Normalise captions by lower-casing them and removing punctuation.

    Args:
        captions: Iterable of caption strings.

    Returns:
        A new list with one cleaned string per input caption, in the same
        order. Every character listed in `string.punctuation` is removed.
    """
    # Translation table that deletes every punctuation character.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    cleaned = []
    for text in captions:
        cleaned.append(text.lower().translate(strip_punctuation))
    return cleaned

captions = preprocess_captions(captions)

# generate_caption() seeds decoding with 'startseq' and stops on 'endseq'.
# Both markers must therefore appear in the text the tokenizer is fitted on;
# otherwise the seed maps to an empty sequence and 'endseq' can never be
# predicted. Wrap every caption before tokenizing.
captions = ['startseq ' + caption + ' endseq' for caption in captions]

# Tokenize captions
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(captions)
sequences = tokenizer.texts_to_sequences(captions)

# Pad sequences to the length of the longest caption (markers included)
max_length = max(len(seq) for seq in sequences)
sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

print(sequences)


In [None]:
## BUILD THE MODEL

In [None]:
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Dropout, add
from tensorflow.keras.models import Model

# Sizes shared by several layers. The vocabulary size is taken from the
# tokenizer so the embedding table and the softmax output cannot drift apart
# from the tokenizer configuration; when the tokenizer has no `num_words` cap,
# fall back to the fitted vocabulary plus one slot for the padding index 0.
vocab_size = tokenizer.num_words or len(tokenizer.word_index) + 1
feature_dim = 2048   # length of the image feature vector fed to the model
hidden_units = 256   # width of the image projection, embedding, LSTM and decoder

# Image feature extractor model
inputs1 = Input(shape=(feature_dim,))
fe1 = Dropout(0.5)(inputs1)
fe2 = Dense(hidden_units, activation='relu')(fe1)

# Sequence model (mask_zero=True makes index 0 act as padding)
inputs2 = Input(shape=(max_length,))
se1 = Embedding(input_dim=vocab_size, output_dim=hidden_units, mask_zero=True)(inputs2)
se2 = LSTM(hidden_units)(se1)

# Decoder model: merge both branches and predict a distribution over the vocabulary
decoder1 = add([fe2, se2])
decoder2 = Dense(hidden_units, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)

model = Model(inputs=[inputs1, inputs2], outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()


In [None]:
## TRAIN THE MODEL

In [None]:
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Dropout, add
from tensorflow.keras.models import Model

# NOTE(review): this cell repeats the model-building code of the
# "BUILD THE MODEL" cell line for line. It constructs and compiles a fresh
# network but contains no `model.fit` call, so nothing in the visible notebook
# actually trains the model.
# TODO: replace with real training code once (image features, partial caption)
# -> next-word training pairs are prepared.

# Image feature extractor model
inputs1 = Input(shape=(2048,))
fe1 = Dropout(0.5)(inputs1)
fe2 = Dense(256, activation='relu')(fe1)

# Sequence model (mask_zero=True makes index 0 act as padding)
inputs2 = Input(shape=(max_length,))
se1 = Embedding(input_dim=5000, output_dim=256, mask_zero=True)(inputs2)
se2 = LSTM(256)(se1)

# Decoder model: merge the image and text branches, then predict over 5000 classes
decoder1 = add([fe2, se2])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(5000, activation='softmax')(decoder2)

# Rebinds the global `model` to this newly built, untrained network
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()


In [None]:
## GENERATE CAPTIONS

In [None]:
def generate_caption(model, img_features, tokenizer, max_length):
    """Greedily decode a caption for one image.

    Starting from the seed word 'startseq', the text generated so far is
    repeatedly encoded, padded to `max_length`, and fed to `model` together
    with `img_features`; the highest-scoring word index is appended each step.

    Args:
        model: Object whose `predict([img_features, sequence], verbose=0)`
            returns scores over word indices.
        img_features: Image feature batch passed through to `model.predict`.
        tokenizer: Fitted tokenizer providing `texts_to_sequences` and the
            `index_word` mapping.
        max_length: Padded sequence length and maximum number of decoding steps.

    Returns:
        The generated text, beginning with 'startseq'. Decoding stops after
        `max_length` steps, when 'endseq' is produced, or when the predicted
        index has no word in the tokenizer vocabulary.
    """
    in_text = 'startseq'
    for _ in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        scores = model.predict([img_features, sequence], verbose=0)
        # Convert the numpy integer to a plain int before the vocabulary lookup.
        word_index = int(np.argmax(scores))
        # The output layer can be wider than the fitted vocabulary, and index 0
        # is padding, so the predicted index may have no word. Use .get() so
        # that case ends decoding instead of raising KeyError.
        word = tokenizer.index_word.get(word_index)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'endseq':
            break
    return in_text

# Caption a previously unseen image: encode it, then decode greedily
new_features = extract_features('new_image.jpg')
caption = generate_caption(
    model=model,
    img_features=new_features,
    tokenizer=tokenizer,
    max_length=max_length,
)
print("Generated Caption:", caption)
