# In [None]:
import os
import pandas as pd
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration

# Caption every image in a folder with the BLIP base captioning model and
# write the (filename, caption) pairs to a CSV file.

# Initialize the processor and model
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Define the folder containing images
image_folder = "image_folder path"

# File extensions treated as images; matched case-insensitively so that
# files such as "photo.JPG" are not silently skipped.
image_extensions = ('.png', '.jpg', '.jpeg')

# Text prompt handed to the processor alongside each image. It is the same
# for every image, so it is defined once outside the loop.
text = " "

# Lists to store file names and captions
file_names = []
captions = []

# Loop through all files in the image folder. os.listdir() returns entries in
# arbitrary order, so sort them to make the CSV row order reproducible.
for image_filename in sorted(os.listdir(image_folder)):
    if not image_filename.lower().endswith(image_extensions):
        continue  # Not an image file

    image_path = os.path.join(image_folder, image_filename)

    # Open inside a context manager so the file handle is released right
    # away; convert() decodes the pixel data into a new in-memory image, so
    # `image` stays usable after the file is closed.
    try:
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
    except OSError as exc:
        # Unreadable or corrupt file (or a directory with an image-like
        # name): report it and keep going instead of losing every caption
        # generated so far.
        print(f"Skipping {image_filename}: {exc}")
        continue

    # Process the image
    inputs = processor(images=image, text=text, return_tensors="pt")
    outputs = model.generate(**inputs)

    # Decode the output to get the caption
    caption = processor.decode(outputs[0], skip_special_tokens=True)

    # Save the filename and caption to the lists
    file_names.append(image_filename)
    captions.append(caption)

# Create a DataFrame from the lists
df = pd.DataFrame({
    "filename": file_names,
    "caption": captions,
})

# Save the DataFrame to a CSV file
output_csv = "image_captions.csv"
df.to_csv(output_csv, index=False)

print(f"Captions saved to {output_csv}")
