In [3]:
import os
from PIL import Image
import torch
from lavis.models import load_model_and_preprocess

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the "blip_caption" model ("large_coco" variant) in eval mode together
# with its visual preprocessors, placed on the selected device.  The third
# return value is not used by this script.
model, vis_processors, _ = load_model_and_preprocess(
    name="blip_caption",
    model_type="large_coco",
    is_eval=True,
    device=device,
)

# Path to the folder containing images
image_folder = "C:/Users/yagub/Desktop/social/"

# File extensions (lower-case, including the leading dot) treated as images.
image_extensions = (".jpg", ".jpeg", ".png")

# List all image files in the folder.
# - The extension test is case-insensitive so that e.g. "IMG_001.JPG" is not
#   silently skipped, and it includes the dot so that a name such as
#   "notes_png" is not mistaken for an image.
# - Only regular files are kept; a sub-directory whose name happens to end in
#   an image extension cannot be opened as an image.
# - The result is sorted because os.listdir() returns entries in arbitrary
#   order; sorting makes the processing order reproducible between runs.
image_files = sorted(
    name
    for name in os.listdir(image_folder)
    if name.lower().endswith(image_extensions)
    and os.path.isfile(os.path.join(image_folder, name))
)

# Path for the output text file
output_file = os.path.join(image_folder, "all_captions.txt")

# Write three sampled captions per image to the output file.
#
# The file is opened with an explicit UTF-8 encoding so that file names or
# captions containing non-ASCII characters do not fail under the platform's
# default encoding (e.g. cp1252 on Windows).
with open(output_file, "w", encoding="utf-8") as f:
    # Loop through each image file
    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)

        # Open the image inside a context manager so the underlying file
        # handle is released immediately; convert() returns an independent
        # RGB copy that stays valid after the handle is closed.
        try:
            with Image.open(image_path) as opened_image:
                raw_image = opened_image.convert("RGB")
        except OSError as err:
            # A missing, truncated or otherwise unreadable image should not
            # abort the whole batch: report it and move on to the next file.
            print(f"Skipping {image_file}: {err}")
            continue

        # Preprocess the image and add a leading batch dimension
        image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)

        # Generate captions.  Gradients are never needed here, so autograd
        # tracking is disabled to avoid unnecessary memory use.
        with torch.no_grad():
            captions = model.generate(
                {"image": image}, use_nucleus_sampling=True, num_captions=3
            )

        # Write captions to the output file as a numbered list under the
        # image's file name, followed by a blank separator line.
        f.write(f"{image_file}:\n")
        for idx, caption in enumerate(captions, start=1):
            f.write(f"  {idx}: {caption}\n")
        f.write("\n")

print(f"All captions saved to {output_file}")




All captions saved to C:/Users/yagub/Desktop/social/all_captions.txt
