# Images
We experimented with BLIP to generate captions for the product images. Although our text data is fairly comprehensive and we don't strictly _need_ image data, captions generated from the images will likely provide additional context that is sometimes not explicitly written in the metadata.

In [14]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import requests

# Instantiate the BLIP processor and model a single time at notebook scope;
# generate_caption() reuses these globals instead of reloading them per call.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

def generate_caption(image_url: str, timeout: float = 10.0) -> str:
    """Download an image and return the caption BLIP generates for it.

    This helper is best-effort and never raises: any failure (missing URL,
    network error, HTTP error status, undecodable image, model error) is
    reported through the return value instead.

    Args:
        image_url: URL of the image to caption.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a single stalled connection would block a batch run
            indefinitely.

    Returns:
        The generated caption, or a string starting with
        "Error processing image: " that describes what went wrong.
    """
    # Rows without an image arrive as NaN/None (or blank) when the URLs come
    # from a DataFrame; reject them up front instead of hitting the network.
    if not isinstance(image_url, str) or not image_url.strip():
        return "Error processing image: missing or invalid image URL"

    try:
        # The context manager releases the streamed connection even on failure.
        with requests.get(image_url, stream=True, timeout=timeout) as response:
            # Surface 4xx/5xx as a clear HTTP error rather than letting PIL
            # choke on an HTML error page.
            response.raise_for_status()
            # Ask urllib3 to undo any gzip/deflate transfer encoding before
            # PIL reads the raw stream.
            response.raw.decode_content = True
            # convert() forces the pixel data to be read while the
            # connection is still open.
            image = Image.open(response.raw).convert('RGB')

        inputs = processor(images=image, return_tensors="pt")
        output = model.generate(**inputs)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error processing image: {e}"


![bra](https://m.media-amazon.com/images/I/51mCnuqW6HL._AC_.jpg)

In [2]:
# Spot check: caption the bra product photo rendered above.
generate_caption(image_url="https://m.media-amazon.com/images/I/51mCnuqW6HL._AC_.jpg")

'a woman wearing a white bra with floral print'

![socks](https://m.media-amazon.com/images/I/41wv1FeF3fL._AC_.jpg)


In [3]:
# Spot check: caption the socks product photo rendered above.
generate_caption(image_url="https://m.media-amazon.com/images/I/41wv1FeF3fL._AC_.jpg")

'a pair of white socks with red and black accents'

![band](https://m.media-amazon.com/images/I/51cQ43xwDoL._AC_.jpg)


In [8]:
# Spot check: caption the watch-band product photo rendered above.
generate_caption(image_url="https://m.media-amazon.com/images/I/51cQ43xwDoL._AC_.jpg")

'the watch strap is made from woven fabric'

In [3]:
import pandas as pd

In [17]:
file_path = "image_urls.csv"
# Load only the two columns the captioning step reads. When loaded in full the
# file also yields an "Unnamed: 0" column (presumably an index that was saved
# along with the data -- TODO confirm), which carries no information here.
# NOTE(review): image_url can be missing (NaN) for some rows, and primary_keys
# appears to hold list literals stored as strings -- confirm before parsing.
df = pd.read_csv(file_path, usecols=["image_url", "primary_keys"])
df.head()

Unnamed: 0,image_url,primary_keys
0,,['ekouaer-womens-long-nightgown-short-sleeve-n...
1,https://m.media-amazon.com/images/I/01+4kROOdv...,['aviatrix-mens-boys-us-air-g-force-pilot-blac...
2,https://m.media-amazon.com/images/I/01+5esf5ol...,['women-zip-up-hoodies-casual-sweatshirts-drop...
3,https://m.media-amazon.com/images/I/01+7HdDcbv...,['haola-womens-casual-print-loose-crewneck-sho...
4,https://m.media-amazon.com/images/I/01+K2hCumd...,['pin-high-ricardo-dry-fit-high-performance-go...


In [20]:
import os
import json
def process_images(df: pd.DataFrame, start_index: int = 0, output_dir: str = "image_captions"):
    os.makedirs(output_dir, exist_ok=True)

    for i, row in df.iloc[start_index:].iterrows():
        image_url = row["image_url"]
        primary_keys = row["primary_keys"]

        caption = generate_caption(image_url)

        result = {
            "image_url": image_url,
            "primary_keys": primary_keys,
            "caption": caption
        }

        output_path = os.path.join(output_dir, f"caption_{i}.json")
        with open(output_path, "w") as f:
            json.dump(result, f, indent=2)

        print(f"[{i}] Wrote caption to {output_path}")

In [21]:
# Run the batch captioning job, beginning at row position 10.
# `df` must expose image_url and primary_keys columns
# (an alternative source would be: df = pd.read_json("grouped_images.json")).
process_images(df=df, start_index=10)

[10] Wrote caption to image_captions/caption_10.json
[11] Wrote caption to image_captions/caption_11.json
[12] Wrote caption to image_captions/caption_12.json
[13] Wrote caption to image_captions/caption_13.json
[14] Wrote caption to image_captions/caption_14.json
[15] Wrote caption to image_captions/caption_15.json
[16] Wrote caption to image_captions/caption_16.json
[17] Wrote caption to image_captions/caption_17.json
[18] Wrote caption to image_captions/caption_18.json
[19] Wrote caption to image_captions/caption_19.json
[20] Wrote caption to image_captions/caption_20.json
[21] Wrote caption to image_captions/caption_21.json
[22] Wrote caption to image_captions/caption_22.json
[23] Wrote caption to image_captions/caption_23.json
[24] Wrote caption to image_captions/caption_24.json
[25] Wrote caption to image_captions/caption_25.json
[26] Wrote caption to image_captions/caption_26.json
[27] Wrote caption to image_captions/caption_27.json
[28] Wrote caption to image_captions/caption_2

KeyboardInterrupt: 

In [22]:
# Number of rows in the loaded frame.
df.shape[0]

651148