# 🧠 Flea Market Auto-Cropper & Inventory Matcher
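This notebook is meant to detect, crop, and label individual items in a cluttered image using Grounding DINO, Segment Anything (SAM), and BLIP, and then match each item to your custom inventory list by comparing its BLIP caption with the inventory titles. Note: the detection step is currently a placeholder that splits the image into four quadrants; Grounding DINO and SAM are installed but not yet used for cropping.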

In [None]:
!pip install -q git+https://github.com/facebookresearch/segment-anything.git
!pip install -q git+https://github.com/IDEA-Research/GroundingDINO.git
!pip install -q git+https://github.com/salesforce/BLIP.git
!pip install -q transformers diffusers timm sentence-transformers opencv-python


In [None]:
from google.colab import files
from PIL import Image
import os

def _first_uploaded_name(upload_result, what):
    """Return the first filename from an upload result.

    Args:
        upload_result: The mapping returned by ``files.upload()``; its keys are
            used as the uploaded filenames.
        what: Short human-readable name of the expected file, used in the error.

    Raises:
        ValueError: If nothing was uploaded (for example the dialog was cancelled).
    """
    if not upload_result:
        raise ValueError(f"No {what} was uploaded; re-run this cell and choose a file.")
    return next(iter(upload_result))


print("Upload your flea market image:")
uploaded = files.upload()
img_path = _first_uploaded_name(uploaded, "image")

print("Upload your item list (CSV with title, description, keywords):")
uploaded_csv = files.upload()
item_csv_path = _first_uploaded_name(uploaded_csv, "item list CSV")


In [None]:
# 🔁 Load BLIP captioning and semantic matching models
from transformers import BlipProcessor, BlipForConditionalGeneration
from sentence_transformers import SentenceTransformer, util

# Checkpoint shared by the BLIP processor and the BLIP captioning model.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# BLIP: the processor prepares image inputs and decodes generated token ids,
# the model generates the caption tokens.
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# Sentence-embedding model used to compare captions with inventory titles.
semantic_model = SentenceTransformer("all-MiniLM-L6-v2")


In [None]:
# 🔁 Fake crop logic – simulates 4 item crops (until GroundingDINO is added)
from PIL import Image
import os

# Load the uploaded photo as RGB and record its dimensions.
img = Image.open(img_path).convert("RGB")
width, height = img.size

# Directory that receives one JPEG per crop.
cropped_dir = "crops"
os.makedirs(cropped_dir, exist_ok=True)

# Placeholder boxes: the four quadrants of the image as (left, top, right, bottom).
half_w, half_h = width // 2, height // 2
quadrant_boxes = [
    (0, 0, half_w, half_h),           # top-left
    (half_w, 0, width, half_h),       # top-right
    (0, half_h, half_w, height),      # bottom-left
    (half_w, half_h, width, height),  # bottom-right
]

cropped_images = []
for item_number, box in enumerate(quadrant_boxes, start=1):
    crop = img.crop(box)
    crop_path = f"{cropped_dir}/item_{item_number}.jpg"
    crop.save(crop_path)
    # Keep both the saved path and the in-memory crop for the captioning step.
    cropped_images.append((crop_path, crop))


In [None]:
import pandas as pd
# Load the uploaded inventory list; later code in this cell reads its
# "title", "keywords" and "description" columns.
df = pd.read_csv(item_csv_path)

# Accumulates one result dict per cropped image; turned into a DataFrame below.
matched_data = []

def caption_image(image):
    """Generate a text caption for an image with the BLIP model.

    Args:
        image: The image to caption (callers in this notebook pass PIL crops).

    Returns:
        The decoded caption string for the first generated sequence, with
        special tokens removed.
    """
    model_inputs = blip_processor(image, return_tensors="pt")
    token_ids = blip_model.generate(**model_inputs)
    return blip_processor.decode(token_ids[0], skip_special_tokens=True)

# Validate the inventory up front so a malformed CSV fails before any captioning work.
required_columns = ("title", "keywords", "description")
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"Inventory CSV is missing required column(s): {missing_columns}")

# Rows without a title cannot be matched, so they are left out of the candidates.
candidate_titles = df["title"].dropna().astype(str)
if candidate_titles.empty:
    raise ValueError("Inventory CSV contains no titles to match against.")

# Encode all titles in a single batch. The embeddings are kept out of `df`
# so the inventory frame is not mutated with a column of tensors.
title_embeddings = semantic_model.encode(candidate_titles.tolist(), convert_to_tensor=True)

for path, image in cropped_images:
    caption = caption_image(image)
    cap_embed = semantic_model.encode(caption, convert_to_tensor=True)

    # cos_sim yields one row of scores (one per candidate title). Re-index the
    # scores by the original row labels so idxmax returns a label usable with df.loc.
    scores = util.cos_sim(cap_embed, title_embeddings)[0].cpu().tolist()
    similarities = pd.Series(scores, index=candidate_titles.index)
    best_match = similarities.idxmax()

    matched_data.append({
        "image_path": path,
        "caption": caption,
        "matched_title": df.loc[best_match, "title"],
        "keywords": df.loc[best_match, "keywords"],
        "description": df.loc[best_match, "description"]
    })

matched_df = pd.DataFrame(matched_data)


In [None]:
# Write the matches to disk (without the index column), then preview the first rows.
export_path = "matched_inventory.csv"
matched_df.to_csv(export_path, index=False)
print("✅ Exported matched inventory with captions and titles.")
matched_df.head()