# Install dependencies


In [5]:
!pip -q install open_clip_torch==2.24.0 timm==1.0.9 scikit-image==0.24.0 pillow==10.4.0


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.0/15.0 MB[0m [31m76.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m102.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h

# Imports


In [7]:
import io, numpy as np, PIL.Image as Image, torch
import open_clip
from google.colab import files
from skimage.segmentation import felzenszwalb
from skimage.util import img_as_float
from skimage.transform import resize

# Load dining hall menu data


In [25]:
# Context of the photo being classified (hard-coded for now).
# TODO: From image metadata, set dining_hall_id based on location, and then date and time
meal = "Lunch"
date = "09-25-2025"
dining_hall_id = 2  # south

# Candidate dish names the classifier chooses between. Each entry is
# substituted verbatim into the prompt templates, so the spelling matters.
# TODO: Load menu from API
MENU_ITEMS = [
    "Chef's Choice Soup du Jour",
    "Fish of the Day w/ Shrimp Scampi Butter",
    "Cheese Pizza",
    "Pepperoni Pizza",
    "Special Pizza",
    "Marinara Sauce",
    "Chefs Choice Vegetable Stir Fry",
    "General Tso's Chicken",
    "Sesame Noodles",
    "White Rice",
    "Five Spice Beef Tips",
    "Green Beans",
    "Halal Rice & Orzo Pilaf",
    "Beef Burger No Bun",
    "Beet and Kale Burger w/ Bun",
    "Grilled Cheese",
    "Grilled Chicken Breast",
    "Grilled Guinness Bratwurst",
    "Straight Cut French Fries",
    "Caramel Coconut Cookie Bar",
    "Snickerdoodle Cookies",
    "Carrots Roasted With Garlic",
    "Creole Tofu",
    "Outback Chicken with Apple Smoked Bacon",
    "Rice Pilaf",
    "Steamed Broccoli",
    "Whipped Golden Potatoes w/ Roasted Garlic and Rosemary",
]


# Build text prompts for each menu item


In [26]:
# Prompt templates; the "{}" placeholder is filled with a menu item name.
# Every item is rendered through every template.
PROMPTS = [
    "a photo of {}", "a plate of {}", "{} on a plate",
    "cafeteria serving of {}", "dining hall style {}",
]

def _prep_text_variants(items, prompts):
    variants, ptrs = [], []
    for idx, item in enumerate(items):
        for p in prompts:
            variants.append(p.format(item))
            ptrs.append(idx)
    return variants, np.array(ptrs)

# One prompt string per (menu item, template) pair, plus an array mapping each
# prompt back to its menu item index.
ITEM_TEXT_VARIANTS, ITEM_POINTERS = _prep_text_variants(
    items=MENU_ITEMS, prompts=PROMPTS
)

# Run on the GPU when the runtime provides one; otherwise fall back to the CPU.
# The model, the tokens and the image batch are all moved with .to(DEVICE).
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load CLIP model

In [10]:
# Load the pretrained CLIP model and its image transforms. Of the two
# transforms returned, the first (discarded here) is the training-time one and
# the second is the inference-time one used as `preprocess`.
model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-32", pretrained="laion2b_s34b_b79k", device=DEVICE
)
# open_clip hands the model back in training mode; switch to eval mode since
# this notebook only runs inference.
model.eval()

# Tokenizer matching the model architecture above.
tokenizer = open_clip.get_tokenizer("ViT-B-32")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


open_clip_pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

# Tokenize and encode prompts

In [27]:
# Embed every prompt variant once up front; no gradients are needed for inference.
with torch.no_grad():
    text_tokens = tokenizer(ITEM_TEXT_VARIANTS).to(DEVICE)
    text_emb = model.encode_text(text_tokens)
    # Scale each embedding to unit length along the last dimension.
    text_emb = text_emb.div(text_emb.norm(dim=-1, keepdim=True))

# Helper functions

In [28]:
def clip_image_embed(pil_img):
    """Encode one image with the CLIP image encoder.

    Args:
        pil_img: image accepted by the module-level ``preprocess`` transform
            (presumably a PIL image, as the name suggests).

    Returns:
        The image embedding with a leading batch dimension of size 1, scaled
        to unit length along the last dimension.
    """
    # Add a batch dimension and move the input to the model's device.
    batch = preprocess(pil_img).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        features = model.encode_image(batch)
        unit_features = features / features.norm(dim=-1, keepdim=True)
    return unit_features

def rank_items_from_embedding(img_emb):
    """Rank the menu items by how well their prompts match an image embedding.

    Each menu item is scored by the highest similarity among its prompt
    variants. The per-item scores are standardised (z-scored) across items and
    passed through a softmax, giving relative scores that sum to 1.

    Args:
        img_emb: image embedding with a leading batch dimension of size 1,
            as produced by ``clip_image_embed``.

    Returns:
        ``(order, probs)``: ``order`` holds the menu item indices sorted from
        highest to lowest score, and ``probs`` holds the score for each item,
        indexed by menu item position.
    """
    similarities = (img_emb @ text_emb.T).squeeze(0).detach().float().cpu().numpy()
    # The best-matching prompt variant decides each item's raw score.
    per_item = np.array(
        [
            similarities[ITEM_POINTERS == item_idx].max()
            for item_idx in range(len(MENU_ITEMS))
        ],
        dtype=np.float32,
    )
    # Standardise across items; the small epsilon avoids dividing by zero.
    z = (per_item - per_item.mean()) / (per_item.std() + 1e-6)
    weights = np.exp(z)
    probs = weights / weights.sum()
    return np.argsort(-probs), probs

def pretty_print(order, probs, header="Predictions"):
    """Print a ranked, numbered list of menu items with their scores.

    Args:
        order: iterable of menu item indices, best match first.
        probs: per-item scores, indexed by menu item position.
        header: title printed above the list, underlined with dashes.
    """
    underline = "-" * len(header)
    print(f"\n{header}\n{underline}")
    for rank, idx in enumerate(order, start=1):
        print(f"{rank}. {MENU_ITEMS[idx]} (score ~ {probs[idx]:.3f})")

In [29]:
# Number of top-ranked items reported as "likely on the plate".
# Heuristic cutoff; tune as needed.
TOP_K = 3

img = Image.open("south_2025-09-25.jpg").convert("RGB")

# Embed the whole photo and rank every menu item against it.
emb = clip_image_embed(img)
order, probs = rank_items_from_embedding(emb)
pretty_print(order, probs, "Predicted menu items (full image)")

# `chosen` was referenced without ever being defined, which raised a NameError.
# Take the TOP_K highest-scoring items from the ranking.
chosen = order[:TOP_K]
print("\nLikely on the plate:", [MENU_ITEMS[i] for i in chosen])


Predicted menu items (full image)
---------------------------------
1. General Tso's Chicken (score ~ 0.320)
2. Creole Tofu (score ~ 0.109)
3. White Rice (score ~ 0.102)
4. Fish of the Day w/ Shrimp Scampi Butter (score ~ 0.054)
5. Five Spice Beef Tips (score ~ 0.046)
6. Halal Rice & Orzo Pilaf (score ~ 0.045)
7. Rice Pilaf (score ~ 0.035)
8. Chefs Choice Vegetable Stir Fry (score ~ 0.032)
9. Grilled Chicken Breast (score ~ 0.026)
10. Sesame Noodles (score ~ 0.025)
11. Steamed Broccoli (score ~ 0.023)
12. Green Beans (score ~ 0.023)
13. Outback Chicken with Apple Smoked Bacon (score ~ 0.023)
14. Marinara Sauce (score ~ 0.019)
15. Chef's Choice Soup du Jour (score ~ 0.014)
16. Special Pizza (score ~ 0.012)
17. Whipped Golden Potatoes w/ Roasted Garlic and Rosemary (score ~ 0.011)
18. Snickerdoodle Cookies (score ~ 0.011)
19. Grilled Guinness Bratwurst (score ~ 0.011)
20. Straight Cut French Fries (score ~ 0.010)
21. Beet and Kale Burger w/ Bun (score ~ 0.009)
22. Caramel Coconut Cookie