In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import logging
from typing import Any
from pathlib import Path
import json

from PIL import Image
from google import genai

import tt

# Logger named after the current module (__name__).
LOG = logging.getLogger(__name__)
# NOTE(review): presumably installs the project's logging configuration —
# confirm against the `tt` package.
tt.logging_init()

In [None]:
# Directory holding the sample images, resolved under the user's home directory.
mobs_path = Path.home() / "src/data/mobs1/640"

# Image used by the cells below. Alternate sample, kept for quick switching:
#   "06092b21-2024-10-20_22.22.09.png"
image_fname = mobs_path / "cda62cfe-2024-10-20_22.22.31.png"

# Load the image, then show its (width, height) and the picture itself.
image640 = Image.open(image_fname)
print(image640.size)
display(image640)

In [None]:
# Labels the detector is asked to find, kept in alphabetical order.
CLASSES = sorted(
    (
        "chicken",
        "cow",
        "creeper",
        "enderman",
        "pig",
        "player",
        "sheep",
        "skeleton",
        "spider",
        "villager",
        "zombie",
    )
)

# The same labels, each prefixed with "minecraft ".
CLASSES_MINECRAFT = [f"minecraft {label}" for label in CLASSES]

In [None]:
# Project helper object for Gemini-hosted files.
# NOTE(review): the exact behavior of GeminiFile, upload_dir and sync lives in
# the `tt` package and is not visible here — confirm before relying on it.
gf = tt.GeminiFile()
# Upload of the local image directory, currently disabled
# (presumably a one-time step — verify).
# gf.upload_dir(mobs_path)
gf.sync()
# Show the file handles that the helper currently exposes.
print(gf.gfiles)

In [None]:
# model = "gemini-2.5-flash"
# model = "gemini-2.5-flash-lite"

"""
TODO:
https://ai.google.dev/gemini-api/docs/batch-api
The Gemini Batch API is designed to process large volumes of requests
asynchronously at 50% of the standard cost. The target turnaround time is 24
hours, but in majority of cases, it is much quicker.
"""

def gemini_detect(
    image: Image.Image | genai.types.File,
    prompt: str,
    model: str = "gemini-2.5-flash-lite",
    tempurature: float | None = 0.0,
    seed: int | None = 325,
) -> list[tt.BBox]:
    """Ask a Gemini model for detections in ``image`` and return them as boxes.

    The image and the prompt are sent together in a single request whose
    response is requested as JSON. The decoded payload is handed to
    ``tt.gemini_to_bboxes`` for conversion.

    Args:
        image: A PIL image or a Gemini file handle to run detection on.
        prompt: Instruction text sent alongside the image.
        model: Name of the Gemini model to query.
        tempurature: Sampling temperature forwarded to the request config.
            The misspelled name is kept so existing keyword callers keep
            working.
        seed: Sampling seed forwarded to the request config.

    Returns:
        The result of ``tt.gemini_to_bboxes`` applied to the decoded response.

    Raises:
        AssertionError: If the response carries no text.
        json.JSONDecodeError: If the response text is not valid JSON.
    """
    client = genai.Client()
    config = genai.types.GenerateContentConfig(
        response_mime_type="application/json",
        # A zero budget asks for no thinking tokens.
        # NOTE(review): not every model accepts a zero budget — confirm before
        # passing a different `model`.
        thinking_config=genai.types.ThinkingConfig(thinking_budget=0),
        temperature=tempurature,
        # Previously accepted but never sent; forward it so the argument
        # actually reaches the request.
        seed=seed,
    )
    response = client.models.generate_content(
        # Honor the caller's choice; this used to be hardcoded to
        # "gemini-2.5-flash", which silently ignored the `model` argument.
        model=model,
        contents=[image, prompt],
        config=config,
    )
    assert response.text is not None, "Gemini response contained no text"
    bounding_boxes: list[dict[str, Any]] = json.loads(response.text)
    return tt.gemini_to_bboxes(bounding_boxes)

In [None]:
# Minimal prompt: just list the class names to look for.
prompt_short = "Detect " + ", ".join(CLASSES) + "."

# Hand the detector its own copy of the loaded image.
image_detect = image640.copy()

bbox_short = gemini_detect(image=image_detect, prompt=prompt_short)
print(bbox_short)

In [None]:
# Show the short-prompt detections in tabular form
# (presumably a DataFrame, going by the helper's name — confirm in `tt`).
display(tt.bbs_to_df(bbox_short))

In [None]:
# Render the short-prompt detections via tt.plot_bb on the loaded image.
# Earlier call, left disabled:
# plot_bounding_boxes(image640, bbox_short)
display(tt.plot_bb(image640, bbox_short, CLASSES))

In [None]:
def detect_gfiles(gf: genai.types.File) -> tuple[Image.Image, str]:
    """Run short-prompt detection on one Gemini file and annotate its image.

    The file's ``display_name`` is used as the path of the image to open
    locally, so it must be a string that ``Image.open`` can resolve.

    Args:
        gf: Gemini file handle to run detection on. Note that inside this
            function the name refers to the file, not to the notebook-level
            ``gf`` helper object.

    Returns:
        A pair of (result of ``tt.plot_bb`` for the opened image, the
        ``display_name`` that was opened).

    Raises:
        AssertionError: If ``display_name`` is not a string.
    """
    image_file = gf.display_name
    assert isinstance(image_file, str)

    # Detect first, then open the local copy for drawing.
    detections = gemini_detect(gf, prompt_short)
    source_image = Image.open(image_file)
    return tt.plot_bb(source_image, detections, CLASSES), image_file
    
# Call sync() again before building the viewer
# (presumably to refresh gf.gfiles — confirm in `tt`).
gf.sync()
# Build the project's viewer from detect_gfiles and gf.gfiles, then show its widget.
tt.InferViewer[genai.types.File](detect_gfiles, gf.gfiles).show_widget()

In [None]:
# NOTE(review): `bbs` and `gfile` are not defined anywhere in the visible
# notebook, so this cell raises NameError on a fresh kernel (Restart & Run All).
# From its use below, `bbs` looks like the raw payload that
# tt.gemini_to_bboxes accepts, and `gfile` like a Gemini file handle with a
# `display_name` — define both explicitly or remove this cell.
display(bbs)
display(tt.bbs_to_df(tt.gemini_to_bboxes(bbs)))
display(tt.plot_bb(Image.open(gfile.display_name), tt.gemini_to_bboxes(bbs), CLASSES))

In [None]:
# NOTE(review): `gfile` is not defined anywhere in the visible notebook; this
# cell only works with leftover kernel state. Define it explicitly or remove it.
Image.open(gfile.display_name)