In [1]:
"""
Install the additional SDK that adds JSON schema support to the Google AI Python SDK:

$ pip install google-ai-generativelanguage
"""

import json
import mimetypes
import os

import google.generativeai as genai
from google.ai.generativelanguage_v1beta.types import content

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())


  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:

# Authenticate the Gemini SDK. The key is read from the environment; indexing
# os.environ raises KeyError immediately if GEMINI_API_KEY is not set.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Path of the image to upload, also taken from the environment (KeyError if
# IMAGE_PATH is unset).
image_path = os.environ["IMAGE_PATH"]


def upload_to_gemini(path, mime_type=None):
    """Upload a local file to Gemini and return the resulting file handle.

    The upload is delegated to ``genai.upload_file``; the display name and
    URI reported by the returned object are printed for reference.

    See https://ai.google.dev/gemini-api/docs/prompting_with_media

    Args:
        path: Location of the file to upload.
        mime_type: Optional MIME type forwarded unchanged to the SDK.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    uploaded = genai.upload_file(path, mime_type=mime_type)
    print(f"Uploaded file '{uploaded.display_name}' as: {uploaded.uri}")
    return uploaded


# Generation settings passed to the model. The response is constrained to JSON
# ("response_mime_type") matching "response_schema": an array of objects, each
# with a pepper title, a colour name and a list of integer box coordinates.
generation_config = {
    "temperature": 0.1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_schema": content.Schema(
        type=content.Type.ARRAY,
        items=content.Schema(
            # `enum` only constrains STRING values, so it is not set on this
            # OBJECT schema.
            type=content.Type.OBJECT,
            required=["title", "color", "coords"],
            properties={
                "title": content.Schema(
                    type=content.Type.STRING,
                    description="Popular name of the pepper plant.",
                ),
                "color": content.Schema(
                    type=content.Type.STRING,
                    description="A color name proper for the annotation box.",
                ),
                "coords": content.Schema(
                    type=content.Type.ARRAY,
                    description="A list of 4 integers to locate the 4 corners [y1, x1, y2, x2].",
                    items=content.Schema(type=content.Type.INTEGER),
                ),
            },
        ),
    ),
    "response_mime_type": "application/json",
}

# System prompt for the model, kept in a named constant so the constructor
# call below stays compact. The text (including its leading newline and
# trailing indentation) is passed to the SDK exactly as written.
SYSTEM_INSTRUCTION = """
Given the image you will receive, return bounding boxes as a JSON array,
  return the typical name of the pepper, such as bell pepper, jalapeno, etc.
  and the color of the pepper, such as red, green, yellow, etc.
  """

# Build the Gemini model with the generation settings defined earlier.
model = genai.GenerativeModel(
    system_instruction=SYSTEM_INSTRUCTION,
    generation_config=generation_config,
    model_name="gemini-1.5-flash",
)

# Upload the image referenced by IMAGE_PATH; the file must exist on the local
# file system. The MIME type is inferred from the file extension rather than
# being fixed to PNG, so other image formats are labelled correctly. Paths
# without a recognisable extension keep the previous "image/png" default.
image_mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
files = [
    upload_to_gemini(image_path, mime_type=image_mime_type),
]

# Open a chat whose history already contains one user turn holding only the
# uploaded image (the first entry of `files`).
chat_session = model.start_chat(
    history=[{"role": "user", "parts": [files[0]]}],
)

# Ask for the annotations; the reply's first part is expected to hold the JSON
# text, which is parsed into Python objects.
response = chat_session.send_message(
    "Return bounding boxes as a JSON array, tagging peppers by their name and colour suggestion."
)
text_dict = json.loads(response.parts[0].text)

# Persist the parsed result, pretty-printed with a two-space indent.
with open("output.json", "w") as f:
    json.dump(text_dict, f, indent=2)

Uploaded file 'image.png' as: https://generativelanguage.googleapis.com/v1beta/files/meuazhvz6x5z
