<h1>Generate and Improve Image Descriptions</h1>

In this notebook we prompt a model to describe each of our images, and then we ask the model to improve those descriptions (a few-shot prompt).

This involves using an image as a scenario for the survey questions, and piping the answer to one question into the text of another question.

In [None]:
# Load environment variables from a `.env` file before anything else runs
# (presumably the API keys needed by EDSL and the model services — confirm).
from dotenv import load_dotenv

load_dotenv()

In [None]:
from edsl import QuestionFreeText

# First question: ask the model for a detailed description of the image.
# The `{{ image }}` placeholder has the same name as the `image` key of the
# scenarios built later in this notebook.
q1 = QuestionFreeText(
    question_name="describe",
    question_text="""
    I have an image of an object that I would like you to describe in detail. 
    Based on the image provided, please write a description that includes:

    1. The object's appearance, including color, shape, and material.
    2. Any notable design features or patterns.
    3. The object's functionality or potential use.
    4. The overall impression or aesthetic of the object.
    
    Here is the image: {{ image }}. Please make your description as precise and vivid as possible.
    """,
)

In [None]:
from edsl import QuestionFreeText

# Second question: show the model the same image together with the answer to
# the question named "describe" (`{{ describe.answer }}`) and ask for a rating.
# NOTE(review): the question is named "improvements", but the prompt asks for
# a 1-10 evaluation with reasoning rather than a rewritten description —
# confirm this is the intended behavior.
q2 = QuestionFreeText(
    question_name="improvements",
    question_text="""
    Evaluate the description of this image on a scale from 1 to 10.
    Explain the reasoning behind your evaluation.
    Image: {{ image }}
    Description: {{ describe.answer }}
    """,
)

In [None]:
# All images are stored/cached on the Expected Parrot service.
# Each product name maps to the UUIDs of that product's images.
IMAGE_UUIDS = {
    'cabin_luggage': [
        '4f41ad44-6472-4403-8e59-0fb0e5eca575',
        'e30bbefa-b135-4a46-bf32-ed9437be8733',
        'a56079a4-1f50-4019-9344-947101bd3e4b',
    ],
    'packing_cubes': [
        '7c046f64-1209-447d-a68a-b984f3508e0b',
        '82ffd7a6-87dc-4365-9c9f-2e76263908be',
        '851dedbe-3b32-45ca-8c32-4ddc510f7327',
    ],
    'water_bottle': [
        '2a81aec9-c164-4d29-9281-360547f04ac4',
        'd34d3ec1-073d-45b9-baca-a0d3411e9e0c',
        'ff0ab897-50b8-458e-bfa5-7550b8a48648',
    ],
}

In [None]:
%%time

from edsl import Scenario, ScenarioList, FileStore

def prefetch_images(image_uuids):
    """Pull every image referenced in ``image_uuids`` with ``FileStore.pull``.

    Args:
        image_uuids: Mapping of product name to an iterable of image UUIDs.

    Returns:
        A dict keyed by product name. Each value is a dict that maps the
        zero-based position of a UUID within that product's iterable to the
        object returned by ``FileStore.pull`` for that UUID.
    """
    fetched = {}
    for product, uuids in image_uuids.items():
        images_by_index = {}
        for index, uuid in enumerate(uuids):
            images_by_index[index] = FileStore.pull(uuid)
        fetched[product] = images_by_index
    return fetched

# Pull all images once up front so the scenario list below only references
# already-fetched objects.
pre_fetched_images = prefetch_images(IMAGE_UUIDS)

# Build one scenario per fetched image. Iterating over the prefetched mapping
# (instead of a hard-coded `range(3)`) keeps this cell correct when a product
# has more or fewer than three image UUIDs.
sl = ScenarioList([
    Scenario({
        # 1-based label such as "cabin_luggage_1"
        "question_name": f"{product}_{index + 1}",
        "image": image,
    })
    for product, images in pre_fetched_images.items()
    for index, image in images.items()
])

sl

In [None]:
%%time

from edsl import Survey

# Order matters for reading the survey: q2's text references the answer to
# q1 through `{{ describe.answer }}`.
survey = Survey(questions=[q1, q2])

In [None]:
from edsl import Model

# Display the services reported by EDSL (presumably the valid values for the
# `service_name` argument used when constructing models below — confirm).
Model.services()

In [None]:
from edsl import Model, ModelList

# Models to compare. Every model uses temperature 1 and a 5000-token output
# limit; only the name of the limit keyword differs between the two services
# ("max_tokens" for openai, "maxOutputTokens" for google).
ml = ModelList([
    Model(model_name, service_name=service, temperature=1, **{token_limit_key: 5000})
    for model_name, service, token_limit_key in [
        ("gpt-4o", "openai", "max_tokens"),
        ("gpt-5-chat-latest", "openai", "max_tokens"),
        ("gemini-2.0-flash", "google", "maxOutputTokens"),
        ("gemini-1.5-flash", "google", "maxOutputTokens"),
    ]
])

ml


In [None]:
%%time

# Attach the image scenarios and the model list to the survey, then run it.
results = (
    survey
    .by(sl)
    .by(ml)
    .run()
)

In [None]:
# Show, for each result, the model, the scenario label, and all answers.
results.select(
    'model',
    'scenario.question_name',
    'answer.*',
)