In [2]:
from model import Icon, Dataset
from llm import GPT, Claude, Llava

import openai
import anthropic
from ollama import Client

from dotenv import load_dotenv
from IPython.core.display import HTML

# import evaluate
# import pandas as pd

# from bert_score import BERTScorer
# from scipy.stats import ttest_rel

# Datasets

### Load Datasets from Saved Pickle File

In [3]:
# Build a Dataset object and populate it from the previously saved pickle.
# NOTE(review): the meaning of the empty-string constructor argument is not
# visible here -- presumably a source path that is unused when loading from a
# pickle; confirm against model.Dataset.
dataset = Dataset("")
# Pickle files can execute arbitrary code when loaded; only load files
# produced by this project.
dataset.load_pkl("ablation_zero-shot.pkl")

In [4]:
# Sanity check: number of icons held by the dataset.
print(len(dataset.icons))

408


In [5]:
# Export the current dataset state to JSON under ../outputs.
dataset.save_json("../outputs/zero-shot.json")

In [6]:
# Write the dataset back to the pickle file; this is the same path the
# dataset is loaded from, so the file is overwritten in place.
dataset.save_pkl("ablation_zero-shot.pkl")

# Generate Descriptions

## GPT-4o, Claude 3.5, LLaVA

In [5]:
# API keys are stored in environment variables; load them before any client
# is constructed.
load_dotenv()

# GPT-4o through the OpenAI SDK client.
openai_client = openai.OpenAI()
gpt = GPT(openai_client, "gpt-4o")

# Claude 3.5 Sonnet through the Anthropic SDK client.
anth_client = anthropic.Anthropic()
claude = Claude(anth_client, "claude-3-5-sonnet-20240620")

# LLaVA 34B through an Ollama client.
llava_client = Client()  # pass host=<ollama_api_endpoint> if necessary
llava = Llava(llava_client, "llava:34b")

In [6]:
# Prompt building blocks shared by the ablation treatments. Each piece ends
# with a trailing space so that pieces can be concatenated directly.
_ROLE_PREAMBLE = (
    "You are an AI visual assistant specialized in interpreting icons displayed on the dashboard of a vehicle. "
    "An icon communicates important information about the vehicle to the driver. "
    "For example, a particular icon may indicate that a seatbelt is not fastened. "
)

# System-style instructions: one variant for treatments that attach the image,
# one for the treatment that only supplies the associated text.
_INSTRUCTIONS_WITH_IMAGE = _ROLE_PREAMBLE + "You are seeing an image of a single dashboard icon. "
_INSTRUCTIONS_WITHOUT_IMAGE = _ROLE_PREAMBLE + (
    "Imagine you are seeing an image of a single dashboard icon that has an associated text description. "
)

# Task prompt common to all treatments.
# Fix: the sentence ending in "icon's meaning." previously ran straight into
# "Format your response..." because the adjacent string literals were joined
# without a separating space. Captions generated before this fix used the
# run-together text.
_TASK_PROMPT = (
    "Briefly describe the dashboard icon depicted in the image, focusing on the visual content of the image and meaning of the icon. "
    "Limit your response to 2 sentences. The first sentence should describe the visual content. The second sentence should describe the icon's meaning. "
    "Format your response as a JSON object with the following keys: 'visual_content', 'meaning'. "
    "Respond only with the JSON object. "
)

# Lead-in appended to the prompt when a treatment supplies the icon's text.
# NOTE(review): the context text itself is presumably appended after this
# lead-in by get_captions -- confirm in the llm module.
_CONTEXT_LEAD_IN = "The image has the following associated text: "

# Ablation treatments, keyed by name. Each value is a 4-element list:
#   [instructions, prompt, use_image, use_context]
# which the captioning cells unpack in exactly that order.
TREATMENTS = {
    "image-and-context": [
        _INSTRUCTIONS_WITH_IMAGE,
        _TASK_PROMPT + _CONTEXT_LEAD_IN,
        True,
        True,
    ],
    "image-only": [
        _INSTRUCTIONS_WITH_IMAGE,
        _TASK_PROMPT,
        True,
        False,
    ],
    "context-only": [
        _INSTRUCTIONS_WITHOUT_IMAGE,
        _TASK_PROMPT + _CONTEXT_LEAD_IN,
        False,
        True,
    ],
}

### Zero-shot (k=0): all treatments (image + context, image only, context only)

In [7]:
# Run GPT over every ablation treatment with k=0 and no training set.
# Each TREATMENTS value unpacks as (instructions, prompt, use_image, use_context).
for treatment, (instructions, prompt, use_image, use_context) in TREATMENTS.items():
    gpt.get_captions(
        train=None,
        test=dataset,
        k=0,
        treatment=f"gpt-4_{treatment}",
        instructions=instructions,
        prompt=prompt,
        use_image=use_image,
        use_context=use_context,
        max_tokens=1000,
        rerun=True,
    )
    # Persist after every treatment (pickle + JSON) so finished treatments
    # are on disk before the next one starts.
    dataset.save_pkl("ablation_zero-shot.pkl")
    dataset.save_json("../outputs/zero-shot.json")

In [None]:
# Run LLaVA over every ablation treatment with k=0 and no training set.
# Each TREATMENTS value unpacks as (instructions, prompt, use_image, use_context).
for treatment, (instructions, prompt, use_image, use_context) in TREATMENTS.items():
    llava.get_captions(
        train=None,
        test=dataset,
        k=0,
        treatment=f"llava_{treatment}",
        instructions=instructions,
        prompt=prompt,
        use_image=use_image,
        use_context=use_context,
        max_tokens=1000,
        rerun=True,
    )
    # Persist after every treatment (pickle + JSON) so finished treatments
    # are on disk before the next one starts.
    dataset.save_pkl("ablation_zero-shot.pkl")
    dataset.save_json("../outputs/zero-shot.json")

In [None]:
# Run Claude over every ablation treatment with k=0 and no training set.
# Each TREATMENTS value unpacks as (instructions, prompt, use_image, use_context).
for treatment, (instructions, prompt, use_image, use_context) in TREATMENTS.items():
    claude.get_captions(
        train=None,
        test=dataset,
        k=0,
        treatment=f"claude-3-5_{treatment}",
        instructions=instructions,
        prompt=prompt,
        use_image=use_image,
        use_context=use_context,
        max_tokens=1000,
        rerun=True,
    )
    # Persist after every treatment (pickle + JSON) so finished treatments
    # are on disk before the next one starts.
    dataset.save_pkl("ablation_zero-shot.pkl")
    dataset.save_json("../outputs/zero-shot.json")