In [7]:
import base64
import requests

# OpenAI API key: read from the first line of openai-key.txt.
with open("openai-key.txt", "r") as f:
    # readline() keeps the trailing newline; strip it so no whitespace ends up
    # inside the Authorization header value built below.
    api_key = f.readline().strip()

if not api_key:
    # Fail here with a clear message instead of later with an opaque API error.
    raise ValueError("openai-key.txt is empty; expected the API key on its first line")

# Headers shared by every request sent to the OpenAI API.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

In [4]:
def generate(prompt, image_path, verbose = False, model = "gpt-4-vision-preview",
             max_tokens = 128, detail = "low", timeout = 120):
    """Ask a vision chat model a question about a single local image.

    The image is read from disk, base64-encoded and embedded in the request
    as a data URL next to the text prompt, then POSTed to the OpenAI chat
    completions endpoint using the module-level ``headers``.

    Args:
        prompt: Text question sent alongside the image.
        image_path: Path of the image file to send.
        verbose: When true, print the decoded JSON response.
        model: Model name placed in the request payload.
        max_tokens: Upper bound on generated tokens placed in the payload.
        detail: Value of the ``detail`` field of the image part.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            raises instead of blocking forever.

    Returns:
        A ``(usage, content)`` tuple: the ``usage`` entry of the response and
        the message content of the first choice.

    Raises:
        RuntimeError: If the response body carries an ``"error"`` entry
            instead of a completion.
        requests.exceptions.RequestException: On connection problems or
            timeouts raised by ``requests``.
    """
    import mimetypes  # local import: only needed to label the data URL

    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode('utf-8')

    # Label the data URL with the file's real image type; fall back to JPEG
    # (the previous hard-coded value) when the extension is unknown.
    guessed_type = mimetypes.guess_type(image_path)[0]
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                            "detail": detail
                        }
                    }
                ]
            }
        ],
        "max_tokens": max_tokens
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    ).json()
    if verbose:
        print(response)

    # Failed requests come back without "usage"/"choices"; surface the API's
    # own error payload rather than an opaque KeyError on the return line.
    if "error" in response:
        raise RuntimeError(f"OpenAI API error for {image_path}: {response['error']}")

    return response["usage"], response["choices"][0]["message"]["content"]

# Smoke test on a single image; verbose=True also prints the raw API response.
generate(
    prompt="This meme made me laugh. What is the punchline in this meme?",
    image_path="images/20240101_172315.jpg",
    verbose=True,
)

{'id': 'chatcmpl-8hN5zh6TQZl6QEJiFm8AfxqQlw4Aj', 'object': 'chat.completion', 'created': 1705347575, 'model': 'gpt-4-1106-vision-preview', 'usage': {'prompt_tokens': 107, 'completion_tokens': 113, 'total_tokens': 220}, 'choices': [{'message': {'role': 'assistant', 'content': 'The meme shows a book with two pages open. On the left page, there is an illustration of legs wearing high heels with the word "YES" above it, and on the right page, an illustration of legs without high heels, positioned as if they are still wearing heels, with the word "BUT" above it. The humor here lies in the expectation versus reality motif where someone\'s muscle memory or habits persist even when the usual context (like high heels) is removed, leading to a funny and relatable situation for those who often wear high heels.'}, 'finish_reason': 'stop', 'index': 0}]}


({'prompt_tokens': 107, 'completion_tokens': 113, 'total_tokens': 220},
 'The meme shows a book with two pages open. On the left page, there is an illustration of legs wearing high heels with the word "YES" above it, and on the right page, an illustration of legs without high heels, positioned as if they are still wearing heels, with the word "BUT" above it. The humor here lies in the expectation versus reality motif where someone\'s muscle memory or habits persist even when the usual context (like high heels) is removed, leading to a funny and relatable situation for those who often wear high heels.')

In [5]:
import os, json
from tqdm import tqdm

def _write_json_atomic(path, data):
    """Dump ``data`` as JSON to a temporary file, then swap it into ``path``."""
    tmp_path = path + ".tmp"
    with open(tmp_path, "w") as f:
        json.dump(data, f, indent=2)
    # os.replace swaps the file in one step, so an interrupted run cannot leave
    # a half-written JSON file behind.
    os.replace(tmp_path, path)


## schema [{"image_path": <>, "prompt": <>, "usage": {"prompt_tokens": ...}}]
# The usage log must already exist; it accumulates token usage across runs.
usages_path = "gpt4-usages.json"
with open(usages_path, "r") as f:
    usages = json.load(f)
total_usage = sum(x["usage"]["total_tokens"] for x in usages)

outpath = "outputs/punchline/gpt4-vision-punchline.json"
# Create the output directory up front so the first write cannot fail after an
# API call has already been made.
os.makedirs(os.path.dirname(outpath), exist_ok=True)
try:
    with open(outpath, "r") as f:
        outputs = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    # Only a missing or unreadable results file means "start over"; any other
    # error (e.g. permissions) should surface instead of being swallowed.
    print("starting from zero")
    outputs = []

# Images that already have an answer, as a set for constant-time lookups.
processed = {o["image_path"] for o in outputs}

# NOTE(review): 14996 looks like usage carried over from an earlier session for
# this task — confirm before re-running on a fresh kernel.
current_usage = 14996
prompt = "This meme made me laugh. What is the punchline in this meme?"

# sorted() makes the processing order deterministic across runs.
pbar = tqdm(sorted(os.listdir("images")))
for filename in pbar:
    if filename in processed:
        continue
    usage, output = generate(prompt, os.path.join("images", filename))

    # Persist after every image so an interrupted run can resume where it stopped.
    outputs.append({"image_path":filename, "output": output})
    processed.add(filename)
    _write_json_atomic(outpath, outputs)

    usages.append({"image_path":filename, "prompt": prompt, "usage": usage})
    _write_json_atomic(usages_path, usages)

    current_usage+=usage["total_tokens"]
    total_usage+=usage["total_tokens"]
    pbar.set_postfix({"current_usage": current_usage, "total_usage": total_usage})

    

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 283/283 [26:22<00:00,  5.59s/it, current_usage=63850, total_usage=117490]
