In [2]:
import base64
import requests

# OpenAI API key: read from the first line of a local file so the secret
# never appears in the notebook itself.
with open("openai-key.txt", "r") as f:
    # readline() keeps the line terminator; strip it (and any stray spaces)
    # so it does not end up inside the Authorization header value.
    api_key = f.readline().strip()

# Headers shared by every request sent to the API from this notebook.
headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {api_key}"
}

In [17]:
def generate(prompt, image_path, verbose = False, model = "gpt-4-vision-preview",
             max_tokens = 128, detail = "low", timeout = 120):
    """Send one text prompt plus one image to the chat completions endpoint.

    Args:
        prompt: Text sent alongside the image.
        image_path: Path of the image file; its bytes are base64-encoded and
            embedded in the request as a ``data:image/jpeg`` URL.
        verbose: If True, display the image and print the raw JSON response.
        model: Value of the ``model`` field in the request payload.
        max_tokens: Value of the ``max_tokens`` field in the request payload.
        detail: Value of the image ``detail`` field in the request payload.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            raises instead of blocking the caller indefinitely.

    Returns:
        A ``(usage, text)`` tuple. On success ``usage`` is the response's
        ``usage`` dict and ``text`` is the first choice's message content.
        If the response carries an ``error`` object, ``usage`` is an all-zero
        dict and ``text`` is the error message.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
        KeyError: if the response has neither an ``error`` nor the expected
            ``usage`` / ``choices`` fields.
    """
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode('utf-8')

    payload = {
      "model": model,
      "messages": [
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": prompt
            },
            {
              "type": "image_url",
              "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}",
                "detail": detail
              }
            }
          ]
        }
      ],
      "max_tokens": max_tokens
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    ).json()

    if verbose:
        # Imported lazily so PIL is only required when the image is shown.
        from PIL import Image
        with Image.open(image_path) as image:
            display(image)
        print(response)

    # An error object means the call failed regardless of what else is in the
    # body; report it with zero usage so callers can still sum token counts.
    if "error" in response:
        return {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}, response["error"]["message"]
    return response["usage"], response["choices"][0]["message"]["content"]

# One-off call on a single image (this sends a real API request); the cell
# output is the (usage, text) tuple returned by generate().
generate("Describe this image", "images_split/20240101_172315_BUT.jpg")

({'prompt_tokens': 95, 'completion_tokens': 92, 'total_tokens': 187},
 'The image shows an illustration of a pair of human legs from the thigh down to the feet. It appears to represent someone walking or in mid-step, with one leg slightly ahead of the other, suggesting motion. The person is not wearing any shoes, and the backdrop is a simple, plain purple color, providing a contrast that highlights the legs. The style of the drawing is not highly detailed but uses clear lines and shapes to represent the form of the legs.')

In [23]:
import os, json
from tqdm import tqdm

## schema [{"image_path": <>, "prompt": <>, "usage": {"prompt_tokens": ...}}]
def _load_json_list(path):
    """Return the JSON content of `path`, or None if the file does not exist."""
    try:
        with open(path, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        # Only a missing file means "nothing saved yet". A corrupt file is
        # left to raise so it is not silently replaced by an empty list and
        # then overwritten by the loop below.
        return None


def _dump_json(path, data):
    """Write `data` to `path` through a temp file so an interrupt cannot truncate it."""
    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)
    tmp_path = path + ".tmp"
    with open(tmp_path, "w") as f:
        json.dump(data, f, indent=2)
    os.replace(tmp_path, path)


# Running log of token usage across every call made so far.
usages_path = "gpt4-usages.json"
usages = _load_json_list(usages_path) or []
total_usage = sum(x["usage"]["total_tokens"] for x in usages)


# Results for this run; reloaded so an interrupted run can resume.
outpath = "outputs/right/gpt4-vision-right.json"
outputs = _load_json_list(outpath)
if outputs is None:
    print("starting from zero")
    outputs = []

# NOTE(review): 446 looks like a token count carried over from an earlier
# partial run of this cell -- confirm before reusing for another output file.
current_usage = 446
prompt = "Describe this image"

# Sorted so the processing order does not depend on os.listdir's ordering.
files = sorted(x for x in os.listdir("images_split") if "BUT" in x)

# Files that already have a usable (non rate-limited) output are skipped.
done = {o["image_path"] for o in outputs if "Rate limit reached" not in o["output"]}

pbar = tqdm(files)
for filename in pbar:
    if filename in done:
        continue
    usage, output = generate(prompt, os.path.join("images_split", filename))

    # Any entry still stored for this file is a stale rate-limit failure;
    # drop it so a retry replaces it instead of creating a duplicate.
    outputs = [o for o in outputs if o["image_path"] != filename]
    outputs.append({"image_path":filename, "output": output})
    # Checkpoint after every call so paid-for results survive an interruption.
    _dump_json(outpath, outputs)

    usages.append({"image_path":filename, "prompt": prompt, "usage": usage})
    _dump_json(usages_path, usages)

    current_usage+=usage["total_tokens"]
    total_usage+=usage["total_tokens"]
    pbar.set_postfix({"current_usage": current_usage, "total_usage": total_usage})

    

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 283/283 [00:33<00:00,  8.35it/s, current_usage=1086, total_usage=234076]


In [None]:
# whyfunny: 53930
# punchline: 63560
# left: 
# right: 

In [24]:
# Images whose stored output is a rate-limit error and so need another call.
redo_files = [
    entry["image_path"]
    for entry in outputs
    if "Rate limit reached" in entry["output"]
]

# Every pair of positions (first, later) in `outputs` that share an image
# path, recorded as (image_path, first_index, later_index).
duplicates = [
    (entry["image_path"], first, later)
    for first, entry in enumerate(outputs)
    for later in range(first + 1, len(outputs))
    if outputs[later]["image_path"] == entry["image_path"]
]

45
24
307
