In [1]:
import base64
import requests

# OpenAI API key: read from a local file so the secret is never stored in the notebook.
with open("openai-key.txt", "r") as f:
    # Only the first line is used. strip() removes the trailing newline that
    # readline() keeps; left in place it would end up inside the Authorization
    # header value and make the header invalid.
    api_key = f.readline().strip()

if not api_key:
    # Fail here rather than on the first (billed) request with an opaque 401.
    raise ValueError("openai-key.txt is empty; expected the API key on its first line")

# Headers shared by every request to the OpenAI REST API.
headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {api_key}"
}

In [2]:
def generate(prompt, image_path, verbose = False, model = "gpt-4-vision-preview",
             detail = "low", max_tokens = 128, timeout = 120):
    """Send one text prompt plus one image to the OpenAI chat completions endpoint.

    Args:
        prompt: Text sent as the first content part of the user message.
        image_path: Path of the image file; it is read, base64-encoded and
            embedded in the request as a data URL.
        verbose: When True, display the image and print the raw JSON response.
        model: Value of the "model" field in the request payload.
        detail: Value of the "detail" field of the image part.
        max_tokens: Value of the "max_tokens" field in the request payload.
        timeout: Seconds passed to ``requests.post`` as its timeout, so a
            stalled connection raises instead of blocking forever.

    Returns:
        A ``(usage, text)`` tuple. On success ``usage`` is the response's
        "usage" dict and ``text`` is the content of the first choice. If the
        response carries an "error" and no "usage", ``usage`` is an all-zero
        dict and ``text`` is the error message.

    Raises:
        OSError: if ``image_path`` cannot be read.
        requests.exceptions.RequestException: on network failure or timeout.
        KeyError: if the response has neither the success nor the error shape.
    """
    # Local import: keeps this cell self-contained without editing the import cell.
    import mimetypes

    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode('utf-8')

    # Derive the data-URL media type from the file extension instead of always
    # claiming JPEG; fall back to the previous hard-coded value when unknown.
    mime_type = mimetypes.guess_type(image_path)[0]
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    payload = {
      "model": model,
      "messages": [
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": prompt
            },
            {
              "type": "image_url",
              "image_url": {
                "url": f"data:{mime_type};base64,{base64_image}",
                "detail": detail
              }
            }
          ]
        }
      ],
      "max_tokens": max_tokens
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    ).json()
    if verbose:
        # PIL is only needed for the optional preview, so import it lazily.
        from PIL import Image
        image = Image.open(image_path)
        display(image)
        print(response)
    if "usage" not in response and "error" in response:
        # Report API errors as text with zero usage so callers can keep
        # accumulating token counts without special-casing failures.
        return {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}, response["error"]["message"]
    return response["usage"], response["choices"][0]["message"]["content"]

# Try the helper on a single image; the returned (usage, text) tuple is shown as the cell output.
generate(
    prompt="Describe this image",
    image_path="images_split/20240101_172315_BUT.jpg",
)

({'prompt_tokens': 95, 'completion_tokens': 82, 'total_tokens': 177},
 'The image shows an illustration of a pair of human legs walking. One leg is in front of the other, suggesting a walking motion. The legs are bare, and the feet appear to be barefoot as well. The backdrop is a plain, solid color, providing a simple contrast to the legs. It seems to be a simplistic and stylized representation, possibly from a book or a minimalistic art piece.')

In [3]:
import os, json
from tqdm import tqdm
from validate import validate_file

def _write_json(path, data, indent):
    """Write `data` as JSON to `path` via a temp file and an atomic rename.

    The files below are rewritten after every API call; replacing them in one
    step means an interrupted run cannot leave a truncated JSON file behind.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump(data, f, indent=indent)
    os.replace(tmp_path, path)


## schema [{"image_path": <>, "prompt": <>, "usage": {"prompt_tokens": ...}}]
usage_path = "gpt4-usages.json"
try:
    with open(usage_path, "r") as f:
        usages = json.load(f)
except FileNotFoundError:
    # First run: no usage log yet (mirrors the handling of the outputs file).
    usages = []
total_usage = sum(x["usage"]["total_tokens"] for x in usages)

outpath = "outputs/right/gpt4-vision-right.json"
# Create the output directory up front so the first save cannot fail after a
# paid API call has already been made.
os.makedirs(os.path.dirname(outpath), exist_ok=True)
try:
    with open(outpath, "r") as f:
        outputs = json.load(f)
except FileNotFoundError:
    # Only a missing file means "no previous results". A corrupt or unreadable
    # file now raises instead of being silently replaced by an empty dict.
    print("starting from zero")
    outputs = {}

current_usage = 0
# Sorted so the processing order is the same on every run and platform.
files = sorted(x for x in os.listdir("images_split") if "BUT" in x)
# Entries whose stored text is a rate-limit error message are requested again.
redo_files = [
    path for path, output in outputs.items()
    if isinstance(output, str) and "Rate limit reach" in output
]
redo_lookup = set(redo_files)
print(f"Redoing {len(redo_files)} files")
pbar = tqdm(files)
for filename in pbar:
    # Skip images that already have a stored, non-rate-limited result.
    if filename in outputs and filename not in redo_lookup:
        continue
    prompt = "Describe this image"
    usage, output = generate(prompt, os.path.join("images_split", filename))

    # Persist after every call so an interrupted run can resume where it stopped.
    outputs[filename] = output
    _write_json(outpath, outputs, indent=4)

    usages.append({"image_path":filename, "prompt": prompt, "usage": usage})
    _write_json(usage_path, usages, indent=2)

    current_usage+=usage["total_tokens"]
    total_usage+=usage["total_tokens"]
    pbar.set_postfix({"current_usage": current_usage, "total_usage": total_usage})

# Final save also covers the case where every file was skipped.
_write_json(outpath, outputs, indent=4)
if not validate_file(outpath):
    print("Validation failed!")

Redoing 24 files


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 283/283 [03:00<00:00,  1.57it/s, current_usage=5278, total_usage=239354]

File outputs/right/gpt4-vision-right.json is valid.





In [None]:
# Approximate token usage per run (presumably total_tokens, as tracked by total_usage above):
# whyfunny: 53930
# punchline: 63560
# left: ~60000
# right: ~60000