In [None]:
import json
import glob
import os
import pandas as pd
from PIL import Image
import base64
import io
from openai import OpenAI
from pydantic import BaseModel
from textwrap import dedent
from tqdm import tqdm, trange
from copy import deepcopy
import random


vmc_bench = "/pasteur2/u/suyc/VLMEval/VLMEvalKit/LMUData/VMCBench-9450.tsv"
vmc_bench_df = pd.read_csv(vmc_bench, sep="\t")
# select category == MMMU-500, MathVista-500, AI2D-500
data = vmc_bench_df[vmc_bench_df["category"].isin(["MMMU-500", "MathVista-500", "AI2D-500"])].to_dict(orient="records")

In [48]:
from anthropic import Anthropic

client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

def process_claude(item):
    question = item["question"]
    correct_choice = item[item["answer"]]
    image = item["image"]
    try:
        if isinstance(eval(image), list):
            image = str(eval(image)[0])
    except:
        pass

    prompt = f"""Generate 3 distractors for the following question with image:
        Question: {question}
        Answer: {correct_choice}
        Output a list of 3 distractors in JSON format only. Do not output anything else.
    """


    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1024,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/jpeg",
                            "data": image,
                        },
                    },
                    {"type": "text", "text": dedent(prompt)},
                ],
            }
        ],
    )
    output = response.content[0].text
    return output

item = data[0]
output = json.loads(process_claude(item))


In [49]:
output

{'distractors': ['5t^2 [u(t) - u(t - 2)] + 20[u(t - 2) - u(t - 5)] + 15(t - 5)[u(t - 5) - u(t - 7)]',
  '5t^2 [u(t) - u(t - 2)] + 20[u(t - 2) - u(t - 5)] - 30[u(t - 5) - u(t - 7)]',
  '5t [u(t) - u(t - 2)] + 20[u(t - 2) - u(t - 5)] + 15(t - 7)[u(t - 5) - u(t - 7)]']}

In [41]:
from copy import deepcopy

def process_item(item):
    try:
        item = deepcopy(item)
        distractors = process_claude(
            item
        )
        distractors = json.loads(distractors)["distractors"]
        answer = item[item["answer"]]
        options = distractors + [answer]
        random.shuffle(options)
        item["A"], item["B"], item["C"], item["D"] = options
        item["answer"] = "ABCD"[options.index(answer)]
        return item
    except Exception as e:
        print(e)
        return None

process_item(data[4])

{'index': 5,
 'question': 'Adam and Arin Adams have collected their personal asset and liability information and have asked you to put together a balance sheet as of December 31, 2015. The following information is received from the Adams family. <image 1> What was their net working capital (NWC) for the year? (Hint: NWC is the difference between total liquid assets and total current liabilities.)',
 'A': '$3,500',
 'B': '$4,300',
 'C': '$2,100',
 'D': '$1,850',
 'answer': 'C',
 'category': 'MMMU-500',
 'image': "['/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAFdAvIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXG

In [42]:
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm

def parallel_judge(data):
    with ProcessPoolExecutor(max_workers=32) as executor:
        results = list(tqdm(executor.map(process_item, data), total=len(data)))
    return results

outputs = parallel_judge(data)

  6%|▋         | 81/1269 [00:08<01:26, 13.73it/s] 

Expecting ':' delimiter: line 5 column 5 (char 73)
Invalid \escape: line 4 column 9 (char 35)


  8%|▊         | 106/1269 [00:10<01:16, 15.10it/s]

Invalid \escape: line 3 column 65 (char 85)


  9%|▉         | 114/1269 [00:11<01:29, 12.94it/s]

Invalid \escape: line 3 column 8 (char 28)


 19%|█▉        | 245/1269 [00:17<00:50, 20.18it/s]

Invalid \escape: line 3 column 7 (char 27)
Expecting ':' delimiter: line 5 column 5 (char 76)


 21%|██        | 265/1269 [00:18<00:48, 20.55it/s]

Invalid \escape: line 3 column 11 (char 33)


 24%|██▎       | 301/1269 [00:22<01:20, 12.02it/s]

Extra data: line 9 column 1 (char 65)
Expecting ':' delimiter: line 5 column 5 (char 122)


 28%|██▊       | 357/1269 [00:24<00:38, 23.74it/s]

Invalid \escape: line 3 column 39 (char 61)


 37%|███▋      | 471/1269 [00:31<01:08, 11.58it/s]

list indices must be integers or slices, not str


 40%|████      | 512/1269 [00:32<00:28, 26.15it/s]

list indices must be integers or slices, not str
list indices must be integers or slices, not str


 41%|████      | 521/1269 [00:33<00:41, 17.94it/s]

list indices must be integers or slices, not str


 45%|████▍     | 566/1269 [00:34<00:34, 20.63it/s]

list indices must be integers or slices, not str


 49%|████▉     | 624/1269 [00:36<00:19, 33.56it/s]

list indices must be integers or slices, not str


 57%|█████▋    | 727/1269 [00:40<00:20, 26.48it/s]

list indices must be integers or slices, not str
list indices must be integers or slices, not str


 84%|████████▍ | 1063/1269 [00:56<00:07, 28.08it/s]

Expecting value: line 6 column 3 (char 71)


100%|██████████| 1269/1269 [01:06<00:00, 19.20it/s]


In [44]:
with open("/pasteur2/u/yuhuiz/CVPR/AutoConverter/data/VMCBench_claude.jsonl", "w") as f:
    for item in outputs:
        if item is not None:
            f.write(json.dumps(item) + "\n")