In [1]:
import random

# Maps a zero-based option position to its multiple-choice letter (0 -> 'A' ... 4 -> 'E').
index_to_letter = dict(enumerate("ABCDE"))

In [2]:
def replace_a_or_the(word, replace='a'):
    """Swap the leading article of a noun phrase.

    Args:
        word: The phrase to rewrite, e.g. "a dog" or "the dog".
        replace: Which leading article to replace.
            'a'   -> a leading indefinite article ("a " or "an ") becomes "the ".
            'the' -> a leading "the " becomes "a ".
            Any other value leaves the phrase untouched.

    Returns:
        The phrase with only its leading article swapped; articles later in
        the phrase are never touched. If the phrase does not start with the
        requested article it is returned unchanged.

    Note:
        The 'the' -> 'a' direction always produces "a ", even before a vowel
        sound; choosing between "a" and "an" needs pronunciation knowledge
        that this helper does not have.
    """
    if replace == 'a':
        # "an " must be handled too, otherwise subjects such as "an elephant"
        # keep their indefinite article in the generated question.
        for article in ("a ", "an "):
            if word.startswith(article):
                return "the " + word[len(article):]
    elif replace == 'the' and word.startswith("the "):
        return "a " + word[len("the "):]
    return word

In [3]:
import json
# Read the verb-phrase candidate sentences that later cells group by 'gid'.
with open('verb_candidates.json', 'r') as candidates_file:
    verb_json_list = json.loads(candidates_file.read())

In [4]:
# Group the candidate sentences by their 'gid'; each group later becomes one
# question whose answer options are the group's verb phrases.
verb_question_bundle = {}
for candidate in verb_json_list:
    verb_question_bundle.setdefault(candidate['gid'], []).append(candidate)

In [5]:
def _build_verb_question(sent_list):
    """Assemble one verb-phrase question record from a group of candidate sentences.

    Each sentence dict is read for the keys 'tag', 'gid', 'subj', 'vp', 'loc'
    and 'ppl'. The sentence tagged 'ground-truth' supplies the id, subject,
    correct verb phrase and location. A sentence tagged 'distractor' supplies
    the wrong verb phrase used in the binary "no" question (if several carry
    that tag, the last one wins). Every sentence's verb phrase becomes one
    multiple-choice option.

    Returns:
        A dict holding the raw fields plus the multiple-choice, binary
        (yes/no), open question texts, their answers and the image prompt.

    Raises:
        AssertionError: if the group has no ground-truth or no distractor
            sentence, or does not contain exactly five sentences.
    """
    question_id = gt_subj = gt_verb = gt_loc = dt_verb = None
    ppl_by_vp = {}
    options = []
    for sent in sent_list:
        if sent['tag'] == 'ground-truth':
            question_id = sent['gid']
            gt_subj = sent['subj']
            gt_verb = sent['vp']
            gt_loc = sent['loc']
        elif sent['tag'] == 'distractor':
            dt_verb = sent['vp']
        ppl_by_vp[sent['vp']] = sent['ppl']
        options.append(sent['vp'])

    # The previous check `any([...]) != None` compared a bool with None and
    # therefore could never fail; verify each field individually instead.
    required = {
        'id': question_id,
        'subj': gt_subj,
        'vp': gt_verb,
        'loc': gt_loc,
        'distractor vp': dt_verb,
    }
    missing = [name for name, value in required.items() if value is None]
    assert not missing, f"group is missing required fields: {missing}"

    # Exactly five options are expected, one per letter A-E.
    assert len(options) == 5
    random.shuffle(options)  # randomise the position of the correct option
    ans_idx = options.index(gt_verb)

    gt_subj_the = replace_a_or_the(gt_subj, replace='a')
    gt_subj_a = replace_a_or_the(gt_subj, replace='the')

    # The open question doubles as the stem of the multiple-choice question.
    open_question = f"What is {gt_subj_the} doing {gt_loc}?"
    option_lines = "\n".join(
        f"{index_to_letter[pos]}. {option}" for pos, option in enumerate(options)
    )

    return {
        'id': question_id,
        'subj': gt_subj,
        'loc': gt_loc,
        'gt_vp': gt_verb,
        'dt_vp': dt_verb,
        'ppl': ppl_by_vp,
        'choice': f"{open_question}\n{option_lines}",
        'choice answer': index_to_letter[ans_idx],
        # binary question - true
        'binary-yes': f"Does the image show that {gt_subj_a} {gt_verb} {gt_loc}?",
        'binary-yes answer': "Yes.",
        # binary question - false
        'binary-no': f"Does the image show that {gt_subj_a} {dt_verb} {gt_loc}?",
        'binary-no answer': "No.",
        'open': open_question,
        'open answer': gt_verb,
        'image prompt': f"Generate an image of {gt_subj} {gt_verb} {gt_loc}.",
    }


# One question per group, in bundle order. Option order comes from the global
# `random` state; no seed is set in this cell, so answer letters can differ
# between runs.
verb_questions = [
    _build_verb_question(sent_list) for sent_list in verb_question_bundle.values()
]

with open('verb_questions.json', 'w') as f:
    json.dump(verb_questions, f)

In [6]:
# Render the saved verb questions as a human-readable text file, one
# "=" * 10 separated section per question.
with open('verb_questions-h.txt', 'w') as outfile, open('verb_questions.json', 'r') as infile:
    write = outfile.write
    for question in json.load(infile):
        write(f"Question{question['id']}:\n\n{question['image prompt']}\n{question['choice']}\n")
        write(f"Answer: {question['choice answer']}\n\n")
        write(f"{question['binary-yes']}\nAnswer: {question['binary-yes answer']}\n")
        write(f"{question['binary-no']}\nAnswer: {question['binary-no answer']}\n\n")
        write(f"{question['open']}\nAnswer: {question['open answer']}\n\n")
        write(f"PPL: {question['ppl']}\n\n")
        write("="*10)
        write('\n')

In [7]:
# Read the location candidate sentences that later cells group by 'gid'.
with open('location_candidates.json', 'r') as candidates_file:
    location_json_list = json.loads(candidates_file.read())

In [8]:
# Group the candidate sentences by their 'gid'; each group later becomes one
# question whose answer options are the group's locations.
loc_question_bundle = {}
for candidate in location_json_list:
    loc_question_bundle.setdefault(candidate['gid'], []).append(candidate)

In [9]:
def _build_location_question(sent_list):
    """Assemble one location question record from a group of candidate sentences.

    Each sentence dict is read for the keys 'tag', 'gid', 'subj', 'vp', 'loc'
    and 'ppl'. The sentence tagged 'ground-truth' supplies the id, subject,
    verb phrase and correct location. A sentence tagged 'distractor' supplies
    the wrong location used in the binary "no" question (if several carry
    that tag, the last one wins). Every sentence's location becomes one
    multiple-choice option.

    Returns:
        A dict holding the raw fields plus the multiple-choice, binary
        (yes/no), open question texts, their answers and the image prompt.

    Raises:
        AssertionError: if the group has no ground-truth or no distractor
            sentence, or does not contain exactly five sentences.
    """
    question_id = gt_subj = gt_verb = gt_loc = dt_loc = None
    ppl_by_loc = {}
    options = []
    for sent in sent_list:
        if sent['tag'] == 'ground-truth':
            question_id = sent['gid']
            gt_subj = sent['subj']
            gt_verb = sent['vp']
            gt_loc = sent['loc']
        elif sent['tag'] == 'distractor':
            dt_loc = sent['loc']
        ppl_by_loc[sent['loc']] = sent['ppl']
        options.append(sent['loc'])

    # The previous check `any([...]) != None` compared a bool with None and
    # therefore could never fail; verify each field individually instead.
    required = {
        'id': question_id,
        'subj': gt_subj,
        'vp': gt_verb,
        'loc': gt_loc,
        'distractor loc': dt_loc,
    }
    missing = [name for name, value in required.items() if value is None]
    assert not missing, f"group is missing required fields: {missing}"

    # Exactly five options are expected, one per letter A-E.
    assert len(options) == 5
    random.shuffle(options)  # randomise the position of the correct option
    ans_idx = options.index(gt_loc)

    gt_subj_the = replace_a_or_the(gt_subj, 'a')
    gt_subj_a = replace_a_or_the(gt_subj, 'the')

    # The open question doubles as the stem of the multiple-choice question.
    open_question = f"Where does {gt_subj_the} {gt_verb}?"
    option_lines = "\n".join(
        f"{index_to_letter[pos]}. {option}" for pos, option in enumerate(options)
    )

    return {
        'id': question_id,
        'subj': gt_subj,
        'vp': gt_verb,
        'gt_loc': gt_loc,
        'dt_loc': dt_loc,
        'ppl': ppl_by_loc,
        'choice': f"{open_question}\n{option_lines}",
        'choice answer': index_to_letter[ans_idx],
        # binary question - true
        'binary-yes': f"Does the image show that {gt_subj_a} {gt_verb} {gt_loc}?",
        'binary-yes answer': "Yes.",
        # binary question - false
        'binary-no': f"Does the image show that {gt_subj_a} {gt_verb} {dt_loc}?",
        'binary-no answer': "No.",
        'open': open_question,
        'open answer': gt_loc,
        'image prompt': f"Generate an image of {gt_subj} {gt_verb} {gt_loc}.",
    }


# One question per group, in bundle order. Option order comes from the global
# `random` state; no seed is set in this cell, so answer letters can differ
# between runs.
loc_questions = [
    _build_location_question(sent_list) for sent_list in loc_question_bundle.values()
]

with open('location_questions.json', 'w') as f:
    json.dump(loc_questions, f)

In [11]:
# Render the saved location questions as a human-readable text file, one
# "=" * 10 separated section per question.
with open('location_questions-h.txt', 'w') as outfile, open('location_questions.json', 'r') as infile:
    write = outfile.write
    for question in json.load(infile):
        write(f"Question{question['id']}:\n\n{question['image prompt']}\n{question['choice']}\n")
        write(f"Answer: {question['choice answer']}\n\n")
        write(f"{question['binary-yes']}\nAnswer: {question['binary-yes answer']}\n")
        write(f"{question['binary-no']}\nAnswer: {question['binary-no answer']}\n\n")
        write(f"{question['open']}\nAnswer: {question['open answer']}\n\n")
        write(f"PPL: {question['ppl']}\n\n")
        write("="*10)
        write('\n')