In [1]:
import json
from pprint import pprint

with open("obqa.jsonl", "w") as fout:
    for split in ["dev", "test", "train"]:
        with open(f"OBQA/{split}.jsonl") as f:
            for line in f:
                q = json.loads(line.strip())
                q["split"] = split
                fout.write(json.dumps(q) + "\n")

questions = []
with open("obqa.jsonl") as f:
    for line in f:
        q = json.loads(line.strip())
        questions.append(q)
print(len(questions))
pprint(questions[:3])

with open("question_stems.txt", "w") as f:
    for q in questions:
        f.write(q["question"]["stem"] + "\n")

5957
[{'answerKey': 'A',
  'id': '8-376',
  'question': {'choices': [{'label': 'A', 'text': 'Deep sea animals'},
                           {'label': 'B', 'text': 'fish'},
                           {'label': 'C', 'text': 'Long Sea Fish'},
                           {'label': 'D', 'text': 'Far Sea Animals'}],
               'stem': 'Frilled sharks and angler fish live far beneath the '
                       'surface of the ocean, which is why they are known as'},
  'split': 'dev'},
 {'answerKey': 'D',
  'id': '7-57',
  'question': {'choices': [{'label': 'A',
                            'text': 'is standard weight and size'},
                           {'label': 'B',
                            'text': 'is the opposite of variable'},
                           {'label': 'C', 'text': 'only needs a few'},
                           {'label': 'D', 'text': 'uses what it needs'}],
               'stem': 'Gas can fill any container it is given, and liquid'},
  'split': 'dev'},
 {'answerKey':

In [2]:
import re 
import string
from collections import defaultdict

import spacy 
from tqdm.notebook import tqdm
nlp = spacy.load("en_core_web_sm")

cnt = 0
processed_questions = []
counters = defaultdict(int)

"""
- not: add the word “not”
    - be not
    - can not
    - does not
    - not because
- word prefix: “un-”, “in-”, …
- antonym: “highest” → “lowest”
- prompt: add “which of the following is not true”, “choose the wrong answer”
"""

def join_spacy_tokens(tokens):
    ret = ""
    for t in tokens:
        if t[0] in string.punctuation:
            ret += t
        else:
            ret += (" " + t)
    return ret.strip()


def negate_root_verb(nlp, q_body, rank):
    doc = nlp(q_body.strip())
    tokens = [token.text for token in doc]

    root_i = None
    root_token = None
    for i, token in enumerate(doc):
        if token.dep_ == "ROOT" and token.pos_ == "VERB":
            root_i = i
            root_token = token
            break
    """
    https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    27. VB	Verb, base form
    28.	VBD	Verb, past tense
    29.	VBG	Verb, gerund or present participle
    30.	VBN	Verb, past participle
    31.	VBP	Verb, non-3rd person singular present
    32.	VBZ	Verb, 3rd person singular present
    """
    if root_i:
        if root_token.tag_ == "VBD":
            tokens = tokens[:root_i] + (["did", "not", root_token.lemma_] if rank % 2 else ["didn't", root_token.lemma_]) + tokens[root_i + 1:]
        elif root_token.tag_ == "VBP":
            tokens = tokens[:root_i] + (["do", "not", root_token.lemma_] if rank % 2 else ["don't", root_token.lemma_]) + tokens[root_i + 1:]
        elif root_token.tag_ == "VBZ":
            tokens = tokens[:root_i] + (["does", "not", root_token.lemma_] if rank % 2 else ["doesn't", root_token.lemma_]) + tokens[root_i + 1:]
        else:
            return None
    else:
        return None
    return join_spacy_tokens(tokens)          
    

### MAIN ###
for q in tqdm(questions):
    q_text = q["question"]["stem"]
    q_text_split = re.compile("[,\.\?]").split(q_text)
    q_body_old = q_text_split[-1].strip()

    q_body = q_text_split[-1].strip()
    q_words = q_body.split()

    q_body = " " + q_body
    if not q_words: continue

    if "Not" in q_words or "not" in q_words or "n't" in q_body\
        or "nothing" in q_words or "none" in q_words or "never" in q_words or "nobody" in q_words \
        or "nowhere" in q_words or "no" in q_words:
        q["negation_rule"] = "gold/not"

    elif "unable" in q_words or "unlike" in q_words or "unlikely" in q_words \
        or "incapable" in q_words or "unusual" in q_words or "impossible" in q_words \
        or "rarely" in q_words or "rare" in q_words or "barely" in q_words or "scarcely" in q_words:
        q["negation_rule"] = "gold/un-in-"

    elif "because" in q_words:
        q["negation_rule"] = "not/because"
        q_body = q_body.replace(" because", " not because")

    elif q_words.count("is") + q_words.count("are") + q_words.count("was") + q_words.count("were") == 1:
        q["negation_rule"] = "not/be"
        rank = counters[q["negation_rule"]]
        q_body = q_body.replace(" is", (" is not" if rank % 2 else " isn't"))
        q_body = q_body.replace(" are", (" are not" if rank % 2 else " aren't"))
        q_body = q_body.replace(" was", (" was not" if rank % 2 else " wasn't"))
        q_body = q_body.replace(" were", (" were not" if rank % 2 else " weren't"))

    elif (
            q_words.count("may") + q_words.count("might") \
            + q_words.count("can") + q_words.count("could") \
            + q_words.count("will") + q_words.count("would") == 1
        ):
        q["negation_rule"] = "not/modal"
        rank = counters[q["negation_rule"]]
        q_body = q_body.replace(" may", " may not")
        q_body = q_body.replace(" might", " might not")
        q_body = q_body.replace(" can", (" can not" if rank % 2 else " can't"))
        q_body = q_body.replace(" could", (" could not" if rank % 2 else " couldn't"))
        q_body = q_body.replace(" will", (" will not" if rank % 2 else " won't"))
        q_body = q_body.replace(" would", (" would not" if rank % 2 else " wouldn't"))

    elif negate_root_verb(nlp, q_body, counters["not/do"]):
        q["negation_rule"] = "not/do"
        rank = counters[q["negation_rule"]]
        q_body = negate_root_verb(nlp, q_body, rank)

    elif q_words[0] != "Which" and q_words[0] != "which" and q_text.strip()[-1] != "?":
        # if not any(set(word) == {"_"} for word in q_text.split()):
        #     q_body += " ____"
        q["negation_rule"] = "prompt"
        rank = counters[q["negation_rule"]]

    if "negation_rule" in q:
        if q["negation_rule"] == "prompt":
            rank = counters[q["negation_rule"]]
            q["question"]["stem"] = ("Choose the incorrect answer. " if rank % 2 else "Which of the following is not true? ") + q_text
        
        else:
            q["question"]["stem"] = q_text.replace(q_body_old.strip(), q_body.strip())

        processed_questions.append(q)
        counters[q["negation_rule"]] += 1

print(len(processed_questions))
pprint(dict(counters))
pprint(processed_questions[:3])

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5957.0), HTML(value='')))


3784
{'gold/not': 14,
 'gold/un-in-': 40,
 'not/be': 1015,
 'not/because': 193,
 'not/do': 804,
 'not/modal': 1095,
 'prompt': 623}
[{'answerKey': 'D',
  'id': '7-57',
  'negation_rule': 'prompt',
  'question': {'choices': [{'label': 'A',
                            'text': 'is standard weight and size'},
                           {'label': 'B',
                            'text': 'is the opposite of variable'},
                           {'label': 'C', 'text': 'only needs a few'},
                           {'label': 'D', 'text': 'uses what it needs'}],
               'stem': 'Which of the following is not true? Gas can fill any '
                       'container it is given, and liquid'},
  'split': 'dev'},
 {'answerKey': 'A',
  'id': '7-1024',
  'negation_rule': 'not/because',
  'question': {'choices': [{'label': 'A',
                            'text': 'they are genetically called to'},
                           {'label': 'B',
                            'text': 'their children

In [3]:
import random
from copy import deepcopy

random.seed(1234)

new_questions = []

for q in processed_questions:
    answer_index = ord(q["answerKey"]) - ord("A")
    correct_answer = deepcopy(q["question"]["choices"][answer_index])
    wrong_answers = [a for a in q["question"]["choices"] if a["label"] != q["answerKey"]]
    wrong_answer = deepcopy(random.choice(wrong_answers))
    new_q = deepcopy(q)
    # negate correct answer
    if random.random() > 0.5:
        correct_answer["label"] = "A"
        wrong_answer["label"] = "B"
        new_q["question"]["choices"] = [correct_answer, wrong_answer]
        new_q["answerKey"] = ("A" if q["negation_rule"].startswith("gold/") else "B")
    else:
        correct_answer["label"] = "B"
        wrong_answer["label"] = "A"
        new_q["question"]["choices"] = [wrong_answer, correct_answer]
        new_q["answerKey"] = ("B" if q["negation_rule"].startswith("gold/") else "A")
    new_questions.append(new_q)
pprint(new_questions[:5])

with open("obqa_dev_negated.jsonl", "w") as f:
    for new_q in new_questions:
        f.write(json.dumps(new_q) + "\n")

with open("sample_un_in.jsonl", "w") as f_un_in,\
    open("sample_not_be.jsonl", "w") as f_not_be,\
    open("sample_not_because.jsonl", "w") as f_not_because,\
    open("sample_not_do.jsonl", "w") as f_not_verb,\
    open("sample_not_modal.jsonl", "w") as f_not_modal,\
    open("sample_prompt.jsonl", "w") as f_prompt:

    buffers = defaultdict(list)

    for new_q in new_questions:
        buffer = buffers[new_q["negation_rule"]]
        buffer.append(new_q)

    for negation_rule, buffer in buffers.items():
        random.shuffle(buffer)
        target_file = {
            "gold/un-in-": f_un_in,
            "not/be": f_not_be,
            "not/because": f_not_because,
            "not/do": f_not_verb,
            "not/modal": f_not_modal,
            "prompt": f_prompt,
        }.get(negation_rule, None)
        if not target_file: continue
        for new_q in buffer:
            target_file.write(json.dumps(new_q) + "\n")

[{'answerKey': 'A',
  'id': '7-57',
  'negation_rule': 'prompt',
  'question': {'choices': [{'label': 'A',
                            'text': 'is the opposite of variable'},
                           {'label': 'B', 'text': 'uses what it needs'}],
               'stem': 'Which of the following is not true? Gas can fill any '
                       'container it is given, and liquid'},
  'split': 'dev'},
 {'answerKey': 'B',
  'id': '7-1024',
  'negation_rule': 'not/because',
  'question': {'choices': [{'label': 'A',
                            'text': 'they are genetically called to'},
                           {'label': 'B',
                            'text': 'their children ask for them to'}],
               'stem': 'When birds migrate south for the winter, they do it '
                       'not because'},
  'split': 'dev'},
 {'answerKey': 'A',
  'id': '959',
  'negation_rule': 'not/be',
  'question': {'choices': [{'label': 'A', 'text': 'east'},
                           {'label