## Process query data

Process origin data.

In [11]:
import datasets
from datasets import load_dataset
import pandas as pd
import json
from tqdm import tqdm
import os
import random

In [None]:
data_path = "<The default data path at your disk>"
logic_data_path = "<The default data path at your disk>"
# aqua_rat  Calc-ape210k  gsm8k  instruction  PRM800K  svamp  TAL-SCQ5K  TheoremQA
save_path = "../data/math/origin/"
logic_save_path = "../data/logic/origin"

In [112]:
data_mapping = {
    "aqua_rat": "aqua_rat",
    "calc": "Calc-ape210k",
    "gsm8k": "gsm8k",
    "prm800k": "PRM800K",
    "svamp": "svamp",
    "tal-scq5k": "TAL-SCQ5K",
    "theoremqa": "TheoremQA",
    "math": "MATH",
    "tabmwp": "tabmwp",
    "aime": "AIME",
    "proofwriter": "proofwriter",
    "folio": "folio",
    "logicnli": "logicnli",
    "ar_lsat": "AR-LSAT",
    "mindgames": "mindgames",
    "reclor": "reclor",
    "entailment_bank": "entailment_bank",
    "strategy_qa": "StrategyQA",
    "metamathQA": "MetaMathQA",
    "numinamath": "NuminaMath-CoT"
}

In [60]:
def save_examples(data_name, data_kind, examples, save_path):
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    with open(os.path.join(save_path, f"{data_name}_{data_kind}.jsonl"), "w", encoding="utf-8") as fw:
        for example in tqdm(examples):
            fw.write(json.dumps(example, ensure_ascii=False) + "\n")

In [24]:
def remove_boxed(s):
    left = "\\boxed{"
    try:
        assert s[:len(left)] == left
        assert s[-1] == "}"
        return s[len(left):-1]
    except:
        return s


def extract_boxed_answer(string):
    idx = string.rfind("\\boxed")
    if idx < 0:
        return None

    i = idx
    right_brace_idx = None
    num_left_braces_open = 0
    while i < len(string):
        if string[i] == "{":
            num_left_braces_open += 1
        if string[i] == "}":
            num_left_braces_open -= 1
            if num_left_braces_open == 0:
                right_brace_idx = i
                break
        i += 1

    if right_brace_idx == None:
        retval = None
    else:
        retval = string[idx:right_brace_idx + 1]

    return retval

def extract_answer(text):
    return remove_boxed(extract_boxed_answer(text))


In [101]:
### Calc
def load_calc(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    for example in data["train"]:
        # print(example)
        train_examples.append({
            "id": example["id"],
            "question": example["question"],
            "labels": [example["result"], str(example["result_float"])],
        })
    for example in data["validation"]:
        dev_examples.append({
            "id": example["id"],
            "question": example["question"],
            "labels": [example["result"], str(example["result_float"])],
        })
    for example in data["test"]:
        test_examples.append({
            "id": example["id"],
            "question": example["question"],
            "labels": [example["result"], str(example["result_float"])],
        })
    save_examples(data_name, "train", train_examples, save_path)
    save_examples(data_name, "dev", dev_examples, save_path)
    save_examples(data_name, "test", test_examples, save_path)
load_calc(data_path, data_mapping["calc"])

100%|██████████| 195179/195179 [00:01<00:00, 140372.62it/s]
100%|██████████| 1783/1783 [00:00<00:00, 184835.49it/s]
100%|██████████| 1785/1785 [00:00<00:00, 184028.53it/s]


In [131]:
### AQUA
def load_aqua(data_path, data_name):
    def obtain_correct_answer(options, label):
        label_names = ["A", "B", "C", "D", "E", "F", "G"]
        label_index = label_names.index(label)
        return options[label_index][2:]
        
        
    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    for ei, example in enumerate(data["train"]):
        # print(example)
        train_examples.append({
            "id": f"aqua_rat_train_{ei}",
            "question": example["question"] + "\nOptions: " + "\t".join(example["options"]),
            "labels": [example["correct"], obtain_correct_answer(example["options"], example["correct"])],
        })
    for ei, example in enumerate(data["validation"]):
        # print(example)
        dev_examples.append({
            "id": f"aqua_rat_dev_{ei}",
            "question": example["question"] + "\nOptions: " + "\t".join(example["options"]),
            "labels": [example["correct"], obtain_correct_answer(example["options"], example["correct"])],
        })
    for ei, example in enumerate(data["test"]):
        # print(example)
        test_examples.append({
            "id": f"aqua_rat_test_{ei}",
            "question": example["question"] + "\nOptions: " + "\t".join(example["options"]),
            "labels": [example["correct"], obtain_correct_answer(example["options"], example["correct"])],
        })
    # print(train_examples[10])
    save_examples(data_name, "train", train_examples, save_path)
    save_examples(data_name, "dev", dev_examples, save_path)
    save_examples(data_name, "test", test_examples, save_path)
load_aqua(data_path, data_mapping["aqua_rat"])

{'id': 'aqua_rat_train_10', 'question': 'If Tim had lunch at $50 and he gave 20% tip, how much did he spend?\nOptions: A)A)$60.00\tB)B)$35.42\tC)C)$60.60\tD)D)$21.56\tE)E)$78.45', 'labels': ['A', 'A)$60.00']}


In [64]:
### gsm8k
def load_gsm8k(data_path, data_name):

    def extract_answser(text):
        return text.split("\n")[-1].replace("#### ", "")
    data = load_dataset(os.path.join(data_path, data_name), "main")
    train_examples, dev_examples, test_examples = list(), list(), list()
    for ei, example in enumerate(data["train"]):
        # print(example)
        train_examples.append({
            "id": f"gsm8k_train_{ei}",
            "question": example["question"],
            "labels": [extract_answser(example["answer"])],
        })
    for ei, example in enumerate(data["test"]):
        # print(example)
        test_examples.append({
            "id": f"gsm8k_test_{ei}",
            "question": example["question"],
            "labels": [extract_answser(example["answer"])],
        })
    save_examples(data_name, "train", train_examples, save_path)
    save_examples(data_name, "test", test_examples, save_path)
load_gsm8k(data_path, data_mapping["gsm8k"])

100%|██████████| 7473/7473 [00:00<00:00, 121351.63it/s]
100%|██████████| 1319/1319 [00:00<00:00, 181113.30it/s]


In [68]:
### svamp
def load_svamp(data_path, data_name):

    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    for ei, example in enumerate(data["train"]):
        # print(example)
        train_examples.append({
            "id": f"svamp_train_{ei}",
            "question": example["question_concat"],
            "labels": [example["Answer"]],
        })
    for ei, example in enumerate(data["test"]):
        # print(example)
        test_examples.append({
            "id": f"svamp_test_{ei}",
            "question": example["question_concat"],
            "labels": [example["Answer"]],
        })
    save_examples(data_name, "train", train_examples, save_path)
    save_examples(data_name, "test", test_examples, save_path)
load_svamp(data_path, data_mapping["svamp"])

100%|██████████| 700/700 [00:00<00:00, 206848.87it/s]
100%|██████████| 300/300 [00:00<00:00, 201681.55it/s]


In [130]:
### TAL-SCQ5K
def load_tal(data_path, data_name):

    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    for ei, example in enumerate(data["train"]):
        # print(example)
        train_examples.append({
            "id": f"tal_scq5k_train_{ei}",
            "question": (example["problem"] + "\n" + "\t".join(["{}. {}".format(option[0]["aoVal"], option[0]["content"]) for option in example["answer_option_list"]])).replace("$$", ""),
            "labels": [example["answer_value"]],
        })
    for ei, example in enumerate(data["test"]):
        # print(example)
        test_examples.append({
            "id": f"tal_scq5k_test_{ei}",
            "question": (example["problem"] + "\n" + "\t".join(["{}. {}".format(option[0]["aoVal"], option[0]["content"]) for option in example["answer_option_list"]])).replace("$$", ""),
            "labels": [example["answer_value"]],
        })
    print(train_examples[10])
    # save_examples(data_name, "train", train_examples, save_path)
    # save_examples(data_name, "test", test_examples, save_path)
load_tal(data_path, data_mapping["tal-scq5k"])

{'id': 'tal_scq5k_train_10', 'question': '对于任何自然数，定义n!=1\\times 2\\times 3\\times \\cdots \\times n．那么请问算式2022!-3!的计算结果的个位数字是． \nA. 2 \tB. 4 \tC. 6 \tD. 8 ', 'labels': ['B']}


In [90]:
### TheoremQA
def load_theoremqa(data_path, data_name):

    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    for ei, example in enumerate(data["test"]):
        # print(example)
        test_examples.append({
            "id": f"theoremqa_test_{ei}",
            "question": example["Question"],
            "labels": [example["Answer"]],
        })
    save_examples(data_name, "test", test_examples, save_path)
load_theoremqa(data_path, data_mapping["theoremqa"])

100%|██████████| 800/800 [00:00<00:00, 183027.50it/s]


In [94]:
### PRM800K
def load_prm800k(data_path, data_name):

    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    for ei, example in enumerate(data["train"]):
        # print(example)
        train_examples.append({
            "id": f"prm800k_train_{ei}",
            "question": example["problem"],
            "labels": [example["answer"]],
        })
    for ei, example in enumerate(data["test"]):
        # print(example)
        test_examples.append({
            "id": f"tal_scq5k_test_{ei}",
            "question": example["problem"],
            "labels": [example["answer"]],
        })
    save_examples(data_name, "train", train_examples, save_path)
    save_examples(data_name, "test", test_examples, save_path)
load_prm800k(data_path, data_mapping["prm800k"])

100%|██████████| 12000/12000 [00:00<00:00, 67578.49it/s]
100%|██████████| 500/500 [00:00<00:00, 187212.28it/s]


In [None]:
def load_gpqa(data_path, data_name):
    data = pd.read_csv(data_path)
    test_examples = list()
    # 遍历每一行
    for index, row in data.iterrows():
        # 在这里处理每一行的数据
        test_examples.append({
            "idx": index,
            "question": row["Question"],
            "labels": [row["Correct Answer"]],
        })
    save_examples(data_name, "test", test_examples, save_path)
load_gpqa("<your data path>/gpqa_diamond.csv", "GPQA_Diamond")


100%|██████████| 198/198 [00:00<00:00, 129417.51it/s]


In [None]:
def load_gpqa_mcqa(data_path, data_name):
    data = pd.read_csv(data_path)
    test_examples = list()
    # 遍历每一行
    for index, row in data.iterrows():
        correct_answer = row["Correct Answer"]
        incorrect_answer_1 = row["Incorrect Answer 1"]
        incorrect_answer_2 = row["Incorrect Answer 2"]
        incorrect_answer_3 = row["Incorrect Answer 3"]
        all_answers = [correct_answer, incorrect_answer_1, incorrect_answer_2, incorrect_answer_3]
        random.shuffle(all_answers)
        option_labels = ["A", "B", "C", "D"]
        option_text = ["{}. {}".format(option_label, option_name) for option_label, option_name in zip(option_labels, all_answers)]
        correct_label = option_labels[all_answers.index(correct_answer)]
        test_examples.append({
            "idx": index,
            "question": row["Question"] + "\nOptions: " + "\n".join(option_text),
            "labels": [correct_label, correct_answer],
        })
    save_examples(data_name, "test", test_examples, save_path)
load_gpqa_mcqa("<your data path>/gpqa_diamond.csv", "GPQA_Diamond_MCQA")


100%|██████████| 198/198 [00:00<00:00, 107965.70it/s]


In [None]:
def load_math(data_path, data_name):
    def load_jsonl(file):
        examples = list()
        with open(file, "r", encoding="utf-8") as fr:
            for line in tqdm(fr.readlines()):
                examples.append(json.loads(line))
        return examples
    
    train_examples, dev_examples, test_examples = list(), list(), list()
    data_file_list = os.listdir(data_path)
    index = 0
    for data_file in data_file_list:
        examples = load_jsonl(os.path.join(data_path, data_file))
        # 过滤掉level较低的
        for example in examples:
            if example["level"] in ["Level 4", "Level 5"]:
                if "train" in data_file:
                    train_examples.append({
                        "idx": index,
                        "question": example["problem"],
                        "labels": [extract_answer(example["solution"])],
                    })
                else:
                    test_examples.append({
                        "idx": index,
                        "question": example["problem"],
                        "labels": [extract_answer(example["solution"])],
                    })
                index += 1
    print("train num={}".format(len(train_examples)))
    print("test num={}".format(len(test_examples)))
    save_examples(data_name, "train", train_examples, save_path)
    save_examples(data_name, "test", test_examples, save_path)
            
load_math("<your data path>/data", "MATH_hard")

100%|██████████| 1187/1187 [00:00<00:00, 237655.20it/s]
100%|██████████| 1744/1744 [00:00<00:00, 237172.24it/s]
100%|██████████| 474/474 [00:00<00:00, 233509.53it/s]
100%|██████████| 771/771 [00:00<00:00, 218530.10it/s]
100%|██████████| 479/479 [00:00<00:00, 168249.86it/s]
100%|██████████| 870/870 [00:00<00:00, 153851.27it/s]
100%|██████████| 903/903 [00:00<00:00, 185514.13it/s]
100%|██████████| 1295/1295 [00:00<00:00, 178836.55it/s]
100%|██████████| 540/540 [00:00<00:00, 239674.51it/s]
100%|██████████| 869/869 [00:00<00:00, 239713.92it/s]
100%|██████████| 871/871 [00:00<00:00, 247475.87it/s]
100%|██████████| 1205/1205 [00:00<00:00, 228383.93it/s]
100%|██████████| 546/546 [00:00<00:00, 145782.03it/s]
100%|██████████| 746/746 [00:00<00:00, 140110.64it/s]


train num=3994
test num=2538


100%|██████████| 3994/3994 [00:00<00:00, 133198.03it/s]
100%|██████████| 2538/2538 [00:00<00:00, 141360.38it/s]


In [56]:
def load_tabmwp(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    # print(data["train"][0])
    # 遍历每一行
    for index, row in enumerate(data["train"]):
        # 在这里处理每一行的数据
        if row["table_title"] is not None:
            prompt = "Read the following table about {}, and answer the question.\n{}\nQuestion: {}".format(row["table_title"], row["table"], row["question"])
        else:
            prompt = "Read the following table, and answer the question.\n{}\nQuestion: {}".format(row["table"], row["question"])
        if row["choices"] is not None:
            prompt += "\nOptions: " + "\t".join(row["choices"])
        train_examples.append({
            "idx": index,
            "question": prompt,
            "labels": [row["answer"]],
        })
    save_examples(data_name, "train", train_examples, save_path)

load_tabmwp(data_path, "tabmwp")

100%|██████████| 1000/1000 [00:00<00:00, 164109.24it/s]


In [55]:
def load_aime(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])
    for index, row in enumerate(data["train"]):
        if row["Year"] == 2024:
            # print(row)
            continue
        train_examples.append({
            "idx": index,
            "question": row["Question"],
            "labels": [row["Answer"]],
        })
    print("train num={}".format(len(train_examples)))
    save_examples(data_name, "train", train_examples, save_path)

load_aime(data_path, "AIME")

{'ID': '1983-1', 'Year': 1983, 'Problem Number': 1, 'Question': 'Let $x$ , $y$ and $z$ all exceed $1$ and let $w$ be a positive number such that $\\log_xw=24$ , $\\log_y w = 40$ and $\\log_{xyz}w=12$ . Find $\\log_zw$ .', 'Answer': '60', 'Part': None}
train num=919


100%|██████████| 919/919 [00:00<00:00, 152035.87it/s]


In [139]:
def load_metamathQA(data, data_name):
    data = load_dataset(os.path.join(data_path, data_mapping[data_name]))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])
    avg_length = 0
    for index, row in enumerate(data["train"]):
        train_examples.append({
            "idx": index,
            "question": row["query"],
            "labels": [row["response"].split("The answer is: ")[-1]],
            "response": row["response"],
        })
        avg_length += len(row["response"])
    avg_length /= len(train_examples)
    print("avg_length={}".format(avg_length))
    ## 根据response的长度降序排序
    sorted(train_examples, key=lambda i:len(i["response"]), reverse=True)
    new_train_examples = list()
    new_avg_length = 0
    ei = 0
    for example in tqdm(train_examples):
        if len(example["response"]) >= avg_length * 0.8:
            new_train_examples.append({
                "idx": ei,
                "question": example["question"],
                "labels": example["labels"],
            })
            ei += 1
            new_avg_length += len(example["response"])
    new_avg_length /= len(new_train_examples)
    print("new_avg_length={}".format(new_avg_length))
        
    print("train num={}".format(len(new_train_examples)))
    print(new_train_examples[139])
    random.shuffle(new_train_examples)
    save_examples(data_name, "train", new_train_examples, save_path)
load_metamathQA(data_path, "metamathQA")

{'query': "Gracie and Joe are choosing numbers on the complex plane. Joe chooses the point $1+2i$. Gracie chooses $-1+i$. How far apart are Gracie and Joe's points?", 'response': "The distance between two points $(x_1,y_1)$ and $(x_2,y_2)$ in the complex plane is given by the formula $\\sqrt{(x_2-x_1)^2+(y_2-y_1)^2}$.\nIn this case, Joe's point is $(1,2)$ and Gracie's point is $(-1,1)$.\nSo the distance between their points is $\\sqrt{((-1)-(1))^2+((1)-(2))^2}=\\sqrt{(-2)^2+(-1)^2}=\\sqrt{4+1}=\\sqrt{5}$.\nTherefore, Gracie and Joe's points are $\\boxed{\\sqrt{5}}$ units apart.\nThe answer is: \\sqrt{5}", 'type': 'MATH_AnsAug', 'original_question': "Gracie and Joe are choosing numbers on the complex plane. Joe chooses the point $1+2i$. Gracie chooses $-1+i$. How far apart are Gracie and Joe's points?"}


In [142]:
def load_numinamath(data, data_name):
    data = load_dataset(os.path.join(data_path, data_mapping[data_name]))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])

    source_set = set()
    avg_length = 0
    for index, row in enumerate(data["train"]):
        source = row["source"]
        source_set.add(source)
        answer = extract_answer(row["solution"])
        if answer is not None and answer.strip() != "":
            if source in ["amc_aime", "olympiads"]:
                train_examples.append({
                    "idx": index,
                    "question": row["problem"],
                    "labels": [answer],
                    "response": row["solution"],
                })
        avg_length += len(row["solution"])
    avg_length /= len(train_examples)
    print("avg_length={}".format(avg_length))
    # return train_examples
    ## 根据response的长度降序排序
    sorted(train_examples, key=lambda i:len(i["response"]), reverse=True)
    new_train_examples = list()
    new_avg_length = 0
    ei = 0
    for example in tqdm(train_examples):
        # if len(example["response"]) >= avg_length * 1.2:
        if len(example["response"]) >= 0:
            new_train_examples.append({
                "idx": ei,
                "question": example["question"],
                "labels": example["labels"],
            })
            ei += 1
            new_avg_length += len(example["response"])
    new_avg_length /= len(new_train_examples)
    print("source_set=", source_set)
    print("new_avg_length={}".format(new_avg_length))
        
    print("train num={}".format(len(new_train_examples)))
    print(new_train_examples[139])
    random.shuffle(new_train_examples)
    save_examples(data_name, "train", new_train_examples, save_path)
load_numinamath(data_path, "numinamath")

{'source': 'synthetic_math', 'problem': 'Consider the terms of an arithmetic sequence: $-\\frac{1}{3}, y+2, 4y, \\ldots$. Solve for $y$.', 'solution': 'For an arithmetic sequence, the difference between consecutive terms must be equal. Therefore, we can set up the following equations based on the sequence given:\n\\[ (y + 2) - \\left(-\\frac{1}{3}\\right) = 4y - (y+2) \\]\n\nSimplify and solve these equations:\n\\[ y + 2 + \\frac{1}{3} = 4y - y - 2 \\]\n\\[ y + \\frac{7}{3} = 3y - 2 \\]\n\\[ \\frac{7}{3} + 2 = 3y - y \\]\n\\[ \\frac{13}{3} = 2y \\]\n\\[ y = \\frac{13}{6} \\]\n\nThus, the value of $y$ that satisfies the given arithmetic sequence is $\\boxed{\\frac{13}{6}}$.', 'messages': [{'content': 'Consider the terms of an arithmetic sequence: $-\\frac{1}{3}, y+2, 4y, \\ldots$. Solve for $y$.', 'role': 'user'}, {'content': 'For an arithmetic sequence, the difference between consecutive terms must be equal. Therefore, we can set up the following equations based on the sequence given:\

100%|██████████| 133832/133832 [00:00<00:00, 981804.67it/s]

source_set= {'amc_aime', 'aops_forum', 'olympiads', 'synthetic_amc', 'orca_math', 'math', 'gsm8k', 'cn_k12', 'synthetic_math'}
new_avg_length=2097.5472233845417
train num=133832
{'idx': 139, 'question': 'Find the value of the expression \\(\\frac{a^{2}}{b c}+\\frac{b^{2}}{a c}+\\frac{c^{2}}{a b}\\) if \\(a + b + c = 0\\).', 'labels': ['3']}



100%|██████████| 133832/133832 [00:01<00:00, 106811.33it/s]


In [122]:
train_examples[90102]

{'idx': 93713,
 'question': 'Let  $a, b, c$  be pairwise coprime natural numbers. A positive integer  $n$  is said to be *stubborn* if it cannot be written in the form \r $n = bcx+cay+abz$ , for some  $x, y, z \\in\\mathbb{ N}.$  Determine the number of stubborn numbers.',
 'labels': ['abc - ab - bc - ca'],
 'response': 'To determine the number of stubborn numbers, we need to analyze the given problem in detail. We are given that \\(a, b, c\\) are pairwise coprime natural numbers, and a positive integer \\(n\\) is said to be stubborn if it cannot be written in the form \\(n = bcx + cay + abz\\) for some \\(x, y, z \\in \\mathbb{N}\\).\n\n1. **Understanding the Form \\(n = bcx + cay + abz\\)**:\n   - Since \\(a, b, c\\) are pairwise coprime, the form \\(n = bcx + cay + abz\\) represents a linear combination of \\(bc, ca, ab\\) with coefficients in \\(\\mathbb{N}\\).\n   - This form is similar to the Frobenius coin problem but in three dimensions.\n\n2. **Largest Non-Stubborn Number**:\n

In [68]:
def load_proofwriter(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])
    for index, row in enumerate(data["train"]):
        train_examples.append({
            "idx": index,
            "question": row["theory"] + "\n" + "Question: Please check the following fact and give your answer from 'True' or 'False' or 'Unknown': " + row["question"],
            "labels": [row["answer"]],
        })
    random.shuffle(train_examples)
    train_examples = train_examples[:2000]
    save_examples(data_name, "train", train_examples, logic_save_path)
load_proofwriter(logic_data_path, "proofwriter")

{'id': 'AttNeg-OWA-D0-4611', 'maxD': 0, 'NFact': 7, 'NRule': 8, 'theory': 'Gary is furry. Gary is nice. Gary is red. Gary is rough. Gary is not smart. Gary is white. Gary is young. If Gary is nice and Gary is not white then Gary is red. If someone is white then they are red. All young people are furry. If someone is white and not red then they are furry. Smart, red people are rough. If Gary is not red and Gary is not furry then Gary is not smart. If Gary is white then Gary is not smart. If someone is rough and not white then they are not smart.', 'question': 'Gary is white.', 'answer': 'True', 'QDep': 0, 'QLen': 1.0, 'allProofs': '@0: Gary is furry.[(triple1 OR ((triple7) -> rule3))] Gary is nice.[(triple2)] Gary is not smart.[(triple5 OR ((triple6) -> rule7))] Gary is red.[(triple3 OR ((triple6) -> rule2))] Gary is rough.[(triple4)] Gary is white.[(triple6)] Gary is young.[(triple7)]', 'config': 'depth-0'}


100%|██████████| 2000/2000 [00:00<00:00, 84184.09it/s]


In [71]:
def load_folio(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])
    for index, row in enumerate(data["train"]):
        train_examples.append({
            "idx": index,
            "question": "Please check whether the conclusion is entailment to the premise, and answer 'True' or 'False' or 'Uncertain'.\nPremise: {}\nConclusion: {}".format(row["premises"], row["conclusion"]),
            "labels": [row["label"]],
        })
    save_examples(data_name, "train", train_examples, logic_save_path)
load_folio(logic_data_path, "folio")

{'story_id': 406, 'premises': "All people who regularly drink coffee are dependent on caffeine.\nPeople regularly drink coffee, or they don't want to be addicted to caffeine, or both.\nNo one who doesn't want to be addicted to caffeine is unaware that caffeine is a drug.\nRina is either a student who is unaware that caffeine is a drug, or she is not a student and is she aware that caffeine is a drug.\nRina  is either a student who is dependent on caffeine, or she is not a student and not dependent on caffeine.", 'premises-FOL': '∀x (DrinkRegularly(x, coffee) → IsDependentOn(x, caffeine))\n∀x (DrinkRegularly(x, coffee)  ∨ (¬WantToBeAddictedTo(x, caffeine)))\n∀x (¬WantToBeAddictedTo(x, caffeine) → ¬AwareThatDrug(x, caffeine))\n¬(Student(rina) ⊕  ¬AwareThatDrug(rina, caffeine))\n¬(IsDependentOn(rina, caffeine) ⊕ Student(rina))', 'conclusion': "Rina doesn't want to be addicted to caffeine or is unaware that caffeine is a drug.", 'conclusion-FOL': '¬WantToBeAddictedTo(rina, caffeine) ∨ (¬Aw

100%|██████████| 1001/1001 [00:00<00:00, 88864.63it/s]


In [75]:
def load_logicnli(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])
    for index, row in enumerate(data["train"]):
        train_examples.append({
            "idx": index,
            "question": "Please check whether the hypothesis is entailment to the premise, and answer 'entailment' or 'contradiction' or 'neutral' or 'self_contradiction'.\nPremise: {}\nHypothesis: {}".format(row["premise"], row["hypothesis"]),
            "labels": [row["label"]],
        })
    random.shuffle(train_examples)
    train_examples = train_examples[:8000]
    save_examples(data_name, "train", train_examples, logic_save_path)
load_logicnli(logic_data_path, "logicnli")

{'premise': 'Nathalie is not blue.\nGabriel is concerned.\nNathalie is not concerned.\nBaird is concerned.\nBaird is serious.\nQuinlan is not entire.\nJohn is not fresh.\nJohn is blue.\nGabriel is serious.\nArthur is serious.\nGabriel is not entire.\nNathalie is not accurate.If there is someone who is either not concerned or fresh, then John is entire.\nSomeone who is eithor not fresh or entire is always not serious.\nIf John is not serious and Quinlan is fresh, then Baird is not entire.\nIf Gabriel is not serious, then Quinlan is not blue and Gabriel is accurate.\nIf Nathalie is not blue, then Collier is entire.\nIf someone is fresh or he is not concerned, then he is not blue.\nIf there is someone who is both serious and fresh, then Baird is not blue.\nIf someone is concerned and serious, then he is entire, and vice versa.\nNathalie being not fresh and Baird being serious imply that Nathalie is blue.\nIf there is at least one people who is both not accurate and entire, then Collier is

100%|██████████| 8000/8000 [00:00<00:00, 44671.16it/s]


In [81]:
def load_arlsat(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_mapping[data_name]))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])
    for index, row in enumerate(data["train"]):
        options = row["answers"]
        label_names = ["A", "B", "C", "D", "E", "F", "G"]
        train_examples.append({
            "idx": index,
            "question": row["context"] + "\nQuestion: " + row["question"] + "\nOptions: " + "\t".join(["({}). {}".format(label_names[i], options[i]) for i in range(len(options))]),
            "labels": [label_names[row["label"]], options[row["label"]]],
        })
    save_examples(data_name, "train", train_examples, logic_save_path)
load_arlsat(logic_data_path, "ar_lsat")

{'context': 'Exactly six trade representatives negotiate a treaty: Klosnik, Londi, Manley, Neri, Osata, Poirier. There are exactly six chairs evenly spaced around a circular table. The chairs are numbered 1 through 6, with successively numbered chairs next to each other and chair number 1 next to chair number 6. Each chair is occupied by exactly one of the representatives. The following conditions apply: Poirier sits immediately next to Neri. Londi sits immediately next to Manley, Neri, or both. Klosnik does not sit immediately next to Manley. If Osata sits immediately next to Poirier, Osata does not sit immediately next to Manley.', 'question': 'Which one of the following seating arrangements of the six representatives in chairs 1 through 6 would NOT violate the stated conditions?', 'answers': ['Klosnik, Poirier, Neri, Manley, Osata, Londi', 'Klosnik, Londi, Manley, Poirier, Neri, Osata', 'Klosnik, Londi, Manley, Osata, Poirier, Neri', 'Klosnik, Osata, Poirier, Neri, Londi, Manley', '

100%|██████████| 1585/1585 [00:00<00:00, 42750.31it/s]


In [86]:
def load_reclor(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_mapping[data_name]))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][1000])
    for index, row in enumerate(data["train"]):
        options = row["answers"]
        label_names = ["A", "B", "C", "D", "E", "F", "G"]
        train_examples.append({
            "idx": index,
            "question": row["context"] + "\nQuestion: " + row["question"] + "\nOptions: " + "\t".join(["({}). {}".format(label_names[i], options[i]) for i in range(len(options))]),
            "labels": [label_names[row["label"]], options[row["label"]]],
        })
    save_examples(data_name, "train", train_examples, logic_save_path)
load_reclor(logic_data_path, "reclor")

{'context': 'A graduate degree in policymaking is necessary to serve in the presidential cabinet. In addition, every member of the cabinet must pass a security clearance. No person with a felony can pass a security clearance. Rick holds a graduate degree in policymaking, but he has a conviction for driving under the influence. Ttherefore, Rick cannot serve in the cabinet.', 'question': "The argument's conclusion follows logically if which one of the following is assumed?", 'answers': ['Holding a graduate degree is less important than having a felony conviction.', 'Driving under the influence is a felony.', 'Anyone without a felony conviction can pass a security clearance.', "Rick's conviction for drunk driving calls his character in question."], 'label': 1, 'id_string': 'train_1000'}


100%|██████████| 4638/4638 [00:00<00:00, 25915.88it/s]


In [88]:
def load_entailment_bank(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_name))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])
    for index, row in enumerate(data["train"]):
        train_examples.append({
            "idx": index,
            "question": "Question: {}".format(row["question"]),
            "labels": [row["answer"][0], row["cot"][-1]],
        })
    save_examples(data_name, "train", train_examples, logic_save_path)
load_entailment_bank(logic_data_path, "entailment_bank")

{'id': 'entailment_bank_train_0', 'ref_id': 'MDSA_2009_4_30', 'question': 'Stars are organized into patterns called constellations. One constellation is named Leo. Which statement best explains why Leo appears in different areas of the sky throughout the year?', 'type': 'text', 'choices': [], 'context': '', 'cot': ['Leo is a kind of constellation.', 'A constellation contains stars.', 'Therefore, leo is a constellation containing stars.', 'Leo is a constellation containing stars.', 'The earth revolving around the sun causes stars to appear in different areas in the sky at different times of year.', 'Therefore, the earth revolving around the sun causes leo to appear in different areas in the sky at different times of year.'], 'answer': ['Earth revolves around the sun.'], 'generated_cot': [], 'feedback': []}


100%|██████████| 1840/1840 [00:00<00:00, 176739.78it/s]


In [93]:
def load_strategyqa(data_path, data_name):
    data = load_dataset(os.path.join(data_path, data_mapping[data_name]))
    train_examples, dev_examples, test_examples = list(), list(), list()
    print(data["train"][0])
    for index, row in enumerate(data["train"]):
        train_examples.append({
            "idx": index,
            "question": "Give some facts, please answer the question by 'True' or 'False'.\nFacts: {}\nQuestion: {}".format(row["facts"], row["question"]),
            "labels": [str(row["answer"])],
        })
    save_examples(data_name, "train", train_examples, logic_save_path)
load_strategyqa(logic_data_path, "strategy_qa")

{'qid': '4fd64bb6ce5b78ab20b6', 'term': 'Mixed martial arts', 'description': 'full contact combat sport', 'question': 'Is Mixed martial arts totally original from Roman Colosseum games?', 'answer': False, 'facts': 'Mixed Martial arts in the UFC takes place in an enclosed structure called The Octagon. The Roman Colosseum games were fought in enclosed arenas where combatants would fight until the last man was standing. Mixed martial arts contests are stopped when one of the combatants is incapacitated. The Roman Colosseum was performed in front of crowds that numbered in the tens of thousands. Over 56,000 people attended UFC 193.'}


100%|██████████| 1603/1603 [00:00<00:00, 98217.36it/s]
