# Experiment Results Extractor

Use this notebook to convert experiment results stored as `.pkl` records into `.json` files.

JSON files will be saved in `./_json_results/`.

In [1]:
import json
import os
import pickle
import random

import pandas as pd

## MODIFY THE BLOCK BELOW

Below is an example of how to specify each *input folder name* (a folder inside `./results/`) together with the *output dict key* under which its results appear in the merged JSON.

In [2]:
# Experiments to export. Each entry pairs a folder under ./results/ (its
# records.pkl is read) with the key its metrics are stored under in the output
# JSON; the key is also used as the per-experiment JSON file name.
RECORD_FOLDERS = [
    # ["RESULT_FOLDER_NAME", "DICT_KEY"]
    ["example_addcoarse_exp", "addcoarse"],
    ["example_addstrict_exp", "addstrict"],
]

## MODIFY THE BLOCK BELOW

This cell fixes the token order of the exported results, based on `./data/split.json`. By default (*NORMAL*), it draws a fixed-seed random sample of 2000 tokens from the `val` split. To order the results for the *distribution shift* experiment setting instead, comment out the *NORMAL* section and uncomment the *DISTRIBUTION SHIFT* section.

In [3]:
# Seed the global RNG so that random.sample below draws the same tokens, in the
# same order, every time this cell is run.
random.seed(0)

# Load the split definition; the "val" (and optionally "distribshift") entries
# are read from it below.
with open("./data/split.json", "r") as f:
    split_data = json.load(f)


# Uncomment the corresponding section to obtain ordered results.
# Only one of the two sections below should be active at a time.

#### NORMAL
# Sample 2000 tokens from the "val" split. The assert fails if the first sampled
# token differs from the expected one.
val_tokens = split_data["val"]
token_list = random.sample(val_tokens, 2000)
assert token_list[0] == "6a1f2ebe1ef8437c87a5a742362b09b4"

#### DISTRIBUTION SHIFT
# Uses the "distribshift" token list as-is (no sampling).
# val_tokens = split_data["distribshift"]
# token_list = val_tokens
# assert token_list[0] == "64d019bf33ba4dcb9eca5e5ab2ef967e"

In [None]:
# For every configured experiment: load ./results/<FOLDER>/records.pkl, line its
# records up with `token_list`, and write ./_json_results/<DICT_TITLE>.json.
L2_SUCCESS_THRESHOLD = 2.5  # a record is a success when l2_error is strictly below this
MAX_SKIPPED_TOKENS = 200  # sanity bound: more missing tokens than this suggests a setup error
EXPECTED_NUM_TOKENS = 2000  # every exported metric list must have exactly this many entries

# The output folder is the same for every experiment, so create it once up front.
os.makedirs("./_json_results", exist_ok=True)

for (FOLDER, DICT_TITLE) in RECORD_FOLDERS:
    # NOTE(security): pickle.load can execute arbitrary code while loading; only
    # point this at result files you produced yourself.
    with open(f"./results/{FOLDER}/records.pkl", "rb") as f:
        records = pickle.load(f)
    df_records = pd.DataFrame(records)

    # Map each token to the position of its FIRST row, built in a single pass, so
    # the loop below does not re-scan the whole frame for every token.
    # Assumes tokens are hashable (e.g. strings) -- TODO confirm against records.pkl.
    first_row_of_token = {}
    for row_pos, record_token in enumerate(df_records["token"]):
        first_row_of_token.setdefault(record_token, row_pos)

    skip_list = []
    metrics = {
        "input_similarity_top1_demo": [],
        "output_similarity_cos": [],
        "output_similarity_exp": [],
        "acc_all_steps": [],
    }
    result = {f"{DICT_TITLE}": metrics}

    for i, token in enumerate(token_list):
        row_pos = first_row_of_token.get(token)
        # No record for this token: keep the position aligned with token_list by
        # writing placeholders (similarities = None, accuracy = -1).
        if row_pos is None:
            skip_list.append(i)
            metrics["input_similarity_top1_demo"].append(None)
            metrics["output_similarity_cos"].append(None)
            metrics["output_similarity_exp"].append(None)
            metrics["acc_all_steps"].append(-1)
            continue

        record = df_records.iloc[row_pos]
        is_success = record["l2_error"] < L2_SUCCESS_THRESHOLD

        metrics["input_similarity_top1_demo"].append(record["input_similarity"])
        metrics["output_similarity_cos"].append(record["output_similarity_cos"])
        metrics["output_similarity_exp"].append(record["output_similarity_exp"])
        # Store a plain 0/1 int so the value is JSON-serializable.
        metrics["acc_all_steps"].append(1 if is_success else 0)

    # Sanity check: too many missing tokens usually means the wrong folder or the
    # wrong token-list section is in use.
    assert len(skip_list) < MAX_SKIPPED_TOKENS, (
        f"{DICT_TITLE} skipped {len(skip_list)} tokens (expected fewer than {MAX_SKIPPED_TOKENS})"
    )
    print(f"{DICT_TITLE} skips: {len(skip_list)}")
    assert len(metrics["acc_all_steps"]) == EXPECTED_NUM_TOKENS, f"{DICT_TITLE} has {len(metrics['acc_all_steps'])} records"

    # save dict
    save_path = f"./_json_results/{DICT_TITLE}.json"
    with open(save_path, "w") as f:
        json.dump(result, f, indent=4)

In [None]:
# Merge every per-experiment JSON file in ./_json_results into one dict and write
# it to ./_json_results/_merged_json.json.
merged_json = {}

JSON_RESULTS_PATH = "./_json_results"
MERGED_JSON_NAME = "_merged_json.json"

# os.listdir returns names in arbitrary order; sorting makes the merge
# deterministic when two files share a top-level key (the later file wins).
for file in sorted(os.listdir(JSON_RESULTS_PATH)):
    # Skip the merged output of a previous run: it lives in the same folder and
    # would otherwise feed stale keys back into (or over) the fresh results.
    if not file.endswith(".json") or file == MERGED_JSON_NAME:
        continue
    with open(os.path.join(JSON_RESULTS_PATH, file), "r") as f:
        data = json.load(f)
    merged_json.update(data)

with open(os.path.join(JSON_RESULTS_PATH, MERGED_JSON_NAME), "w") as f:
    json.dump(merged_json, f, indent=4)

In [None]:
# Report, for each top-level key of the merged results, how many entries its
# "acc_all_steps" list holds.
for experiment_key, experiment_metrics in merged_json.items():
    num_entries = len(experiment_metrics['acc_all_steps'])
    print(f"{experiment_key}: {num_entries}")