In [1]:
import json

In [2]:
def create_ground_truth(input_filepath, output_filepath):
    """Collect the selected supports of a predictions file and save them as ground truth.

    The input JSON is read as a list of fact records, each holding a ``'pair'``
    and a ``'supports'`` list. Every support whose ``'selected'`` flag is
    truthy yields one ground-truth row. Summary counts are printed to stdout.

    Args:
        input_filepath: Path of the JSON predictions file to read.
        output_filepath: Path the ground-truth rows are written to as JSON
            (tuples are serialised as JSON arrays).

    Returns:
        A list of ``(pair, doc_id, fact_verification_result, content)`` tuples,
        where ``pair`` is a tuple built from the record's ``'pair'`` entry.

    Raises:
        KeyError: If a record lacks ``'pair'`` or ``'supports'``, or a selected
            support lacks ``'doc_id'``, ``'fact_verification_result'`` or
            ``'content'``.
    """
    # JSON is UTF-8 by specification; be explicit so reading does not depend
    # on the platform's default encoding.
    with open(input_filepath, encoding='utf-8') as f:
        data = json.load(f)

    print(f'create_ground_truth: filepath: {input_filepath}')
    print(f'create_ground_truth: raw count: {len(data)}')

    gt_data = []
    unsupported_facts = 0
    selected_facts = 0
    for spouse_fact_data in data:
        pair = tuple(spouse_fact_data['pair'])

        supports = spouse_fact_data['supports']
        if not supports:
            # No supporting entries at all for this fact.
            unsupported_facts += 1
            continue

        fact_used = False
        for support in supports:
            # A missing 'selected' key is treated the same as a falsy flag.
            if support.get('selected'):
                gt_data.append((pair, support['doc_id'], support['fact_verification_result'], support['content']))
                fact_used = True

        # Count the fact once, however many of its supports were selected.
        if fact_used:
            selected_facts += 1

    # Rows are labelled by the truthiness of fact_verification_result.
    pos_count = sum(1 for row in gt_data if row[2])
    neg_count = len(gt_data) - pos_count

    print(f'create_ground_truth: unsupported facts: {unsupported_facts}')
    print(f'create_ground_truth: selected facts: {selected_facts}')
    # NOTE(review): this line repeats unsupported_facts under a different
    # label; confirm which counter "unsupported docs" was meant to report.
    print(f'create_ground_truth: unsupported docs: {unsupported_facts}')
    print(f'create_ground_truth: labelled count: {len(gt_data)} (pos: {pos_count}, neg: {neg_count})')

    with open(output_filepath, 'w', encoding='utf-8') as f:
        json.dump(gt_data, f)

    return gt_data

In [6]:
# Build the ground truth for the wiki_dbp predictions: create_ground_truth reads
# wiki_dbp_pred.json, writes the selected supports to wiki_dbp_gt.json and
# returns them as a list.
# NOTE(review): both paths are absolute and specific to one machine; consider
# deriving them from a configurable data directory.
wiki_dbp_filepath = '/Users/vision/projects/abhinav-kumar-thakur/wikidata-fact-verification/data/wiki_dbp_pred.json'
wiki_dbp_gt_file_path = '/Users/vision/projects/abhinav-kumar-thakur/wikidata-fact-verification/data/wiki_dbp_gt.json'
wiki_dbp_gt = create_ground_truth(wiki_dbp_filepath, wiki_dbp_gt_file_path)

create_ground_truth: filepath: /Users/vision/projects/abhinav-kumar-thakur/wikidata-fact-verification/data/wiki_dbp_pred.json
create_ground_truth: raw count: 100
create_ground_truth: unsupported facts: 26
create_ground_truth: selected facts: 28
create_ground_truth: unsupported docs: 26
create_ground_truth: labelled count: 50 (pos: 27, neg: 23)


In [4]:
# Build the ground truth for the only_dbp predictions: create_ground_truth reads
# only_dbp_pred.json, writes the selected supports to only_dbp_gt.json and
# returns them as a list.
# NOTE(review): both paths are absolute and specific to one machine; consider
# deriving them from a configurable data directory.
only_dbp_filepath = '/Users/vision/projects/abhinav-kumar-thakur/wikidata-fact-verification/data/only_dbp_pred.json'
only_dbp_gt_filepath = '/Users/vision/projects/abhinav-kumar-thakur/wikidata-fact-verification/data/only_dbp_gt.json'
only_dbp_gt = create_ground_truth(only_dbp_filepath, only_dbp_gt_filepath)

create_ground_truth: filepath: /Users/vision/projects/abhinav-kumar-thakur/wikidata-fact-verification/data/only_dbp_pred.json
create_ground_truth: raw count: 100
create_ground_truth: unsupported facts: 41
create_ground_truth: selected facts: 0
create_ground_truth: unsupported docs: 41
create_ground_truth: labelled count: 0 (pos: 0, neg: 0)
