In [471]:
import json

def read_file(file):
    """Load a JSON file and return its parsed content.

    Args:
        file: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (list, dict, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which differs between operating systems.
    with open(file, "r", encoding="utf-8") as fp:
        return json.load(fp)
    
# Load every input table up front. All paths are relative, so they resolve
# against the working directory the notebook kernel was started in.
biomarkers = read_file("referenced/biomarkers.json")
indications = read_file("data/indications.json")
therapies = read_file("referenced/therapies.json")
diseases = read_file("referenced/diseases.json")
propositions = read_file("referenced/propositions.json")
contributions = read_file("referenced/contributions.json")
# NOTE: singular variable name, but it holds the content of strengths.json.
strength = read_file("referenced/strengths.json")
documents = read_file("data/documents.json")
statements = read_file("data/statements.hc.json")
therapy_groups = read_file("referenced/therapy_groups.json")

In [472]:
# Index each loaded table by the "id" field of its records so references can be
# resolved with a single dictionary access. If two records share an id, the
# later one in the list replaces the earlier one.
therapy_lookup = {record["id"]: record for record in therapies}
disease_lookup = {record["id"]: record for record in diseases}
biomarker_lookup = {record["id"]: record for record in biomarkers}
strength_lookup = {record["id"]: record for record in strength}
contribution_lookup = {record["id"]: record for record in contributions}
document_lookup = {record["id"]: record for record in documents}
indication_lookup = {record["id"]: record for record in indications}
therapy_groups_lookup = {record["id"]: record for record in therapy_groups}
proposition_lookup = {record["id"]: record for record in propositions}

In [473]:
def dereference_propositions(propositions, therapy_lookup, disease_lookup, biomarker_lookup, therapy_groups_lookup):
    """Replace the id references inside each proposition with the full records.

    For every proposition a shallow copy is made (the input dicts are left
    untouched) and then:

    * ``therapy_id`` (when not ``None``) is replaced by ``therapy``;
      otherwise ``therapy_group_id`` (when not ``None``) is replaced by
      ``therapy_group`` plus ``therapies``, the list of therapy records named
      by the group's ``"therapies"`` ids. If neither is set, a message is
      printed and the proposition is still processed.
    * ``conditionQualifier_id`` is replaced by ``conditionQualifier``.
    * ``biomarkers`` (a list of ids) is replaced by a list of biomarker records.

    Args:
        propositions: Iterable of proposition dicts.
        therapy_lookup: Mapping of therapy id -> therapy record.
        disease_lookup: Mapping of disease id -> disease record.
        biomarker_lookup: Mapping of biomarker id -> biomarker record.
        therapy_groups_lookup: Mapping of therapy-group id -> group record.

    Returns:
        A new list of dereferenced proposition dicts, in input order.

    Raises:
        KeyError: If a referenced id is missing from its lookup, or if a
            proposition lacks ``conditionQualifier_id`` or ``biomarkers``.
    """
    dereferenced_props = []
    for prop in propositions:
        # Shallow copy: keys are added/removed on the copy only.
        prop = prop.copy()

        # .get() so that a proposition carrying only one of the two keys is
        # handled instead of raising KeyError; `is not None` because ids such
        # as 0 are valid and None is a singleton.
        if prop.get("therapy_id") is not None:
            prop["therapy"] = therapy_lookup[prop.pop("therapy_id")]
        elif prop.get("therapy_group_id") is not None:
            therapy_group = therapy_groups_lookup[prop.pop("therapy_group_id")]
            prop["therapy_group"] = therapy_group
            prop["therapies"] = [
                therapy_lookup[therapy_id] for therapy_id in therapy_group["therapies"]
            ]
        else:
            print("Unexpected input in proposition")

        prop["conditionQualifier"] = disease_lookup[prop.pop("conditionQualifier_id")]

        # Build a new list so the original proposition's id list is not mutated.
        prop["biomarkers"] = [biomarker_lookup[biomarker_id] for biomarker_id in prop["biomarkers"]]

        dereferenced_props.append(prop)
    return dereferenced_props


In [474]:
#print(dereference_propositions(propositions, therapy_lookup, disease_lookup, biomarker_lookup, therapy_groups_lookup))


In [475]:
# Resolve every proposition once, then index the resolved records by id so
# that dereference_statements can attach them to statements by proposition id.
dereferenced_props = dereference_propositions(propositions, therapy_lookup, disease_lookup, biomarker_lookup, therapy_groups_lookup)
dereferenced_prop_lookup = {prop["id"]: prop for prop in dereferenced_props} 

In [482]:
def _resolve_statement_references(state, strength_lookup, indication_lookup, contribution_lookup, document_lookup):
    """Swap the strength, indication, contribution and document ids on ``state`` for full records.

    Mutates ``state`` in place: ``strength_id`` and ``indication_id`` are
    removed and replaced by ``strength`` and ``indication``; the id lists under
    ``contributions`` and ``reportedIn`` are replaced by new lists of records.
    """
    state["strength"] = strength_lookup[state.pop("strength_id")]
    state["indication"] = indication_lookup[state.pop("indication_id")]
    state["contributions"] = [contribution_lookup[contrib_id] for contrib_id in state["contributions"]]
    state["reportedIn"] = [document_lookup[doc_id] for doc_id in state["reportedIn"]]


def dereference_statements(statements, dereferenced_prop_lookup, strength_lookup, indication_lookup, contribution_lookup, document_lookup):
    """Replace the id references inside each statement with the full records.

    Each statement is shallow-copied before it is modified, so the input dicts
    are left untouched. Handling depends on ``proposition_id``:

    * ``""``   - a message is printed and the statement is dropped.
    * ``None`` - a message is printed; the statement is kept with
      ``proposition`` set to ``None`` and counted as a null statement.
    * anything else - ``proposition`` is set to the matching entry of
      ``dereferenced_prop_lookup`` and the statement is counted as resolved.

    In the two kept cases ``strength_id``, ``indication_id``,
    ``contributions`` and ``reportedIn`` are resolved through their lookups.

    Args:
        statements: Iterable of statement dicts.
        dereferenced_prop_lookup: Mapping of proposition id -> resolved proposition.
        strength_lookup: Mapping of strength id -> strength record.
        indication_lookup: Mapping of indication id -> indication record.
        contribution_lookup: Mapping of contribution id -> contribution record.
        document_lookup: Mapping of document id -> document record.

    Returns:
        A tuple ``(dereferenced_statements, null_count, statement_count)``:
        the list of kept statements in input order, the number kept with a
        ``None`` proposition, and the number kept with a resolved proposition.

    Raises:
        KeyError: If a statement lacks an expected key or references an id
            that is missing from the corresponding lookup.
    """
    dereferenced_statements = []
    null_count = 0
    statement_count = 0
    lookups = (strength_lookup, indication_lookup, contribution_lookup, document_lookup)

    for state in statements:
        state = state.copy()
        proposition_id = state["proposition_id"]

        if proposition_id == "":
            print("Skipping statement with empty proposition_id, statement_id:" + str(state["id"]))
            continue

        # The two branches differ only in what "proposition" becomes and in
        # when it is inserted; the insertion point is kept as-is so the key
        # order of the resulting dicts does not change.
        if proposition_id is None:
            print("Null value in prop idea for statement_id:" + str(state["id"]))
            _resolve_statement_references(state, *lookups)
            state.pop("proposition_id")
            state["proposition"] = None
            null_count += 1
        else:
            state.pop("proposition_id")
            state["proposition"] = dereferenced_prop_lookup[proposition_id]
            _resolve_statement_references(state, *lookups)
            statement_count += 1

        dereferenced_statements.append(state)

    return dereferenced_statements, null_count, statement_count
    


In [483]:
# Run the dereferencing first: null_count and statement_count are locals of
# dereference_statements and only exist at notebook level once they are
# unpacked from its return value. Printing them before the call only worked
# on leftover kernel state and fails on a fresh "Restart & Run All".
dereference_statements1 = dereference_statements(statements, dereferenced_prop_lookup, strength_lookup, indication_lookup, contribution_lookup, document_lookup)
# The result is (statements, null_count, statement_count); keep the full tuple
# under its existing name and pull the two counters out for reporting.
null_count, statement_count = dereference_statements1[1:]
print("the amount of null statements is: " + str(null_count))
print("the amount of 'correct' statements is: " + str(statement_count))
dereference_statements1

the amount of null statements is: 64
the amount of 'correct' statements is: 267
Null value in prop idea for statement_id:hc:2
Null value in prop idea for statement_id:hc:3
Null value in prop idea for statement_id:hc:28
Null value in prop idea for statement_id:hc:16
Null value in prop idea for statement_id:hc:44
Null value in prop idea for statement_id:hc:48
Null value in prop idea for statement_id:hc:68
Null value in prop idea for statement_id:hc:70
Null value in prop idea for statement_id:hc:71
Null value in prop idea for statement_id:hc:77
Null value in prop idea for statement_id:hc:77
Null value in prop idea for statement_id:hc:81
Null value in prop idea for statement_id:hc:82
Null value in prop idea for statement_id:hc:83
Null value in prop idea for statement_id:hc:84
Null value in prop idea for statement_id:hc:85
Null value in prop idea for statement_id:hc:137
Null value in prop idea for statement_id:hc:138
Null value in prop idea for statement_id:hc:139
Null value in prop idea fo

([{'id': 'hc:0',
   'type': 'Statement',
   'description': '',
   'contributions': [{'id': 0,
     'type': 'Contribution',
     'agent_id': 0,
     'description': 'Initial access of FDA approvals',
     'date': '2024-10-30'}],
   'reportedIn': [{'id': 'doc:hc.adcetris',
     'type': 'Document',
     'subtype': 'Regulatory approval',
     'name': 'Adcetris (brentuximab vedotin) [product monograph]. HC.',
     'aliases': [],
     'citation': 'Seagen Inc. Adcetris (brentuximab vedotin) [product monograph]. Health Canada website. https://pdf.hres.ca/dpd_pm/00080158.PDF. Published April 2025. Accessed June 2025.',
     'company': 'Seagen Inc.',
     'drug_name_brand': 'Adcetris',
     'drug_name_generic': 'brentuximab vedotin',
     'first_published': None,
     'access_date': '2025-06-11',
     'organization_id': 'hc',
     'publication_date': '2025-04-08',
     'url': 'https://pdf.hres.ca/dpd_pm/00080158.PDF',
     'url_drug': 'https://healetslth-products.canada.ca/dpd-bdpp/info?lang=eng&