In [83]:
import json

def read_file(file):
    """Load and return the parsed JSON content of ``file``.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8 by specification. Without an explicit encoding,
    # open() falls back to the platform locale and can mis-decode (or fail
    # on) non-ASCII characters.
    with open(file, "r", encoding="utf-8") as fp:
        return json.load(fp)
# Read every JSON source in one pass. The names on the left line up
# one-to-one, in order, with the paths handed to read_file on the right.
(
    biomarkers,
    indications,
    therapies,
    diseases,
    propositions,
    contributions,
    strength,
    documents,
    statements,
) = map(
    read_file,
    (
        "data/copy-biomarkers.json",
        "data/indications.json",
        "data/copy-therapies.json",
        "data/copy-diseases.json",
        "data/propositions.hc.json",
        "data/copy-contributions.json",
        "data/copy-strengths.json",
        "data/documents.json",
        "data/statements.hc.json",
    ),
)

In [85]:
def _index_by_id(records):
    """Return a dict mapping each record's "id" value to the record itself."""
    return dict((record["id"], record) for record in records)


# One id -> record table per loaded collection.
therapy_lookup = _index_by_id(therapies)
disease_lookup = _index_by_id(diseases)
biomarker_lookup = _index_by_id(biomarkers)
strength_lookup = _index_by_id(strength)
contribution_lookup = _index_by_id(contributions)
document_lookup = _index_by_id(documents)
indication_lookup = _index_by_id(indications)

In [86]:
def dereference_propositions(propositions, therapy_lookup, disease_lookup, biomarker_lookup):
    """Replace the id references in each proposition with the referenced records.

    Each proposition is shallow-copied first, so the input records keep their
    original keys. On the copy:

    * ``therapy_id`` is removed and ``therapy`` is set to the matching entry
      of ``therapy_lookup``;
    * ``conditionQualifier_id`` is removed and ``conditionQualifier`` is set
      to the matching entry of ``disease_lookup``;
    * every element of ``biomarkers`` is replaced by its entry in
      ``biomarker_lookup``.

    Args:
        propositions: Iterable of proposition dicts holding id references.
        therapy_lookup: Mapping from therapy id to therapy record.
        disease_lookup: Mapping from disease id to disease record.
        biomarker_lookup: Mapping from biomarker id to biomarker record.

    Returns:
        A new list with one resolved copy per input proposition, in order.

    Raises:
        KeyError: If an expected key is absent from a proposition, or (with
            plain dict lookups) a referenced id is absent from its lookup.
    """

    def _resolve(raw):
        # Work on a shallow copy so the caller's record is left untouched.
        resolved = raw.copy()
        resolved["therapy"] = therapy_lookup[resolved.pop("therapy_id")]
        resolved["conditionQualifier"] = disease_lookup[resolved.pop("conditionQualifier_id")]
        resolved["biomarkers"] = [biomarker_lookup[marker_id] for marker_id in resolved["biomarkers"]]
        return resolved

    return [_resolve(raw) for raw in propositions]

In [87]:
#print(dereference_propositions(propositions, therapy_lookup, disease_lookup, biomarker_lookup))


In [88]:
# Resolve every proposition once, then index the resolved records by id.
dereferenced_props = dereference_propositions(
    propositions,
    therapy_lookup,
    disease_lookup,
    biomarker_lookup,
)
dereferenced_prop_lookup = dict(
    (resolved["id"], resolved) for resolved in dereferenced_props
)


In [91]:
   def dereference_statements(statements, proposition_lookup, strength_lookup, indication_lookup, contribution_lookup, document_lookup):
       """Replace the id references in each statement with the referenced records.

       Each statement is shallow-copied first, so the input records keep their
       original keys. On the copy, ``proposition_id``, ``strength_id`` and
       ``indication_id`` are removed and ``proposition``, ``strength`` and
       ``indication`` are set from the corresponding lookup; the ids listed
       under ``contributions`` and ``reportedIn`` are replaced by the records
       found in ``contribution_lookup`` and ``document_lookup``.

       Args:
           statements: Iterable of statement dicts holding id references.
           proposition_lookup: Mapping from proposition id to proposition record.
           strength_lookup: Mapping from strength id to strength record.
           indication_lookup: Mapping from indication id to indication record.
           contribution_lookup: Mapping from contribution id to contribution record.
           document_lookup: Mapping from document id to document record.

       Returns:
           A new list with one resolved copy per input statement, in order.

       Raises:
           KeyError: If an expected key is absent from a statement, or (with
               plain dict lookups) a referenced id is absent from its lookup.
       """
       # (resolved key, id key to remove, lookup table) for single references.
       # The order matters: it fixes the key order of the resulting dicts.
       single_refs = (
           ("proposition", "proposition_id", proposition_lookup),
           ("strength", "strength_id", strength_lookup),
           ("indication", "indication_id", indication_lookup),
       )
       # (key, lookup table) for keys that hold a list of ids.
       list_refs = (
           ("contributions", contribution_lookup),
           ("reportedIn", document_lookup),
       )

       resolved_statements = []
       for raw in statements:
           # Shallow copy so the caller's record is left untouched.
           record = raw.copy()
           for target_key, id_key, table in single_refs:
               record[target_key] = table[record.pop(id_key)]
           for list_key, table in list_refs:
               record[list_key] = [table[ref_id] for ref_id in record[list_key]]
           resolved_statements.append(record)
       return resolved_statements

In [92]:
# Resolve statements against dereferenced_prop_lookup (the id-indexed output
# of dereference_propositions) so each statement embeds a fully resolved
# proposition and the cell runs on a fresh kernel.
print(dereference_statements(statements, dereferenced_prop_lookup, strength_lookup, indication_lookup, contribution_lookup, document_lookup))

[{'id': 0, 'type': 'Statement', 'description': '', 'contributions': [{'id': 0, 'type': 'Contribution', 'agent_id': 0, 'description': 'Initial access of FDA approvals', 'date': '2024-10-30'}], 'reportedIn': [{'id': 'doc:hc.alecensaro', 'type': 'Document', 'subtype': 'Regulatory approval', 'name': 'Alecensaro (alectinib) [product monograph]. HC.', 'aliases': [], 'citation': 'Hoffmann-La Roche Limited. Alecensaro (alectinib) [product monograph]. Health Canada website. https://pdf.hres.ca/dpd_pm/00078825.PDF. Revised March 2025. Accessed June 2025.', 'company': 'Hoffmann-La Roche Limited', 'drug_name_brand': 'Alecensaro', 'drug_name_generic': 'alectinib', 'first_published': '2016-09-29', 'access_date': '2025-06-13', 'organization_id': 'hc', 'publication_date': '2025-03-11', 'url': 'https://pdf.hres.ca/dpd_pm/00078825.PDF', 'url_drug': None, 'application_number': None}], 'direction': 'supports', 'proposition': {'id': 0, 'type': 'VariantTherapeuticResponseProposition', 'predicate': 'predictS