In [324]:
import json

def read_file(file):
    """Load and return the parsed contents of a JSON file.

    Args:
        file: Path to the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file, "r", encoding="utf-8") as fp:
        return json.load(fp)
    
# Reference collections: each is looked up by "id" in the cells below.
biomarkers = read_file("referenced/biomarkers.json")
therapies = read_file("referenced/therapies.json")
therapy_groups = read_file("referenced/therapy_groups.json")
diseases = read_file("referenced/diseases.json")
propositions = read_file("referenced/propositions.json")
contributions = read_file("referenced/contributions.json")
strength = read_file("referenced/strengths.json")

# Files from the data/ directory: indications, documents, and the statements
# that get dereferenced further down.
indications = read_file("data/indications.json")
documents = read_file("data/documents.json")
statements = read_file("data/statements.hc.json")

In [325]:
def _index_by_id(records):
    """Return a dict mapping each record's "id" value to the record itself."""
    return {record["id"]: record for record in records}


# One id -> record index per loaded collection, used when dereferencing.
therapy_lookup = _index_by_id(therapies)
disease_lookup = _index_by_id(diseases)
biomarker_lookup = _index_by_id(biomarkers)
strength_lookup = _index_by_id(strength)
contribution_lookup = _index_by_id(contributions)
document_lookup = _index_by_id(documents)
indication_lookup = _index_by_id(indications)
therapy_groups_lookup = _index_by_id(therapy_groups)
proposition_lookup = _index_by_id(propositions)

In [326]:
def dereference_propositions(propositions, therapy_lookup, disease_lookup, biomarker_lookup, therapy_groups_lookup):
    """Replace the id references inside each proposition with the referenced records.

    Every proposition is shallow-copied first, so the dicts in ``propositions``
    are left unmodified.

    Args:
        propositions: Iterable of proposition dicts. Each must contain
            "conditionQualifier_id" and "biomarkers" (a list of biomarker ids),
            and is expected to carry a non-null "therapy_id" or
            "therapy_group_id".
        therapy_lookup: Mapping of therapy id -> therapy record.
        disease_lookup: Mapping of disease id -> disease record.
        biomarker_lookup: Mapping of biomarker id -> biomarker record.
        therapy_groups_lookup: Mapping of therapy group id -> group record;
            each group's "therapies" entry is a list of therapy ids.

    Returns:
        A list of dereferenced proposition dicts, in input order.

    Raises:
        KeyError: If a referenced id is missing from its lookup, or a
            proposition lacks "conditionQualifier_id" or "biomarkers".
    """
    dereferenced_props = []
    for prop in propositions:
        prop = prop.copy()

        # .get() tolerates a proposition that carries only one of the two
        # mutually exclusive keys instead of failing with KeyError.
        therapy_id = prop.get("therapy_id")
        therapy_group_id = prop.get("therapy_group_id")

        if therapy_id is not None:
            prop["therapy"] = therapy_lookup[therapy_id]
            prop.pop("therapy_id")
        elif therapy_group_id is not None:
            therapy_group = therapy_groups_lookup[therapy_group_id]
            prop["therapy_group"] = therapy_group
            prop["therapies"] = [
                therapy_lookup[member_id] for member_id in therapy_group["therapies"]
            ]
            prop.pop("therapy_group_id")
        else:
            # Neither reference is set: report which proposition is affected and
            # still resolve its remaining fields.
            print("Unexpected input in proposition, proposition_id:" + str(prop.get("id")))

        condition_id = prop.pop("conditionQualifier_id")
        prop["conditionQualifier"] = disease_lookup[condition_id]

        prop["biomarkers"] = [biomarker_lookup[bio] for bio in prop["biomarkers"]]

        dereferenced_props.append(prop)
    return dereferenced_props


In [327]:
#print(dereference_propositions(propositions, therapy_lookup, disease_lookup, biomarker_lookup, therapy_groups_lookup))


In [328]:
# Resolve the id references in every proposition, then index the resolved
# propositions by their own id so statements can look them up.
dereferenced_props = dereference_propositions(
    propositions=propositions,
    therapy_lookup=therapy_lookup,
    disease_lookup=disease_lookup,
    biomarker_lookup=biomarker_lookup,
    therapy_groups_lookup=therapy_groups_lookup,
)
dereferenced_prop_lookup = {
    resolved["id"]: resolved for resolved in dereferenced_props
}



In [331]:
def _resolve_statement_references(state, strength_lookup, indication_lookup, contribution_lookup, document_lookup):
    """Swap the strength, indication, contribution and document ids on ``state`` for their records, in place."""
    state["strength"] = strength_lookup[state.pop("strength_id")]
    state["indication"] = indication_lookup[state.pop("indication_id")]
    state["contributions"] = [
        contribution_lookup[contrib_id] for contrib_id in state["contributions"]
    ]
    state["reportedIn"] = [document_lookup[doc_id] for doc_id in state["reportedIn"]]


def dereference_statements(statements, dereferenced_prop_lookup, strength_lookup, indication_lookup, contribution_lookup, document_lookup):
    """Replace the id references inside each statement with the referenced records.

    Every statement is shallow-copied first, so the dicts in ``statements`` are
    left unmodified. Handling depends on the statement's "proposition_id":

    * ``""``   -- a message is printed and the statement is left out of the result.
    * ``None`` -- a message is printed; the statement is kept with
      ``"proposition": None`` and its other references resolved.
    * anything else -- looked up in ``dereferenced_prop_lookup``.

    Args:
        statements: Iterable of statement dicts containing "id",
            "proposition_id", "strength_id", "indication_id", "contributions"
            (list of ids) and "reportedIn" (list of document ids).
        dereferenced_prop_lookup: Mapping of proposition id -> dereferenced proposition.
        strength_lookup: Mapping of strength id -> strength record.
        indication_lookup: Mapping of indication id -> indication record.
        contribution_lookup: Mapping of contribution id -> contribution record.
        document_lookup: Mapping of document id -> document record.

    Returns:
        A list of dereferenced statement dicts, in input order, without the
        statements whose proposition id was an empty string.

    Raises:
        KeyError: If a required key is absent from a statement or a referenced
            id is missing from its lookup.
    """
    dereferenced_statements = []
    for state in statements:
        state = state.copy()
        proposition_id = state["proposition_id"]

        if proposition_id == "":
            print("Skipping statement with empty proposition_id, statement_id:" + str(state["id"]))
            continue

        # The proposition key is written last for null propositions and first
        # otherwise; that ordering is kept so the key order of each resulting
        # dict matches what this function has always produced.
        if proposition_id is None:
            print("Null value in prop idea for statement_id:" + str(state["id"]))
            _resolve_statement_references(
                state, strength_lookup, indication_lookup, contribution_lookup, document_lookup
            )
            state.pop("proposition_id")
            state["proposition"] = None
        else:
            state.pop("proposition_id")
            state["proposition"] = dereferenced_prop_lookup[proposition_id]
            _resolve_statement_references(
                state, strength_lookup, indication_lookup, contribution_lookup, document_lookup
            )

        dereferenced_statements.append(state)

    return dereferenced_statements


In [332]:
dereference_statements1 = dereference_statements(statements, dereferenced_prop_lookup, strength_lookup, indication_lookup, contribution_lookup, document_lookup)

# Printing the entire list floods the cell output, so report how many
# statements were dereferenced and show only the first couple as a preview.
print(len(dereference_statements1), "statements dereferenced")
print(dereference_statements1[:2])

Null value in prop idea for statement_id:hc:16
Null value in prop idea for statement_id:hc:44
Null value in prop idea for statement_id:hc:48
Null value in prop idea for statement_id:hc:68
Null value in prop idea for statement_id:hc:77
Null value in prop idea for statement_id:hc:77
Null value in prop idea for statement_id:hc:150
Null value in prop idea for statement_id:hc:151
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empty proposition_id, statement_id:hc:115
Skipping statement with empt