In [109]:
from pyrca.analyzers.bayesian import BayesianNetwork
import pandas as pd
import json

In [110]:
with open('data/data.json', encoding="utf-8") as f:
    data = json.load(f)
    
data = {k:v for k,v in list(data.items())[:20]}

In [111]:
symptoms = set()
for disease in data.values():
    symptoms.update(disease['keys']['common_keys'])

In [112]:
nodes = list(symptoms | data.keys())
df = pd.DataFrame(0, index=list(data.keys()), columns=list(symptoms))
graph_df = pd.DataFrame(0, index=nodes, columns=nodes)
for disease, info in data.items():
    for symptom in info['keys']['common_keys']:
        df.at[disease, symptom] = 1
for disease, info in data.items():
    for symptom in info['keys']['common_keys']:
        graph_df.at[disease, symptom] = 1

In [113]:
graph_df

Unnamed: 0,阵挛,多动,兴趣缺乏,性高潮明显延迟（FOD）,停止或减少酒精摄入,常常说太多的话,阿片类药物过量,组织计划困难,通常难以按顺序等候（例如，排队时）,焦虑不是有因使用药物或物质滥用所致,...,经常忙个不停，行为像是“被马达所驱动”着,处理信息的速度减慢,寻求刺激的行为,回避、厌恶或不愿参加需要持续脑力劳动的任务,不合理的自责或过度且不恰当的内疚,产后抑郁,难以将注意力集中于任务,经常在需要保持坐立的情境中离开座位,反射亢进和阵挛,自杀观念
阵挛,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
多动,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
兴趣缺乏,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
性高潮明显延迟（FOD）,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
停止或减少酒精摄入,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
产后抑郁,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
难以将注意力集中于任务,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
经常在需要保持坐立的情境中离开座位,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
反射亢进和阵挛,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [114]:
model = BayesianNetwork(config=BayesianNetwork.config_class(graph=graph_df))

In [115]:
test_data = [[0 for i in range(128)] for j in range(128)]
data_df = pd.DataFrame(test_data, columns=nodes)
for i, (disease, info) in enumerate(data.items()):
    for symptom in info['keys']['common_keys']:
        data_df.at[i, symptom] = 1
        data_df.at[i, disease] = 1

In [116]:
# from pyrca.applications.example.rca import RCAEngine
# engine = RCAEngine()
# engine.build_causal_graph(
#     df=graph_df,
#     run_pdag2dag=True,
#     max_num_points=5000000,
#     verbose=True
# )
# bn = engine.train_bayesian_network(dfs=[data_df])
# bn.print_probabilities()

In [117]:
model.train([data_df])



In [118]:
from difflib import SequenceMatcher

In [119]:
def sim_ratio(a, b):
    return SequenceMatcher(None, a, b).ratio()

In [120]:
predict_disease = []

In [121]:
def format_root_causes(results):
    output = ["可能疾病："]
    for node, prob in results['root_cause_nodes']:
        disease = node.split('_')[1]
        predict_disease.append(disease)
        line = f"{disease}: {prob:.4f}"
        output.append(line)
    return "\n".join(output)

In [122]:
def find_matched_nodes(symp_list):
    matched_nodes = []
    nodes = list(symptoms)
    for symp in symp_list:
        max_ratio = 0
        matched_node = None
        for node in nodes:
            ratio = sim_ratio(symp, node)
            if ratio > max_ratio:
                max_ratio = ratio
                matched_node = node
        matched_nodes.append(matched_node)
    return matched_nodes

In [123]:
def print_invest1(disease_names):
    print("推荐进行检查：")
    for disease_name in disease_names:
        matched_disease = disease_name
        if matched_disease:
            print(f"{matched_disease}:")
            invest1 = data[matched_disease]['invest1']
            keys = []
            for key in invest1:
                keys.append(key)
            print(",".join(keys))
        else:
            print(f"No match found for {disease_name}")

In [124]:
def diagnose(symp_list, model):
    matched_nodes = find_matched_nodes(symp_list)
    print(format_root_causes(model.find_root_causes(matched_nodes).to_dict()))
    print("------------------------------------------")
    print_invest1(predict_disease)
    

In [125]:
symp_list = ['瞳孔小', '思维奇怪', '容易愤怒']

In [126]:
diagnose(symp_list, model)





Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 1003.66it/s]
Eliminating: 阿片类药物过量: 100%|██████████| 1/1 [00:00<?, ?it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 999.83it/s]
Eliminating: 广泛性焦虑障碍: 100%|██████████| 1/1 [00:00<00:00, 1000.31it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<?, ?it/s]
Eliminating: 人格障碍: 100%|██████████| 1/1 [00:00<00:00, 999.83it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<?, ?it/s]
Eliminating: 阿片类药物过量: 100%|██████████| 1/1 [00:00<00:00, 999.83it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<?, ?it/s]
Eliminating: ROOT_阿片类药物过量: 100%|██████████| 1/1 [00:00<00:00, 1003.90it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 1003.66it/s]
Eliminating: 广泛性焦虑障碍: 100%|██████████| 1/1 [00:00<00:00, 1002.70it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 999.60it/s]
Eliminating: ROOT_广泛性焦虑障碍: 100%|██████████| 1/1 [00:00<00:00, 999.83it/s]
Finding E

可能疾病：
阿片类药物过量: 0.9802
广泛性焦虑障碍: 0.9802
人格障碍: 0.9802
------------------------------------------
推荐进行检查：
阿片类药物过量:
纳洛酮治疗试验,心电图 (ECG)
广泛性焦虑障碍:
临床诊断
人格障碍:
临床访谈



