In [7]:
from GPT import GPT
import json
import re

def query_gpt4(d, start, end):
    field_texts = json.load(open(f"prompt/domain.json"))[d]
    variable_texts = json.load(open(f"prompt/description/{d}.json"))
    input = f'''You are an expert on the {field_texts}
    There are two factors:
    {start}: {variable_texts[start]}
    {end}: {variable_texts[end]}
    Which cause-and-effect relationship is more likely for following causal statements V1 V2?
    A. changing V1 causes a change in V2.
    B. changing V2 causes a change in V1.
    C. changes in V1 and in V2 are not correlated.
    D. uncertain.
    Provide your final answer within the tags <Answer>A/B/C/D</Answer>.
    Analyze the statement: {start} {end}'''
    prompts = [input]
    chatgpt = GPT("gpt-4", 0.7)
    outputs = chatgpt.chatgpt_QA_multi(prompts)
    output = outputs[0]["output"]
    answer = re.findall(r"<Answer>(.*?)</Answer>",output)[0]
    return input, output, answer

In [92]:
import json
import re

sample_dir = "pairwise_samples/"
gpt_dir = "out/gpt_history/for_estimation/"

def pairwise(fp):
    with open(fp, "r") as f:
        lines = f.readlines()
        for line in lines:
            # parse v1,v2 as (v1,v2), v1 and v2 are strings
            match = re.match(r"(.*),(.*)", line)
            if match:
                v1, v2 = match.group(1), match.group(2)
                yield (v1, v2)

## estimate 
datasets = ["alarm", "asia", "insurance", "mildew", "child", "cancer", "water", "barley"]
pairwise_samples = {d:{"edge":[],"indirect_path":[],"no_path_bi":[]} for d in datasets}
for d in datasets:
    for v1, v2 in pairwise(f"{sample_dir}/{d}/direct_connections_str.txt"):
        pairwise_samples[d]["edge"].append((v1, v2))
    for v1, v2 in pairwise(f"{sample_dir}/{d}/indirect_connections_str.txt"):
        pairwise_samples[d]["indirect_path"].append((v1, v2))
    for v1, v2 in pairwise(f"{sample_dir}/{d}/no_connection_str.txt"):
        pairwise_samples[d]["no_path_bi"].append((v1, v2))

correct_num = {d:{"edge":0,"indirect_path":0,"no_path_bi":0} for d in datasets}
reversed_num = {d:{"edge":0,"indirect_path":0,"no_path_bi":0} for d in datasets}

for d in pairwise_samples:
    gpt_file = f"{gpt_dir}/{d}_history.json"
    json_history = json.load(open(gpt_file, "r"))
    for v1, v2 in pairwise_samples[d]["edge"]:
        print(f"{v1} -> {v2}")
        # check if the element with "start":v1 and "end":v2 or "start":v2 and "end":v1 exists in the history;
        flag = False
        reverse_flag = False
        for i, history in enumerate(json_history):
            if history["start"] == v1 and history["end"] == v2:
                flag = True
                if history["answer"] == "A":
                    correct_num[d]["edge"] += 1
                    reverse_flag = False
                    break
                if history["answer"] == "B":
                    reverse_flag = True

            elif history["start"] == v2 and history["end"] == v1:
                flag = True
                if history["answer"] == "B":
                    correct_num[d]["edge"] += 1
                    reverse_flag = False
                    break
                if history["answer"] == "A":
                    reverse_flag = True
        if reverse_flag:
            reversed_num[d]["edge"] += 1

    for v1, v2 in pairwise_samples[d]["indirect_path"]:
        print(f"{v1} => {v2}")
        # check if the element with "start":v1 and "end":v2 or "start":v2 and "end":v1 exists in the history;
        flag = False
        reverse_flag = False
        for i, history in enumerate(json_history):
            if history["start"] == v1 and history["end"] == v2:
                flag = True
                if history["answer"] == "A":
                    correct_num[d]["indirect_path"] += 1
                    reverse_flag = False
                    break
                if history["answer"] == "B":
                    reverse_flag = True

            elif history["start"] == v2 and history["end"] == v1:
                flag = True
                if history["answer"] == "B":
                    correct_num[d]["indirect_path"] += 1
                    reverse_flag = False
                    break
                if history["answer"] == "A":
                    reverse_flag = True
        if reverse_flag:
            reversed_num[d]["indirect_path"] += 1
        # if no match query gpt4
        if not flag:
            input, output, answer = query_gpt4(d, v1, v2)
            json_history.append({"start":v1, "end":v2, "input":input, "output":output, "answer":answer})
            with open(gpt_file, "w") as f:
                json.dump(json_history, f, indent=4)
            if answer == "A":
                correct_num[d]["indirect_path"] += 1
            if answer == "B":
                reversed_num[d]["indirect_path"] += 1
        
    for v1, v2 in pairwise_samples[d]["no_path_bi"]:
        print(f"{v1} <> {v2}")
        # check if the element with "start":v1 and "end":v2 or "start":v2 and "end":v1 exists in the history;
        flag = False
        for i, history in enumerate(json_history):
            if history["start"] == v1 and history["end"] == v2:
                flag = True
                if history["answer"] in ["C","D"]:
                    correct_num[d]["no_path_bi"] += 1       
                    break
            elif history["start"] == v2 and history["end"] == v1:
                flag = True
                if history["answer"] in ["C","D"]:
                    correct_num[d]["no_path_bi"] += 1
                    break
        # if no match query gpt4
        if not flag:
            input, output, answer = query_gpt4(d, v1, v2)
            json_history.append({"start":v1, "end":v2, "input":input, "output":output, "answer":answer})
            with open(gpt_file, "w") as f:
                json.dump(json_history, f, indent=4)
            if answer in ["C","D"]:
                correct_num[d]["no_path_bi"] += 1




LVEDVOLUME -> CVP
LVEDVOLUME -> PCWP
LVFAILURE -> HISTORY
STROKEVOLUME -> CO
ERRCAUTER -> HREKG
ERRCAUTER -> HRSAT
TPR -> BP
KINKEDTUBE -> PRESS
KINKEDTUBE -> VENTLUNG
PVSAT -> SAO2
SAO2 -> CATECHOL
SHUNT -> SAO2
INTUBATION -> MINVOL
INTUBATION -> PRESS
INTUBATION -> VENTLUNG
DISCONNECT -> VENTTUBE
MINVOLSET -> VENTMACH
VENTTUBE -> PRESS
VENTALV -> PVSAT
HR -> HREKG
HYPOVOLEMIA => PCWP
LVFAILURE => CVP
ANAPHYLAXIS => CATECHOL
ANAPHYLAXIS => BP
TPR => CO
PVSAT => CATECHOL
PULMEMBOLUS => SAO2
PULMEMBOLUS => CO
SHUNT => HREKG
INTUBATION => HREKG
INTUBATION => PVSAT
DISCONNECT => HRBP
DISCONNECT => HR
MINVOLSET => PVSAT
MINVOLSET => BP
VENTMACH => HRBP
VENTTUBE => CO
VENTLUNG => CATECHOL
VENTALV => CATECHOL
ARTCO2 => HR
HISTORY <> MINVOLSET
HISTORY <> BP
CVP <> TPR
PCWP <> HRSAT
HYPOVOLEMIA <> KINKEDTUBE
HYPOVOLEMIA <> SHUNT
HYPOVOLEMIA <> VENTLUNG
LVEDVOLUME <> PULMEMBOLUS
LVFAILURE <> MINVOLSET
ERRLOWOUTPUT <> PAP
ERRLOWOUTPUT <> VENTMACH
HRBP <> BP
HREKG <> CO
HRSAT <> PRESS
HRSAT <> CO

In [93]:
correct_num

{'alarm': {'edge': 20, 'indirect_path': 13, 'no_path_bi': 12},
 'asia': {'edge': 8, 'indirect_path': 10, 'no_path_bi': 8},
 'insurance': {'edge': 17, 'indirect_path': 19, 'no_path_bi': 7},
 'mildew': {'edge': 19, 'indirect_path': 20, 'no_path_bi': 2},
 'child': {'edge': 20, 'indirect_path': 10, 'no_path_bi': 10},
 'cancer': {'edge': 4, 'indirect_path': 4, 'no_path_bi': 0},
 'water': {'edge': 19, 'indirect_path': 10, 'no_path_bi': 9},
 'barley': {'edge': 14, 'indirect_path': 6, 'no_path_bi': 10}}

In [94]:
reversed_num

{'alarm': {'edge': 0, 'indirect_path': 3, 'no_path_bi': 0},
 'asia': {'edge': 0, 'indirect_path': 0, 'no_path_bi': 0},
 'insurance': {'edge': 1, 'indirect_path': 1, 'no_path_bi': 0},
 'mildew': {'edge': 1, 'indirect_path': 0, 'no_path_bi': 0},
 'child': {'edge': 0, 'indirect_path': 8, 'no_path_bi': 0},
 'cancer': {'edge': 0, 'indirect_path': 0, 'no_path_bi': 0},
 'water': {'edge': 1, 'indirect_path': 10, 'no_path_bi': 0},
 'barley': {'edge': 1, 'indirect_path': 6, 'no_path_bi': 0}}

In [95]:
# calculate the accuraccy and save to a new dict
accuracy = {d:{"edge":0,"indirect_path":0,"no_path_bi":0} for d in datasets}
for d in datasets:
    for k in pairwise_samples[d]:
        accuracy[d][k] = correct_num[d][k] / len(pairwise_samples[d][k])
accuracy

{'alarm': {'edge': 1.0, 'indirect_path': 0.65, 'no_path_bi': 0.6},
 'asia': {'edge': 1.0, 'indirect_path': 1.0, 'no_path_bi': 0.8},
 'insurance': {'edge': 0.85, 'indirect_path': 0.95, 'no_path_bi': 0.35},
 'mildew': {'edge': 0.95, 'indirect_path': 1.0, 'no_path_bi': 0.1},
 'child': {'edge': 1.0, 'indirect_path': 0.5, 'no_path_bi': 0.5},
 'cancer': {'edge': 1.0, 'indirect_path': 1.0, 'no_path_bi': 0.0},
 'water': {'edge': 0.95, 'indirect_path': 0.5, 'no_path_bi': 0.45},
 'barley': {'edge': 0.7, 'indirect_path': 0.3, 'no_path_bi': 0.5}}

In [96]:
acc_reversed = {d:{"edge":0,"indirect_path":0,"no_path_bi":0} for d in datasets}
for d in datasets:
    for k in pairwise_samples[d]:
        acc_reversed[d][k] = reversed_num[d][k] / len(pairwise_samples[d][k])
acc_reversed

{'alarm': {'edge': 0.0, 'indirect_path': 0.15, 'no_path_bi': 0.0},
 'asia': {'edge': 0.0, 'indirect_path': 0.0, 'no_path_bi': 0.0},
 'insurance': {'edge': 0.05, 'indirect_path': 0.05, 'no_path_bi': 0.0},
 'mildew': {'edge': 0.05, 'indirect_path': 0.0, 'no_path_bi': 0.0},
 'child': {'edge': 0.0, 'indirect_path': 0.4, 'no_path_bi': 0.0},
 'cancer': {'edge': 0.0, 'indirect_path': 0.0, 'no_path_bi': 0.0},
 'water': {'edge': 0.05, 'indirect_path': 0.5, 'no_path_bi': 0.0},
 'barley': {'edge': 0.05, 'indirect_path': 0.3, 'no_path_bi': 0.0}}

In [97]:
import numpy as np
true_dags = {}
for d in datasets:
    true_dags[d] = np.loadtxt(f"BN_structure/{d}_graph.txt", dtype=int)
## calculate the edge number and the path number of the true DAGs

edge_num = {d:0 for d in datasets}
path_num = {d:0 for d in datasets}
node_num = {d:0 for d in datasets}
for d in datasets:
    node_num[d] = len(true_dags[d])
    edge_num[d] = np.sum(true_dags[d])
    n = len(true_dags[d])
    reachability = true_dags[d].copy()
    for k in range(n):
        for i in range(n):
            for j in range(n):
                reachability[i, j] = reachability[i, j] or (
                    reachability[i, k] and reachability[k, j])
    path_num[d] = np.sum(reachability)

In [98]:
## add a new acc to the accuracy dict, which is the (edge_num * edge_acc + (path_num-edge_num) * path_acc) / path_num
## do the same for reversed accuracy
for d in datasets:
    accuracy[d]["path_acc"] = (edge_num[d] * accuracy[d]["edge"] + (path_num[d] - edge_num[d]) * accuracy[d]["indirect_path"]) / path_num[d]
    acc_reversed[d]["path_acc"] = (edge_num[d] * acc_reversed[d]["edge"] + (path_num[d] - edge_num[d]) * acc_reversed[d]["indirect_path"]) / path_num[d]

In [99]:
# add a new column to pairwise_samples, 'path_acc', which equals 'indirectpath'
for d in datasets:
    pairwise_samples[d]["path_acc"] = pairwise_samples[d]["indirect_path"]

In [100]:
# calculate the weighted accuracy of edge, indirect_path, no_path_bi, repsectively, weighted by the number of samples in each dataset\
weighted_accuracy = {k:0 for k in pairwise_samples[datasets[0]]}
for d in datasets:
    for k in pairwise_samples[d]:
        weighted_accuracy[k] += len(pairwise_samples[d][k]) * accuracy[d][k]
for k in pairwise_samples[datasets[0]]:
    weighted_accuracy[k] /= sum([len(pairwise_samples[d][k]) for d in datasets])
weighted_accuracy

{'edge': 0.9166666666666666,
 'indirect_path': 0.6865671641791045,
 'no_path_bi': 0.4393939393939394,
 'path_acc': 0.7555929551986658}

In [101]:
weighted_reversed_accuracy = {k:0 for k in pairwise_samples[datasets[0]]}
for d in datasets:
    for k in pairwise_samples[d]:
        weighted_reversed_accuracy[k] += len(pairwise_samples[d][k]) * acc_reversed[d][k]
for k in pairwise_samples[datasets[0]]:
    weighted_reversed_accuracy[k] /= sum([len(pairwise_samples[d][k]) for d in datasets])
weighted_reversed_accuracy

{'edge': 0.030303030303030304,
 'indirect_path': 0.208955223880597,
 'no_path_bi': 0.0,
 'path_acc': 0.15078649969766383}

In [102]:
## LLM related parameters
edge_acc = weighted_accuracy["edge"]
ind_acc = weighted_accuracy["indirect_path"]
no_acc = weighted_accuracy["no_path_bi"]
path_acc = weighted_accuracy["path_acc"]
edge_rev = weighted_reversed_accuracy["edge"]
ind_rev = weighted_reversed_accuracy["indirect_path"]
path_rev = weighted_reversed_accuracy["path_acc"]

p_e = 1 - no_acc
p_r = path_rev
p_r_d = edge_rev
p_m_d = 1 - edge_acc - edge_rev
p_c = path_acc
print(f"p_e: {p_e}, p_r: {p_r}, p_r_d: {p_r_d}, p_m_d: {p_m_d}, p_c: {p_c}")

p_e: 0.5606060606060606, p_r: 0.15078649969766383, p_r_d: 0.030303030303030304, p_m_d: 0.053030303030303066, p_c: 0.7555929551986658


In [112]:
# structural related parameters
gamma_1_dict = {}
for d in datasets:
    n, e, p = node_num[d], edge_num[d], path_num[d]
    cn2 = n*(n-1) / 2
    gamma_1_dict[d] = (cn2 - p) / cn2
gamma_1 = np.mean(list(gamma_1_dict.values()))
print(f"gamma_1: {gamma_1}")

gamma_1: 0.5128619140036711


In [131]:
# CSL related parameters
gamma_2_dict = {}
z_1_dict = {}
z_2_dict = {}
pre_dict = {} 
sizes = [{"asia": 250, "child": 500, "insurance": 500, "alarm": 1000, "cancer": 250, "mildew": 8000, "water": 1000, "barley": 2000},
                             {"asia": 1000, "child": 2000, "insurance": 2000, "alarm": 4000, "cancer": 1000, "mildew": 32000, "water": 4000, "barley": 8000}]
for d in datasets:
    true_dag = true_dags[d]
    n = true_dag.shape[0]
    reachability = true_dags[d].copy()
    for k in range(n):
        for i in range(n):
            for j in range(n):
                reachability[i, j] = reachability[i, j] or (
                    reachability[i, k] and reachability[k, j])
    z_1_dict[d] = {sizes[0][d]:[], sizes[1][d]:[]}
    z_2_dict[d] = {sizes[0][d]:[], sizes[1][d]:[]}
    gamma_2_dict[d] = {sizes[0][d]:[], sizes[1][d]:[]}
    pre_dict[d] = {sizes[0][d]:[], sizes[1][d]:[]}
    for s in range(2):
        for r in range(6):
            fp = f"out/adj-matrix/{d}-{sizes[s][d]}-{r+1}-MINOBSx-bdeu-iter1.txt"
            ev_dag = np.loadtxt(fp, dtype=int)
            gamma_2_dict[d][sizes[s][d]].append(np.sum(ev_dag) / true_dag.shape[0])
            if np.sum(ev_dag) == 0:
                continue
            # z_1 is the ratio of correct edges
            z_1_dict[d][sizes[s][d]].append(np.sum(ev_dag * true_dag) / np.sum(ev_dag))
            # z_2 is the ratio of reversed edges
            z_2_dict[d][sizes[s][d]].append(np.sum(ev_dag * true_dag.T) / np.sum(ev_dag))
            # pre is the probability of rechatbility[v1,v2] = 1 given the condition that ev_dag[v2,v1] = 1
            pre_dict[d][sizes[s][d]].append(np.sum(reachability * ev_dag.T) / np.sum(ev_dag.T))
gamma_2 = np.mean([np.mean(gamma_2_dict[d][sizes[0][d]]) for d in datasets] + [np.mean(gamma_2_dict[d][sizes[1][d]]) for d in datasets])
z_1 = np.mean([np.mean(z_1_dict[d][sizes[0][d]]) for d in datasets] + [np.mean(z_1_dict[d][sizes[1][d]]) for d in datasets])
z_2 = np.mean([np.mean(z_2_dict[d][sizes[0][d]]) for d in datasets] + [np.mean(z_2_dict[d][sizes[1][d]]) for d in datasets])
pre = np.mean([np.mean(pre_dict[d][sizes[0][d]]) for d in datasets] + [np.mean(pre_dict[d][sizes[1][d]]) for d in datasets])
print(f"gamma_2: {gamma_2}, z_1: {z_1}, z_2: {z_2}, pre: {pre}")

gamma_2: 1.0875914698180322, z_1: 0.8763823919470554, z_2: 0.050014626639769875, pre: 0.05576574731711764


In [132]:
p_e*gamma_1 + p_r*(1-gamma_1)

0.3609673441012883

In [133]:
((p_r_d+p_m_d)*z_1 + p_m_d*z_2 + (p_r + p_c*pre)*(1-z_1-z_2))*gamma_2

0.09775690104112862

In [135]:
ours = 0.09775690104112862
full = 0.3609673441012883
1 /(2*ours / full)

1.8462499335439286

In [137]:
save_dict = {"llm_sample":pairwise_samples,"llm_acc":accuracy, "llm_reversed":acc_reversed,"gamma1":gamma_1_dict,"gamma_2":gamma_2_dict,"z_1":z_1_dict,"z_2":z_2_dict,"pre":pre_dict}
json.dump(save_dict, open("out/llm_estimation.json", "w"), indent=4)

In [2]:
import json
para_record = json.load(open("out/llm_estimation.json", "r"))

In [4]:
para_record.keys()

dict_keys(['llm_sample', 'llm_acc', 'llm_reversed', 'gamma1', 'gamma_2', 'z_1', 'z_2', 'pre'])

In [7]:
para_record['llm_acc']
import pandas as pd

llm_acc = pd.DataFrame(para_record['llm_acc'])
llm_acc

Unnamed: 0,alarm,asia,insurance,mildew,child,cancer,water,barley
edge,1.0,1.0,0.85,0.95,1.0,1.0,0.95,0.7
indirect_path,0.65,1.0,0.95,1.0,0.5,1.0,0.5,0.3
no_path_bi,0.6,0.8,0.35,0.1,0.5,0.0,0.45,0.5
path_acc,0.722197,1.0,0.919048,0.991901,0.695312,1.0,0.671676,0.362338


In [8]:
llm_reversed = pd.DataFrame(para_record["llm_reversed"])
llm_reversed

Unnamed: 0,alarm,asia,insurance,mildew,child,cancer,water,barley
edge,0.0,0.0,0.05,0.05,0.0,0.0,0.05,0.05
indirect_path,0.15,0.0,0.05,0.0,0.4,0.0,0.5,0.3
no_path_bi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
path_acc,0.119058,0.0,0.05,0.008099,0.24375,0.0,0.328324,0.261039


In [11]:
llm_sample = para_record["llm_sample"]
for d in llm_sample:
    for s in llm_sample[d]:
        llm_sample[d][s] = len(llm_sample[d][s])
llm_sample

{'alarm': {'edge': 20, 'indirect_path': 20, 'no_path_bi': 20, 'path_acc': 20},
 'asia': {'edge': 8, 'indirect_path': 10, 'no_path_bi': 10, 'path_acc': 10},
 'insurance': {'edge': 20,
  'indirect_path': 20,
  'no_path_bi': 20,
  'path_acc': 20},
 'mildew': {'edge': 20, 'indirect_path': 20, 'no_path_bi': 20, 'path_acc': 20},
 'child': {'edge': 20, 'indirect_path': 20, 'no_path_bi': 20, 'path_acc': 20},
 'cancer': {'edge': 4, 'indirect_path': 4, 'no_path_bi': 2, 'path_acc': 4},
 'water': {'edge': 20, 'indirect_path': 20, 'no_path_bi': 20, 'path_acc': 20},
 'barley': {'edge': 20, 'indirect_path': 20, 'no_path_bi': 20, 'path_acc': 20}}

In [21]:
g2 = para_record["gamma_2"]
g2_avg = {}
for d in g2:
    t = 0
    for s in g2[d]:
        t += sum(g2[d][s])
    g2_avg[d] = t / (2*len(g2[d][s]))
g2_avg.values()

dict_values([1.2162162162162165, 1.0104166666666667, 1.4351851851851851, 0.7904761904761904, 1.0916666666666666, 0.5500000000000002, 1.3411458333333333, 1.265625])

In [28]:
z1 = para_record["z_1"]
z1_avg = {}
for d in z1:
    t = 0
    num = 0
    for s in z1[d]:
        
        t += sum(z1[d][s])
        num += len(z1[d][s])
    z1_avg[d] = t / num
z1_avg.values()

dict_values([0.9578342397144431, 0.8773148148148149, 0.9074871522028775, 0.8691919191919192, 0.9806998556998557, 0.9027777777777778, 0.6679817023732718, 0.8374181384479472])

In [29]:
z2 = para_record["z_2"]
z2_avg = {}
for d in z2:
    t = 0
    num = 0
    for s in z2[d]:
        
        t += sum(z2[d][s])
        num += len(z2[d][s])
    z2_avg[d] = t / num
z2_avg.values()

dict_values([0.9578342397144431, 0.8773148148148149, 0.9074871522028775, 0.8691919191919192, 0.9806998556998557, 0.9027777777777778, 0.6679817023732718, 0.8374181384479472])

In [22]:
para_record["gamma1"].values()

dict_values([0.6651651651651652, 0.35714285714285715, 0.5213675213675214, 0.5226890756302521, 0.6631578947368421, 0.2, 0.6512096774193549, 0.5221631205673759])

In [26]:
import numpy as np
import json

datasets = ["alarm", "asia", "insurance", "mildew", "child", "cancer", "water", "barley"]
gpt_dir = "out/gpt_history/for_estimation/"

answer_type_dict = {d:{k:{"direct_edges":[],"reversed_edges":[],"indirect_paths":[],"reversed_indirect_paths":[],"no_connections":[], "common_parents":[], "common_childs":[]} for k in ["A","B","C"]} for d in datasets}
for d in datasets:
    mapping = np.loadtxt(f"BN_structure/mappings/{d}.mapping", dtype=str)
    true_dag = np.loadtxt(f"BN_structure/{d}_graph.txt", dtype=int)
    n = true_dag.shape[0]
    reachability = true_dag.copy()
    for k in range(n):
        for i in range(n):
            for j in range(n):
                reachability[i, j] = reachability[i, j] or (
                    reachability[i, k] and reachability[k, j])
    gpt_file = f"{gpt_dir}/{d}_history.json"
    json_history = json.load(open(gpt_file, "r"))
    for i, history in enumerate(json_history):
        start = history["start"]
        end = history["end"]
        if history["answer"] not in ["A","B","C"]:
            continue
        modif_dict = answer_type_dict[d][history["answer"]]
        # mapping[i] = start, mapping[j] = end, check true_dag[i][j] and true_dag[j][i]
        i = np.where(mapping == start)[0][0]
        j = np.where(mapping == end)[0][0]
        if true_dag[i][j] == 1:
            modif_dict["direct_edges"].append((start, end))
        elif true_dag[j][i] == 1:
            modif_dict["reversed_edges"].append((start, end))
        elif reachability[i][j] == 1:
            modif_dict["indirect_paths"].append((start, end))
        elif reachability[j][i] == 1:
            modif_dict["reversed_indirect_paths"].append((start, end))
        else:
            modif_dict["no_connections"].append((start, end))
            if np.sum(true_dag[i].T * true_dag[j].T) > 0:
                modif_dict["common_childs"].append((start, end))
            elif np.sum(true_dag[i] * true_dag[j]) > 0:
                modif_dict["common_parents"].append((start, end))
            
        


In [4]:
answer_type_dict

{'alarm': {'A': {'direct_edges': [('HYPOVOLEMIA', 'LVEDVOLUME'),
    ('HYPOVOLEMIA', 'STROKEVOLUME'),
    ('LVEDVOLUME', 'CVP'),
    ('LVEDVOLUME', 'PCWP'),
    ('LVFAILURE', 'LVEDVOLUME'),
    ('LVFAILURE', 'STROKEVOLUME'),
    ('STROKEVOLUME', 'CO'),
    ('ERRLOWOUTPUT', 'HRBP'),
    ('ERRCAUTER', 'HREKG'),
    ('ERRCAUTER', 'HRSAT'),
    ('TPR', 'CATECHOL'),
    ('TPR', 'BP'),
    ('KINKEDTUBE', 'PRESS'),
    ('KINKEDTUBE', 'VENTLUNG'),
    ('FIO2', 'PVSAT'),
    ('PVSAT', 'SAO2'),
    ('PULMEMBOLUS', 'PAP'),
    ('PULMEMBOLUS', 'SHUNT'),
    ('SHUNT', 'SAO2'),
    ('INTUBATION', 'MINVOL'),
    ('INTUBATION', 'SHUNT'),
    ('INTUBATION', 'PRESS'),
    ('INTUBATION', 'VENTLUNG'),
    ('INTUBATION', 'VENTALV'),
    ('DISCONNECT', 'VENTTUBE'),
    ('VENTMACH', 'VENTTUBE'),
    ('VENTTUBE', 'PRESS'),
    ('VENTTUBE', 'VENTLUNG'),
    ('VENTLUNG', 'EXPCO2'),
    ('VENTLUNG', 'MINVOL'),
    ('VENTALV', 'PVSAT'),
    ('VENTALV', 'ARTCO2'),
    ('ARTCO2', 'EXPCO2'),
    ('CATECHOL', 'HR'),


In [27]:
# calculate the number of lists in answer_type_dict
answer_type_num = {d:{k:{"direct_edges":0,"reversed_edges":0,"indirect_paths":0,"reversed_indirect_paths":0,"no_connections":0, "common_parents":0, "common_childs":0} for k in ["A","B","C"]} for d in datasets}
for d in datasets:
    for k in answer_type_dict[d]:
        for kk in answer_type_dict[d][k]:
            answer_type_num[d][k][kk] = len(answer_type_dict[d][k][kk])
answer_type_num

{'alarm': {'A': {'direct_edges': 44,
   'reversed_edges': 2,
   'indirect_paths': 37,
   'reversed_indirect_paths': 0,
   'no_connections': 49,
   'common_parents': 0,
   'common_childs': 7},
  'B': {'direct_edges': 1,
   'reversed_edges': 22,
   'indirect_paths': 6,
   'reversed_indirect_paths': 11,
   'no_connections': 21,
   'common_parents': 0,
   'common_childs': 3},
  'C': {'direct_edges': 0,
   'reversed_edges': 0,
   'indirect_paths': 0,
   'reversed_indirect_paths': 3,
   'no_connections': 88,
   'common_parents': 0,
   'common_childs': 6}},
 'asia': {'A': {'direct_edges': 8,
   'reversed_edges': 0,
   'indirect_paths': 9,
   'reversed_indirect_paths': 0,
   'no_connections': 1,
   'common_parents': 0,
   'common_childs': 0},
  'B': {'direct_edges': 0,
   'reversed_edges': 7,
   'indirect_paths': 0,
   'reversed_indirect_paths': 5,
   'no_connections': 2,
   'common_parents': 0,
   'common_childs': 0},
  'C': {'direct_edges': 0,
   'reversed_edges': 0,
   'indirect_paths': 0,


In [28]:
# calculate the ratio of each type of real structures of each anwer for each dataset
# the total number is direct_edges + reversed_edges + indirect_paths + reversed_indirect_paths + no_connections
# if total number is 0, then the ratio is inf
answer_type_ratio = {d:{k:{"direct_edges":0,"reversed_edges":0,"indirect_paths":0,"reversed_indirect_paths":0,"no_connections":0, "common_parents":0, "common_childs":0} for k in ["A","B","C"]} for d in datasets}
for d in datasets:
    for k in answer_type_dict[d]:
        valid_keys = ["direct_edges","reversed_edges","indirect_paths","reversed_indirect_paths","no_connections"]
        total_num = 0
        for key in valid_keys:
            total_num += answer_type_num[d][k][key]
        if total_num == 0:
            answer_type_ratio[d][k] = {"direct_edges":np.inf,"reversed_edges":np.inf,"indirect_paths":np.inf,"reversed_indirect_paths":np.inf,"no_connections":np.inf, "common_parents":np.inf, "common_childs":np.inf}
            continue
        for kk in answer_type_dict[d][k]:
            answer_type_ratio[d][k][kk] = answer_type_num[d][k][kk] / total_num
answer_type_ratio

{'alarm': {'A': {'direct_edges': 0.3333333333333333,
   'reversed_edges': 0.015151515151515152,
   'indirect_paths': 0.2803030303030303,
   'reversed_indirect_paths': 0.0,
   'no_connections': 0.3712121212121212,
   'common_parents': 0.0,
   'common_childs': 0.05303030303030303},
  'B': {'direct_edges': 0.01639344262295082,
   'reversed_edges': 0.36065573770491804,
   'indirect_paths': 0.09836065573770492,
   'reversed_indirect_paths': 0.18032786885245902,
   'no_connections': 0.3442622950819672,
   'common_parents': 0.0,
   'common_childs': 0.04918032786885246},
  'C': {'direct_edges': 0.0,
   'reversed_edges': 0.0,
   'indirect_paths': 0.0,
   'reversed_indirect_paths': 0.03296703296703297,
   'no_connections': 0.967032967032967,
   'common_parents': 0.0,
   'common_childs': 0.06593406593406594}},
 'asia': {'A': {'direct_edges': 0.4444444444444444,
   'reversed_edges': 0.0,
   'indirect_paths': 0.5,
   'reversed_indirect_paths': 0.0,
   'no_connections': 0.05555555555555555,
   'comm

In [25]:
import pandas as pd
# #print answer_type_ratio in a table, 
# answer_type_ratio_df = pd.DataFrame(answer_type_ratio)
# answer_type_ratio_df
# take the ratio of different structures as a column in a dataframe
answer_type_ratio_df = pd.DataFrame(answer_type_ratio)
answer_type_ratio_df = answer_type_ratio_df.stack().reset_index()
# the colum 0 is a dict of the ratio of different structures, we need to expand it to different columns
answer_type_ratio_df = pd.concat([answer_type_ratio_df.drop([0], axis=1), answer_type_ratio_df[0].apply(pd.Series)], axis=1)
#save the dataframe to a csv file
answer_type_ratio_df.to_csv("out/answer_type_ratio.csv", index=False)

In [10]:
# calculate the sum of the number of direct_edges, etc of all datasets grouped by answers in answer_type_dict
answer_type_sum = {k:{"direct_edges":0,"reversed_edges":0,"indirect_paths":0,"reversed_indirect_paths":0,"no_connections":0, "common_parents":0, "common_childs":0} for k in ["A","B","C"]}
for d in datasets:
    for k in answer_type_dict[d]:
        for kk in answer_type_dict[d][k]:
            answer_type_sum[k][kk] += len(answer_type_dict[d][k][kk])
answer_type_sum

{'A': {'direct_edges': 271,
  'reversed_edges': 54,
  'indirect_paths': 197,
  'reversed_indirect_paths': 38,
  'no_connections': 186,
  'common_parents': 42,
  'common_childs': 42},
 'B': {'direct_edges': 6,
  'reversed_edges': 141,
  'indirect_paths': 32,
  'reversed_indirect_paths': 71,
  'no_connections': 92,
  'common_parents': 20,
  'common_childs': 20},
 'C': {'direct_edges': 2,
  'reversed_edges': 9,
  'indirect_paths': 69,
  'reversed_indirect_paths': 74,
  'no_connections': 232,
  'common_parents': 53,
  'common_childs': 53}}