# Resolve and convert our CXRGraph to the same scheme as RadGraph

Parse the annotated data exported from BRAT and downgrade our CXRGraph annotations to the RadGraph scheme so that the two can be compared directly.


In [511]:
# Root of the BRAT annotation files; bart2json reads <dataset>/<split>/<file>.txt and .ann from here.
brat_data_dir = "/Users/liao/myProjects/repo/remote_brat/data/structured_reporting/ours/liao"
# Root whose <dataset>/label_in_use/<split>/ listing decides which files are converted.
to_be_annotated_dir = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/to_be_annotated"
# Output directory for the CXRGraph annotations converted to the RadGraph scheme.
cxrgraph_output_dir = "./outputs/cxr_graph/json4ner_re_radraph_version"

In [512]:
import os
import json
import re

In [513]:
import shutil

# Start from an empty output directory: bart2json opens its output files in
# append mode, so results of a previous run would otherwise be duplicated.
if os.path.exists(cxrgraph_output_dir):
    shutil.rmtree(cxrgraph_output_dir)

os.makedirs(cxrgraph_output_dir)

## Resolve BRAT result to json

In [514]:
class AnnEntityClass:
    """One BRAT text-bound annotation (a ``T...`` line of an .ann file).

    The constructor parses the stripped line immediately. Token-level
    positions and the relation chains start out empty; this class never
    fills them in itself.

    Instances compare equal to another entity with the same BRAT id and
    also to the bare id string, which lets ``list.index("T3")`` and
    ``"T3" in entity_list`` work on lists of entities.
    """

    def __init__(self, stripped_str) -> None:
        # Fields taken from the .ann line.
        self.brat_id = ""  # e.g. T0
        self.label = ""
        self.start_index = -1  # character offset, inclusive
        self.end_index = -1  # character offset, exclusive
        self.token_str = ""
        self.att_objs = []

        # Token-level position; stays at -1 until assigned from outside.
        self.id = ""  # e.g. E0
        self.start_token_idx = -1  # inclusive
        self.end_token_idx = -1  # inclusive
        self.sent_idx = -1

        self.type = ""  # ANAT, OBS or LOCATT
        # Incoming / outgoing neighbours, one bucket per relation label.
        self.chain_info = {
            relation_name: {"in": [], "out": []}
            for relation_name in ("modify", "part_of", "located_at", "suggestive_of")
        }
        self.resolve(stripped_str)

    def get_ann_str(self) -> str:
        """Return the token span followed by the BRAT line, newline-terminated."""
        token_span = f"({self.start_token_idx},{self.end_token_idx})"
        char_span = f"{self.label} {self.start_index} {self.end_index}"
        return f"{token_span}:{self.brat_id}\t{char_span}\t{self.token_str}\n"

    def resolve(self, stripped_str):
        """Parse ``stripped_str`` into the BRAT fields and derive ``type``.

        Raises:
            ValueError: if the line does not look like a ``T`` annotation or
                its label is not one of the known entity labels.
        """
        parsed = re.match(r"(T\d+)\t(.+) (\d+) (\d+)\t(.+)", stripped_str)
        if not parsed:
            raise ValueError(f"Cannot resolve: {stripped_str}")

        self.brat_id, self.label, char_start, char_end, self.token_str = parsed.groups()
        self.start_index = int(char_start)
        self.end_index = int(char_end)

        if self.label == "Anatomy":
            self.type = "ANAT"
        elif self.label == "Location-Attribute":
            self.type = "LOCATT"
        elif self.label in ("Observation-Present", "Observation-Absent", "Observation-Uncertain"):
            self.type = "OBS"
        else:
            raise ValueError(f"Cannot identify: {self.label}")

    def __repr__(self) -> str:
        return self.get_ann_str()

    __str__ = __repr__

    def __eq__(self, other):
        # Entities are identified by BRAT id; anything else is compared to the id directly.
        if not isinstance(other, AnnEntityClass):
            return other == self.brat_id
        return self.brat_id == other.brat_id

    def __hash__(self):
        return hash(self.brat_id)
        

class AnnRelationClass:
    """One BRAT relation annotation (an ``R...`` line of an .ann file).

    Holds the relation label and the BRAT ids of its two arguments.
    Instances compare equal to another relation with the same BRAT id and
    to the bare id string.
    """

    def __init__(self, stripped_str) -> None:
        self.brat_id = ""  # e.g. R0
        self.label = ""
        self.arg1 = ""  # source entity id, e.g. T0
        self.arg2 = ""  # target entity id, e.g. T1
        self.resolve(stripped_str)

        self.id = ""  # e.g. R0

    def get_ann_str(self) -> str:
        """Return the relation rendered as a BRAT line, newline-terminated."""
        arguments = f"Arg1:{self.arg1} Arg2:{self.arg2}"
        return f"{self.brat_id}\t{self.label} {arguments}\t\n"

    def __repr__(self) -> str:
        return self.get_ann_str()

    __str__ = __repr__

    def resolve(self, stripped_str):
        """Parse ``stripped_str`` into id, label and the two argument ids.

        Raises:
            ValueError: if the line does not look like an ``R`` annotation.
        """
        parsed = re.match(r"(R\d+)\t(.+) Arg1:(T\d+) Arg2:(T\d+)", stripped_str)
        if not parsed:
            raise ValueError(f"Cannot resolve: {stripped_str}")
        self.brat_id, self.label, self.arg1, self.arg2 = parsed.groups()

    def __eq__(self, other):
        # Relations are identified by BRAT id; anything else is compared to the id directly.
        if not isinstance(other, AnnRelationClass):
            return other == self.brat_id
        return self.brat_id == other.brat_id

    def __hash__(self):
        return hash(self.brat_id)


class AnnAttributeClass:
    """One BRAT attribute annotation (an ``A...`` line of an .ann file).

    An attribute targets one entity and may carry a value. After parsing,
    ``value`` is ``None`` when the line has no value part.

    Instances compare equal to another attribute with the same BRAT id,
    and to a bare string that matches either the attribute's own id or
    the id of the entity it targets.
    """

    def __init__(self, stripped_str) -> None:
        self.brat_id = ""  # e.g. A0
        self.label = ""
        self.value = ""
        self.target_entity_id = ""  # e.g. T0
        self.resolve(stripped_str)

    def get_ann_str(self) -> str:
        """Return the attribute rendered as a BRAT line (no trailing newline)."""
        rendered = f"{self.brat_id}\t{self.label} {self.target_entity_id}"
        if self.value:
            rendered = f"{rendered} {self.value}"
        return rendered

    def get_json_str(self) -> str:
        """Return the JSON-side name of this attribute, or ``None`` for an unknown label."""
        flag_names = {
            "isAbnormal_OBS": "is_abnormal",
            "isNormal_OBS": "is_normal",
        }
        valued_names = {
            "Uncertian_Tendency": "uncertainy",
            "isRelative_Modifier": "is_relative_modifier",
            "show_RelativeChange": "has_relative_change",
        }
        if self.label in flag_names:
            return flag_names[self.label]
        if self.label in valued_names:
            return f"{valued_names[self.label]}:{self.value}"
        return None

    def __repr__(self) -> str:
        return self.get_ann_str()

    __str__ = __repr__

    def resolve(self, stripped_str):
        """Parse ``stripped_str`` into id, label, target entity id and optional value.

        Raises:
            ValueError: if the line does not look like an ``A`` annotation.
        """
        parsed = re.match(r"(A\d+)\t(.+) (T\d+) ?(.+)?", stripped_str)
        if not parsed:
            raise ValueError(f"Cannot resolve: {stripped_str}")
        self.brat_id, self.label, self.target_entity_id, self.value = parsed.groups()

    def __eq__(self, other):
        if not isinstance(other, AnnAttributeClass):
            # A bare value matches on the attribute id or on the targeted entity id.
            return other == self.brat_id or other == self.target_entity_id
        return self.brat_id == other.brat_id

    def __hash__(self):
        return hash(self.brat_id)

In [515]:
def _remove_prefix(text, prefix):
    """Return ``text`` without the exact leading ``prefix`` (unchanged if absent)."""
    if prefix and text.startswith(prefix):
        return text[len(prefix):]
    return text


def _remove_suffix(text, suffix):
    """Return ``text`` without the exact trailing ``suffix`` (unchanged if absent)."""
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


def bart2json(dataset_name, datasplit, file_name=None):
    """Convert one BRAT-annotated report into a RadGraph-style JSON line.

    Reads ``<brat_data_dir>/<dataset_name>/<datasplit>/<file_name>`` and the
    matching ``.ann`` file, maps the CXRGraph annotation onto RadGraph-style
    labels, and appends one JSON document to
    ``<cxrgraph_output_dir>/<dataset_name>-<datasplit>.json``.

    Conversion rules implemented below:
      * Only the first line of the .txt file is the report; annotations whose
        character span lies beyond it are discarded.
      * ``Location-Attribute`` entities take the label of the entity they are
        ``located_at``.
      * ``part_of`` relations, and relations whose subject is a
        ``Location-Attribute``, are emitted as ``modify``.
      * Some relations whose object has an outgoing ``part_of`` are redirected
        to the end(s) of that ``part_of`` chain.

    Args:
        dataset_name: Dataset folder name, also the file-name prefix
            (``"MIMIC-CXR"`` gets ``_`` turned into ``/`` in the doc key).
        datasplit: Split folder name, e.g. ``"test"``.
        file_name: Name of the report ``.txt`` file. When omitted, the
            module-level variable ``file_name`` is used, which is how the
            driver loops call this function.

    Raises:
        NameError: if ``file_name`` is omitted and no module-level
            ``file_name`` exists.
        ValueError: for an unparsable .ann line, an attribute whose entity
            has not been declared, an entity that is not aligned to token
            boundaries, or a Location-Attribute without a ``located_at``
            relation.
    """
    if file_name is None:
        # Backward compatibility: the function used to read the caller's loop variable.
        try:
            file_name = globals()["file_name"]
        except KeyError:
            raise NameError("name 'file_name' is not defined") from None

    # str.lstrip/rstrip strip *character sets*, not substrings, so an exact
    # prefix/suffix removal is used to avoid eating into the real name.
    base_name = _remove_prefix(file_name, f"{dataset_name}_")
    if dataset_name == "MIMIC-CXR":
        doc_key = base_name.replace("_", "/")
    else:
        doc_key = _remove_suffix(base_name, ".txt")

    # Locate the input files.
    txt_file_name = file_name
    ann_file_name = f'{_remove_suffix(file_name, ".txt")}.ann'

    txt_file = os.path.join(brat_data_dir, dataset_name, datasplit, txt_file_name)
    ann_file = os.path.join(brat_data_dir, dataset_name, datasplit, ann_file_name)

    output_dict = {
        "doc_key": doc_key,
        "sentences": [],
        "ner": [],
        "relations": [],
    }

    # Read the original document: only the first line is the report.
    with open(txt_file, "r", encoding="utf-8") as f:
        doc_str = f.readline().strip()

    # Annotations beyond this offset must be excluded: the RadGraph labels were
    # shown to the annotators alongside the report, so the .ann file also
    # contains those pre-existing labels.
    valid_doc_len = len(doc_str)

    # Read the annotations.
    with open(ann_file, "r", encoding="utf-8") as f:
        ann_lines = f.readlines()

    ent_obj_list = []
    rel_obj_list = []
    att_obj_list = []
    ent_by_id = {}  # brat id -> first entity declared with that id
    for ann_line in ann_lines:
        stripped_ann_line = ann_line.strip()
        if not stripped_ann_line:
            # A blank line carries no annotation.
            continue
        if stripped_ann_line.startswith("T"):
            ent = AnnEntityClass(stripped_ann_line)
            ent_obj_list.append(ent)
            ent_by_id.setdefault(ent.brat_id, ent)
        elif stripped_ann_line.startswith("R"):
            rel_obj_list.append(AnnRelationClass(stripped_ann_line))
        elif stripped_ann_line.startswith("A"):
            att = AnnAttributeClass(stripped_ann_line)
            att_obj_list.append(att)
            if att.target_entity_id not in ent_by_id:
                raise ValueError(
                    f"Attribute {att.brat_id} targets unknown entity {att.target_entity_id}"
                )
            ent_by_id[att.target_entity_id].att_objs.append(att)
        else:
            raise ValueError(f"Uncatched value from .ann file: {stripped_ann_line}")

    # Keep only annotations that lie inside the report text.
    ent_obj_list = [
        ent for ent in ent_obj_list
        if ent.start_index <= valid_doc_len and ent.end_index <= valid_doc_len
    ]
    ent_by_id = {}
    for ent in ent_obj_list:
        ent_by_id.setdefault(ent.brat_id, ent)
    rel_obj_list = [rel for rel in rel_obj_list if rel.arg1 in ent_by_id and rel.arg2 in ent_by_id]
    # Not emitted in the RadGraph-style output (entities keep their own att_objs);
    # filtered for consistency with the entity list.
    att_obj_list = [att for att in att_obj_list if att.target_entity_id in ent_by_id]

    # Locate every token in the text (character offsets) and split into sentences.
    doc_tokens = doc_str.split(" ")
    token_start_idx_list = []  # offset of the token's first char
    token_end_idx_list = []  # offset of the token's last char + 1
    curr_start = 0

    sent_idx = 0
    tokidx2sentidx = []
    sent = []
    for tok_idx, token_str in enumerate(doc_tokens):
        # Character span of this token.
        token_start_idx_list.append(curr_start)
        token_end_idx_list.append(curr_start + len(token_str))
        curr_start += len(token_str) + 1  # whitespace

        # Sentence split: a "." token, or the last token, closes a sentence.
        tokidx2sentidx.append(sent_idx)
        sent.append(token_str)
        if token_str == "." or tok_idx == len(doc_tokens) - 1:
            output_dict["sentences"].append(sent)
            output_dict["ner"].append([])
            output_dict["relations"].append([])
            sent_idx += 1
            sent = []
    assert len(doc_tokens) == sum(len(s) for s in output_dict["sentences"])

    # Offsets are strictly increasing, so each maps to exactly one token.
    start_char2tok = {char_idx: tok_idx for tok_idx, char_idx in enumerate(token_start_idx_list)}
    end_char2tok = {char_idx: tok_idx for tok_idx, char_idx in enumerate(token_end_idx_list)}
    for ent in ent_obj_list:
        if ent.start_index not in start_char2tok or ent.end_index not in end_char2tok:
            raise ValueError(
                f"Entity {ent.brat_id} ({ent.start_index}-{ent.end_index}) "
                f"is not aligned to token boundaries in {file_name}"
            )
        ent.start_token_idx = start_char2tok[ent.start_index]
        ent.end_token_idx = end_char2tok[ent.end_index]
        assert ent.token_str == " ".join(doc_tokens[ent.start_token_idx:ent.end_token_idx + 1])

        starttok_sent_idx = tokidx2sentidx[ent.start_token_idx]
        endtok_sent_idx = tokidx2sentidx[ent.end_token_idx]
        ent.sent_idx = starttok_sent_idx
        assert starttok_sent_idx == endtok_sent_idx

    # Produce data in the same shape as RadGraph so it can be evaluated.

    # Entity
    entity_label_mapper = {
        "Anatomy": "ANAT-DP",
        "Observation-Present": "OBS-DP",
        "Observation-Absent": "OBS-DA",
        "Observation-Uncertain": "OBS-U",
    }
    for ent in sorted(ent_obj_list, key=lambda x: x.start_token_idx):
        # A Location-Attribute takes the label of the entity it points to.
        if ent.label == "Location-Attribute":
            target_rel = next(
                (
                    rel_obj for rel_obj in rel_obj_list
                    if rel_obj.arg1 == ent.brat_id and rel_obj.label == "located_at"
                ),
                None,
            )
            if target_rel is None:
                raise ValueError(
                    f"Location-Attribute {ent.brat_id} has no located_at relation in {file_name}"
                )
            target_ent = ent_by_id[target_rel.arg2]
            ent_label = entity_label_mapper[target_ent.label]
        else:
            ent_label = entity_label_mapper[ent.label]
        output_dict["ner"][ent.sent_idx].append([ent.start_token_idx, ent.end_token_idx, ent_label])

    # Relation: record incoming/outgoing neighbours on every entity.
    for rel in rel_obj_list:
        subj = ent_by_id[rel.arg1]
        obj = ent_by_id[rel.arg2]
        subj.chain_info[rel.label]["out"].append(obj)
        obj.chain_info[rel.label]["in"].append(subj)

    def get_final_ele_along_chain(curr_ent, final_ents=None, recursive_keys=(("part_of", "out"),), stop_keys=()):
        """Collect the entities at which the chain starting from ``curr_ent`` ends.

        An entity ends the chain when it has no neighbour under
        ``recursive_keys``, or when ``stop_keys`` is non-empty and it has a
        neighbour under every stop key. Below the starting entity the walk
        always follows outgoing ``part_of`` and stops at entities with an
        incoming ``located_at``. Always returns the accumulator list.
        """
        if final_ents is None:
            final_ents = []
        candidate_entities = [ent for k1, k2 in recursive_keys for ent in curr_ent.chain_info[k1][k2]]
        hit_stop = bool(stop_keys) and all(curr_ent.chain_info[k1][k2] for k1, k2 in stop_keys)
        if not candidate_entities or hit_stop:
            final_ents.append(curr_ent)
        else:
            for ent in candidate_entities:
                get_final_ele_along_chain(
                    curr_ent=ent,
                    final_ents=final_ents,
                    recursive_keys=(("part_of", "out"),),
                    stop_keys=(("located_at", "in"),),
                )
        return final_ents

    for rel in sorted(rel_obj_list, key=lambda x: ent_by_id[x.arg1].start_token_idx):
        subj_ent = ent_by_id[rel.arg1]
        obj_ent = ent_by_id[rel.arg2]
        # 1. Rewrite the relation label.
        rel_label = rel.label
        if subj_ent.type == "LOCATT" or rel.label == "part_of":
            rel_label = "modify"
        # 2. Transfer the pointing.
        # `a -located_at/suggestive_of-> b -part_of-> c` becomes `a --> c`
        # `a(OBS) -loc_at-> b(LOCATT) -loc_at-> c(ANAT) -part_of-> d(ANAT)` becomes `a -loc_at-> d`
        obj_has_part_of = bool(obj_ent.chain_info["part_of"]["out"])
        transfer_pointing = obj_has_part_of and (
            (subj_ent.type == "OBS" and obj_ent.type == "ANAT")
            or obj_ent.type == "LOCATT"
            or rel.label == "suggestive_of"
        )

        if transfer_pointing:
            new_obj_ents = get_final_ele_along_chain(curr_ent=obj_ent, recursive_keys=(("part_of", "out"),))
            for new_obj in new_obj_ents:
                output_dict["relations"][subj_ent.sent_idx].append([subj_ent.start_token_idx, subj_ent.end_token_idx, new_obj.start_token_idx, new_obj.end_token_idx, rel_label])
        else:
            output_dict["relations"][subj_ent.sent_idx].append([subj_ent.start_token_idx, subj_ent.end_token_idx, obj_ent.start_token_idx, obj_ent.end_token_idx, rel_label])

    # Append, so that all files of one dataset/split end up in one JSON-lines file.
    output_path = os.path.join(cxrgraph_output_dir, f"{dataset_name}-{datasplit}.json")
    with open(output_path, "a", encoding="utf-8") as f:
        f.write(json.dumps(output_dict))
        f.write("\n")

In [516]:
# Convert the test split of both datasets.
# NOTE: bart2json is called without a file argument here; it picks up the
# module-level `file_name` bound by the inner loop.
datasplit = "test"
for dataset_name in ["MIMIC-CXR", "CheXpert"]:
    for file_name in os.listdir(os.path.join(to_be_annotated_dir, dataset_name, "label_in_use", datasplit)):
        bart2json(dataset_name, datasplit)

In [517]:
# Convert the train and dev splits; only MIMIC-CXR is processed for these.
# NOTE: bart2json is called without a file argument here; it picks up the
# module-level `file_name` bound by the inner loop.
dataset_name = "MIMIC-CXR"
for datasplit in ["train", "dev"]:
    for file_name in os.listdir(os.path.join(to_be_annotated_dir, dataset_name, "label_in_use", datasplit)):
        bart2json(dataset_name, datasplit)

# Resolve RadGraph to json (the version for ner_re model training)

In [518]:

# Directory containing the RadGraph release files (test.json, train.json, dev.json).
radgraph_root_dir = "/Users/liao/Desktop/RadGraph/radgraph-extracting-clinical-entities-and-relations-from-radiology-reports-1.0.0"

# Output directory for the RadGraph data converted to JSON lines.
radgraph_output_dir = "./outputs/radgraph/json4ner_re"

# Start from an empty directory: the conversion below appends to its output files.
if os.path.exists(radgraph_output_dir):
    shutil.rmtree(radgraph_output_dir)

os.makedirs(radgraph_output_dir)

In [519]:
# For every output split: the sentence distance between subject and object of
# each relation (0 means both are in the same sentence).
cross_sent_relations = {split_name: [] for split_name in ("test", "test1", "train", "dev")}

# "test" and "test1" are both read from test.json, using the annotations of
# labeler_1 and labeler_2 respectively; the other splits have a single annotation.
labeler_by_split = {"test": "labeler_1", "test1": "labeler_2"}

for out_file_name in list(cross_sent_relations):
    in_file_name = "test" if out_file_name == "test1" else out_file_name

    input_file_path = os.path.join(radgraph_root_dir, f"{in_file_name}.json")
    with open(input_file_path, "r") as f:
        docs_dict = json.loads(f.readline())

    is_test_split = "test" in out_file_name

    for doc_key, doc in docs_dict.items():
        output_dict = {
            "doc_key": doc_key,
            "sentences": [],
            "ner": [],
            "relations": [],
        }

        # Split the space-tokenised text into sentences: a "." token, or the
        # last token, closes the current sentence.
        tokens = doc["text"].split(" ")
        last_token_id = len(tokens) - 1
        tokidx2sentidx = []
        sent = []
        for token_id, token in enumerate(tokens):
            tokidx2sentidx.append(len(output_dict["sentences"]))
            sent.append(token)
            if token == "." or token_id == last_token_id:
                output_dict["sentences"].append(sent)
                output_dict["ner"].append([])
                output_dict["relations"].append([])
                sent = []
        assert len(tokens) == sum(len(sentence) for sentence in output_dict["sentences"])

        # Pick the annotation set that belongs to this output split.
        labeler = labeler_by_split.get(out_file_name)
        doc_entities = doc["entities"] if labeler is None else doc[labeler]["entities"]

        # Entities and relations are stored under the sentence of the subject's first token.
        for entity in doc_entities.values():
            subj_start, subj_end = entity["start_ix"], entity["end_ix"]
            subj_sent_idx = tokidx2sentidx[subj_start]
            output_dict["ner"][subj_sent_idx].append([subj_start, subj_end, entity["label"]])
            for rel in entity["relations"]:
                rel_label, rel_obj_idx = rel[0], rel[1]
                target_entity = doc_entities[rel_obj_idx]
                obj_start, obj_end = target_entity["start_ix"], target_entity["end_ix"]
                obj_sent_idx = tokidx2sentidx[obj_start]
                cross_sent_relations[out_file_name].append(abs(subj_sent_idx - obj_sent_idx))
                output_dict["relations"][subj_sent_idx].append([subj_start, subj_end, obj_start, obj_end, rel_label])

        # Test documents go to the combined file and to their dataset's file
        # (doc keys containing "txt" are written as MIMIC-CXR, the rest as
        # CheXpert); train/dev documents are all written as MIMIC-CXR.
        if is_test_split:
            dataset_tag = "MIMIC-CXR" if "txt" in doc_key else "CheXpert"
            target_file_names = [f"All-{out_file_name}.json", f"{dataset_tag}-{out_file_name}.json"]
        else:
            target_file_names = [f"MIMIC-CXR-{out_file_name}.json"]

        json_line = json.dumps(output_dict) + "\n"
        for target_file_name in target_file_names:
            output_path = os.path.join(radgraph_output_dir, target_file_name)
            with open(output_path, "a", encoding="utf-8") as f:
                f.write(json_line)

# CXRGraph vs RadGraph

In [520]:
import os
import json

def cxr_vs_rad(radgraph_path, cxrgraph_path, show_diff=False):
    """Score CXRGraph annotations against RadGraph and print P / R / F1.

    Both files are JSON lines with ``doc_key``, ``ner`` and ``relations``
    (the latter two grouped per sentence). RadGraph is treated as gold and
    CXRGraph as prediction; every RadGraph document must have a CXRGraph
    document with the same ``doc_key``.

    Three scores are printed:
      * ``ner``  - entity span and label match.
      * ``rel``  - both spans and the relation label match.
      * ``rel+`` - as ``rel``, and the entity labels of both ends match too.

    Args:
        radgraph_path: Path of the gold JSON-lines file.
        cxrgraph_path: Path of the predicted JSON-lines file.
        show_diff: When true, print every predicted relation that fails the
            ``rel`` or ``rel+`` check together with the gold candidates.

    Raises:
        KeyError: if a RadGraph ``doc_key`` is missing from the CXRGraph file.
    """

    def load_jsonl(path):
        with open(path, "r", encoding="UTF-8") as handle:
            return [json.loads(row) for row in handle]

    def flatten(per_sentence):
        return [item for sentence_items in per_sentence for item in sentence_items]

    def mix_rel_with_ners(rels, ners):
        # Append the entity label of subject and object ("" when the span has no entity).
        def label_at(start, end):
            for ner in ners:
                if ner[0] == start and ner[1] == end:
                    return ner[2]
            return ""

        return [
            [s_start, s_end, o_start, o_end, label, label_at(s_start, s_end), label_at(o_start, o_end)]
            for s_start, s_end, o_start, o_end, label in rels
        ]

    radgraph_docs = load_jsonl(radgraph_path)
    cxrgraph_key2doc = {doc["doc_key"]: doc for doc in load_jsonl(cxrgraph_path)}

    # Gold totals are taken over every RadGraph document.
    sum_ner = sum(len(sent_ner) for doc in radgraph_docs for sent_ner in doc["ner"])
    sum_rel = sum(len(sent_rel) for doc in radgraph_docs for sent_rel in doc["relations"])

    eval_results = {
        field: {"num_gt_label": num_gt, "num_pred_label": 0, "num_correct_label": 0}
        for field, num_gt in (("ner", sum_ner), ("rel", sum_rel), ("rel+", sum_rel))
    }
    ner_stats = eval_results["ner"]
    rel_stats = eval_results["rel"]
    rel_plus_stats = eval_results["rel+"]

    for rad_doc in radgraph_docs:
        cxr_doc = cxrgraph_key2doc[rad_doc["doc_key"]]

        gold_ners = flatten(rad_doc["ner"])
        pred_ners = flatten(cxr_doc["ner"])
        ner_stats["num_pred_label"] += len(pred_ners)
        ner_stats["num_correct_label"] += sum(1 for pred_ner in pred_ners if pred_ner in gold_ners)

        gold_rels = flatten(rad_doc["relations"])
        gold_rels_with_ner = mix_rel_with_ners(gold_rels, gold_ners)
        pred_rels_with_ner = mix_rel_with_ners(flatten(cxr_doc["relations"]), pred_ners)

        for pred_rel in pred_rels_with_ner:
            bare_rel = pred_rel[:5]
            rel_stats["num_pred_label"] += 1
            rel_plus_stats["num_pred_label"] += 1

            if bare_rel not in gold_rels:
                if show_diff:
                    print("rel error:", rad_doc["doc_key"])
                    print("   ", pred_rel)
                    print("   ", [gold for gold in gold_rels_with_ner if gold[0:4] == pred_rel[:4]])
                continue

            rel_stats["num_correct_label"] += 1
            if pred_rel in gold_rels_with_ner:
                rel_plus_stats["num_correct_label"] += 1
            elif show_diff:
                print("rel+ error:", rad_doc["doc_key"])
                print("   ", pred_rel)
                print("   ", [gold for gold in gold_rels_with_ner if gold[0:5] == bare_rel])

    for eval_field, result_dict in eval_results.items():
        num_corr = result_dict["num_correct_label"]
        if num_corr > 0:
            p = num_corr / result_dict["num_pred_label"]
            r = num_corr / result_dict["num_gt_label"]
            f1 = 2 * (p * r) / (p + r)
        else:
            # Nothing correct: all three scores are reported as zero.
            p = r = f1 = 0.0
        print(f"[{eval_field}]: P: {p:.5f}, R: {r:.5f}, 【F1: {f1*100:.3f}】")

In [521]:
# MIMIC-CXR test split, RadGraph "test" output.
# NOTE(review): presumably the file the RadGraph conversion cell wrote from labeler_1; confirm the absolute path matches that output dir.
radgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/radgraph/json4ner_re/MIMIC-CXR-test.json"
cxrgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/cxr_graph/json4ner_re_radraph_version/MIMIC-CXR-test.json"
cxr_vs_rad(radgraph_path, cxrgraph_path, show_diff=False)

[ner]: P: 0.93861, R: 0.98220, 【F1: 95.991】
[rel]: P: 0.80160, R: 0.88840, 【F1: 84.277】
[rel+]: P: 0.79661, R: 0.88287, 【F1: 83.753】


In [522]:
# MIMIC-CXR test split, RadGraph "test1" output, compared with the same CXRGraph test file.
# NOTE(review): presumably the file the RadGraph conversion cell wrote from labeler_2; confirm the absolute path matches that output dir.
radgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/radgraph/json4ner_re/MIMIC-CXR-test1.json"
cxrgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/cxr_graph/json4ner_re_radraph_version/MIMIC-CXR-test.json"
cxr_vs_rad(radgraph_path, cxrgraph_path, show_diff=False)

[ner]: P: 0.94601, R: 0.98841, 【F1: 96.674】
[rel]: P: 0.80857, R: 0.90111, 【F1: 85.234】
[rel+]: P: 0.80658, R: 0.89889, 【F1: 85.024】


In [523]:
# MIMIC-CXR train split: RadGraph vs. the converted CXRGraph annotations.
radgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/radgraph/json4ner_re/MIMIC-CXR-train.json"
cxrgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/cxr_graph/json4ner_re_radraph_version/MIMIC-CXR-train.json"
cxr_vs_rad(radgraph_path, cxrgraph_path, show_diff=False)

[ner]: P: 0.90990, R: 0.93744, 【F1: 92.346】
[rel]: P: 0.77432, R: 0.81299, 【F1: 79.319】
[rel+]: P: 0.75239, R: 0.78997, 【F1: 77.072】


In [524]:
# MIMIC-CXR dev split: RadGraph vs. the converted CXRGraph annotations.
radgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/radgraph/json4ner_re/MIMIC-CXR-dev.json"
cxrgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/cxr_graph/json4ner_re_radraph_version/MIMIC-CXR-dev.json"
cxr_vs_rad(radgraph_path, cxrgraph_path, show_diff=False)

[ner]: P: 0.90625, R: 0.93975, 【F1: 92.270】
[rel]: P: 0.76688, R: 0.81136, 【F1: 78.849】
[rel+]: P: 0.75188, R: 0.79548, 【F1: 77.306】


In [525]:
# CheXpert test split, RadGraph "test" output.
# NOTE(review): presumably the file the RadGraph conversion cell wrote from labeler_1; confirm the absolute path matches that output dir.
radgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/radgraph/json4ner_re/CheXpert-test.json"
cxrgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/cxr_graph/json4ner_re_radraph_version/CheXpert-test.json"
cxr_vs_rad(radgraph_path, cxrgraph_path, show_diff=False)

[ner]: P: 0.89934, R: 0.92318, 【F1: 91.110】
[rel]: P: 0.72294, R: 0.73828, 【F1: 73.053】
[rel+]: P: 0.70736, R: 0.72237, 【F1: 71.479】


In [526]:
# CheXpert test split, RadGraph "test1" output, compared with the same CXRGraph test file.
# NOTE(review): presumably the file the RadGraph conversion cell wrote from labeler_2; confirm the absolute path matches that output dir.
radgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/radgraph/json4ner_re/CheXpert-test1.json"
cxrgraph_path = "/Users/liao/myProjects/VSCode_workspace/cxr_graph/graph_annotation_process/outputs/cxr_graph/json4ner_re_radraph_version/CheXpert-test.json"
cxr_vs_rad(radgraph_path, cxrgraph_path, show_diff=False)

[ner]: P: 0.89868, R: 0.91938, 【F1: 90.891】
[rel]: P: 0.71515, R: 0.76340, 【F1: 73.849】
[rel+]: P: 0.70823, R: 0.75601, 【F1: 73.134】
