In [2]:
import json
import re
import itertools

In [40]:
with open("../../output/result/production_using_compound-list+filtering.json", 'r') as f:
    extracted = json.load(f)

In [41]:
def match_start(true, pred):
    return [(t, p) for t, p in itertools.product(true, pred) \
            if re.search(fr'^{re.escape(t)}', p) and t != p]

def match_end(true, pred):
    return [(t, p) for t, p in itertools.product(true, pred) \
            if re.search(fr'{re.escape(t)}$', p) and t != p]

def partial_match(true, pred, m_type=None):
    if m_type is 0:
        return list(set([(t, p) for t, p in itertools.product(true, pred) \
                if (re.search(fr'{re.escape(t)}', p) or re.search(fr'{re.escape(p)}', t))]))
    # 正解データを返す
    elif m_type is 1:
        return list(set([t for t, p in itertools.product(true, pred) \
                if (re.search(fr'{re.escape(t)}', p) or re.search(fr'{re.escape(p)}', t))]))
    # 予測データを返す
    elif m_type is 2:
        return list(set([p for t, p in itertools.product(true, pred) \
                if (re.search(fr'{re.escape(t)}', p) or re.search(fr'{re.escape(p)}', t))]))
    # 部分一致した場合->正解データ，一致しなかった場合->予測データ
    elif m_type is 3:
        return list(set([t if (re.search(fr'{re.escape(t)}', p) or re.search(fr'{re.escape(p)}', t)) else p \
                         for t, p in itertools.product(true, pred)]))


def precision(TP, FP):
        return TP / (TP + FP) if (TP + FP) != 0 else 0.0

def recall(TP, FN):
    return TP / (TP + FN) if (TP + FN) != 0 else 0.0

def f1(precision, recall):
    return 2 * precision * recall / (precision + recall) \
        if (precision + recall) != 0 else 0.0

In [42]:
annotation_size = 0
extracted_size = 0
TP = 0
FP = 0
FN = 0

for _id, v in extracted.items():
    true_set = set(v['true'])
    pred_set = set(v['predict'])
    
    partial_match_pred = set(partial_match(true_set, pred_set, m_type=3))
    
    annotation_size += len(true_set)
    extracted_size += len(pred_set)
    TP += len(partial_match_pred & pred_set)
    FP += len(partial_match_pred - true_set)
    FN += len(true_set - partial_match_pred)
    
    
score = {
    'annotation_size': annotation_size
    , 'extracted_size': extracted_size
    , 'TP': TP
    , 'FP': FP
    , 'FN': FN
    , 'precision': precision(TP, FP)
    , 'recall': recall(TP, FN)
    , 'f1': f1(precision(TP, FP), recall(TP, FN))
}

In [43]:
score

{'annotation_size': 94,
 'extracted_size': 45,
 'TP': 39,
 'FP': 32,
 'FN': 63,
 'precision': 0.5492957746478874,
 'recall': 0.38235294117647056,
 'f1': 0.4508670520231214}