In [2]:
import json
import spacy
import numpy as np
from ner.evaluator import Evaluator, StrictEvaluator

# spaCy pipeline used further down to tokenize each text when counting tokens.
nlp = spacy.load("en_core_web_md")

# Matcher-produced annotations: a sequence of (text, {"entities": ...}) pairs.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open("../data_files/ner/mockup_1.json", "r", encoding="utf-8") as matcher_annot:
    matcher_annotated = json.load(matcher_annot)

# Gold annotations, stored in the same pair layout.
with open(
    "../data_files/ner/test_data/annotated_future_test_data.json", "r", encoding="utf-8"
) as gf:
    golds = json.load(gf)

# Only the entity lists of the gold data are needed; they are later looked up
# by position, so the order of `golds` is preserved.
gold = [annotation["entities"] for _, annotation in golds]

print(len(matcher_annotated))

# Named `evaluator` (not `eval`) so the builtin eval() is not shadowed.
evaluator = StrictEvaluator("en_core_web_md")
total_ents_stats = evaluator._initialize_stats_dicts()
# The "TEXT" entry accumulates the corpus-wide counts; the entries that remain
# in `total_ents_stats` are keyed by entity label.
total_text_stats = total_ents_stats.pop("TEXT")

# Annotated texts are matched to gold entries purely by position, so a gold
# list that is too short is reported up front instead of surfacing as an
# IndexError halfway through the loop.
if len(matcher_annotated) > len(gold):
    raise ValueError(
        f"{len(matcher_annotated)} annotated texts but only {len(gold)} gold entries"
    )

for idx, (text, ent) in enumerate(matcher_annotated):
    ents = ent["entities"]
    gold_ents = gold[idx]
    print(len(ents))
    print(len(gold_ents))
    per_entity_stats, text_stats = evaluator._collect_stats(ents, gold_ents)

    # Every non-punctuation token of the text counts towards its total.
    text_stats["total"] += sum(not token.is_punct for token in nlp(text))

    # True negatives: whatever is left of the total after tp/fp/fn, floored at 0.
    text_stats["tn"] = max(
        text_stats["total"] - (text_stats["tp"] + text_stats["fp"] + text_stats["fn"]),
        0,
    )

    # Validate every counter before touching the running totals. An explicit
    # raise is used because `assert` statements are removed under `python -O`.
    for stat_key, stat_value in text_stats.items():
        if stat_value < 0:
            raise ValueError(f"The value for \"{stat_key}\" is {stat_value}")

    for stat, value in text_stats.items():
        total_text_stats[stat] += value

    # Fold this text's per-label counts into the corpus-wide per-label totals.
    for entity_label, label_stats in per_entity_stats.items():
        for stat, value in label_stats.items():
            total_ents_stats[entity_label][stat] += value


# NOTE(review): the recorded run prints "Average performance per 0 texts" even
# though 56 texts were processed; presumably the evaluator keeps its own text
# counter that is not updated when its private helpers are driven manually -
# confirm against ner.evaluator.
metrics = evaluator._compute_scores(total_text_stats, ent_label="Text")
entity_class_metrics = [
    evaluator._compute_scores(label_stats, include_accuracy=False, ent_label=entity_label)
    for entity_label, label_stats in total_ents_stats.items()
]

evaluator._aggregate_and_print_results(np.array(metrics), np.array(entity_class_metrics))

56
7
16
29
30
15
15
27
28
0
2
44
52
102
88
42
60
36
46
50
60
41
84
51
51
9
14
59
41
10
9
40
37
15
23
42
47
28
27
70
69
92
50
42
46
32
43
11
10
0
4
16
21
30
35
21
40
52
59
18
21
0
5
14
26
47
37
19
31
8
13
25
30
30
42
45
55
8
21
53
55
47
51
26
26
6
12
10
10
17
26
9
20
29
32
13
32
11
13
22
21
36
49
41
52
29
34
12
14
11
13
14
19
Average performance per 0 texts:
	Precision: 0.6838
	Recall: 0.5908
	Accuracy: 0.9288
	F1 Score: 0.6339
	ABBREVIATION
		Precision: 79.2857
		Recall: 59.3583
		F1 Score: 67.8899
	AIRPLANE
		Precision: 68.1416
		Recall: 56.7219
		F1 Score: 61.9095
	AIRPORT_TERM
		Precision: 69.3548
		Recall: 68.2540
		F1 Score: 68.8000
	ALTITUDE
		Precision: 75.0000
		Recall: 61.0169
		F1 Score: 67.2897
	CREW
		Precision: 71.5278
		Recall: 68.2119
		F1 Score: 69.8305
	FLIGHT_PHASE
		Precision: 65.5882
		Recall: 61.6022
		F1 Score: 63.5328
	NAV_WAYPOINT
		Precision: 40.0000
		Recall: 23.4234
		F1 Score: 29.5455
	WEATHER
		Precision: 60.8696
		Recall: 50.9091
		F1 Score: 55.4455
