<a href="https://colab.research.google.com/github/vera-pro/ArtDATIS_NER_evaluation/blob/main/Evaluate_WikiNEuRal_on_a_sample_of_typed_documents_(semi_manual%2C_interactive).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import spacy
from spacy import displacy
import seaborn as sns

def visualise(text, preds):
    """Render NER predictions over ``text`` with displaCy and report unusable spans.

    Consecutive token-level predictions are merged into entity spans: a new
    span is opened when a tag starts with ``'B'`` or when a prediction begins
    more than one character after the end of the span being built; any other
    prediction extends the current span.

    Parameters
    ----------
    text : str
        The document to display.
    preds : iterable of dict
        Each item must provide ``'entity'``, ``'start'`` and ``'end'``.
        The tag is assumed to look like ``'B-XXX'`` / ``'I-XXX'`` (the label
        is taken as everything after the first two characters) -- TODO confirm
        against the model output.

    Returns
    -------
    list of tuple
        ``(start, end, label)`` for every merged span for which
        ``doc.char_span`` produced nothing usable (presumably because the
        character offsets do not line up with token boundaries). The caller
        stores these as "false positives".
    """
    # A blank pipeline is only used for tokenisation here.
    doc = spacy.blank("nl")(text)

    aligned_spans = []
    rejected = []
    label_ids = {}  # label -> 1-based index, in order of first appearance

    def flush(span_start, span_end, label):
        """File the finished span as an entity or as a rejected tuple."""
        if label == '':
            return  # nothing has been collected yet
        span = doc.char_span(span_start, span_end, label)
        if span:
            aligned_spans.append(span)
        else:
            rejected.append((span_start, span_end, label))

    ## Step 1: merging token predictions into entity spans
    label = ''
    span_start, span_end = 0, 0
    for pred in preds:
        tag = pred['entity']
        # The gap test also opens a new span when the model skipped the 'B' tag.
        extends_current = not tag.startswith('B') and pred['start'] <= span_end + 1
        if extends_current:
            span_end = pred['end']
            continue

        flush(span_start, span_end, label)
        label = tag[2:]
        label_ids.setdefault(label, len(label_ids) + 1)
        span_start, span_end = pred['start'], pred['end']

    # The span still being built when the predictions run out.
    flush(span_start, span_end, label)

    doc.ents = aligned_spans

    ## Step 2: visualising, one "Set2" colour per label
    palette = sns.color_palette("Set2", len(label_ids)).as_hex()
    options = {
        "ents": list(label_ids),
        "colors": {name: palette[idx - 1] for name, idx in label_ids.items()},
    }
    displacy.render(doc, style="ent", options=options, jupyter=True)
    return rejected

In [6]:
# Download the shared Google Drive folder into ./data (on Colab: /content/data);
# it provides ner_evaluation_typed_samples_new.p, which is loaded in the next cell.
!gdown 1lt7WSQuWuLRv6gBSYn0zvM03zb5RZ2Vr -O data --folder

Retrieving folder list
Processing file 1HaSHxqnc4LgBCNBBCOoaZAiQugdPkkmf ner_evaluation_typed_samples_new.p
Retrieving folder list completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1HaSHxqnc4LgBCNBBCOoaZAiQugdPkkmf
To: /content/data/ner_evaluation_typed_samples_new.p
100% 1.08M/1.08M [00:00<00:00, 126MB/s]
Download completed


In [7]:
import pickle

# Load the sampled documents together with their NER predictions.
# NOTE(review): pickle.load can execute arbitrary code, so only use it on files
# from a trusted source (here: the project's own Drive folder).
# The `with` block makes sure the file handle is closed after reading.
with open('/content/data/ner_evaluation_typed_samples_new.p', 'rb') as samples_file:
    data = pickle.load(samples_file)

In [None]:
def _locate_mention(text, mention, search_from=0):
    """Return the absolute ``(start, end)`` offsets of ``mention`` in ``text``.

    The search begins at ``search_from``. Returns ``None`` when the mention
    does not occur at or after that position.
    """
    start = text.find(mention, search_from)
    if start == -1:
        return None
    return start, start + len(mention)


# Interactive annotation: for every entry not yet processed, show the
# predictions, ask the annotator for missed entities and free-text notes,
# and save all results after each entry.
for entry in data:
    if 'false_positives' in entry:
        continue  # already processed
    false_positives = visualise(entry['text'], entry['ner'])
    false_negatives = []

    # Absolute offset in the text from which the next mention is searched;
    # mentions are expected in reading order.
    cur_start = 0
    print('==============')
    print("Are there any entities that we've missed?")
    print("Please enter them one by one: copy-paste the mentions from the text")
    print("in the order that they appear in the text")
    while True:
        mention = input("Copy-paste the next entity or type \"X\" to finish, then press enter")
        if mention == 'X':
            break  # the sentinel itself must not be recorded as an entity
        if not mention:
            continue  # empty input would produce a zero-length span
        located = _locate_mention(entry['text'], mention, cur_start)
        if located is None:
            # Do not store a bogus (-1, ...) span; let the annotator retry.
            print("This mention was not found after the previous one; "
                  "please copy it exactly as it appears in the text")
            continue
        start, end = located
        false_negatives.append((start, end, 'ADDED'))
        # Continue after this match so a repeated mention finds its next occurrence.
        cur_start = end

    notes = input("Do you have any comments about this text? Please type here and press enter")

    entry.update({'false_positives': false_positives,
                  'false_negatives': false_negatives,
                  'notes': notes})
    # Save after every entry; `with` guarantees the file is flushed and closed
    # before the annotator downloads it.
    with open('ner_evaluation_typed_samples_annotated.p', 'wb') as annotated_file:
        pickle.dump(data, annotated_file)
    to_continue = input('Press enter to continue or X to exit')
    if to_continue == 'X':
        print('****************')
        print('Please don\'t forget to save the results!')
        print('Find the file ner_evaluation_typed_samples_annotated.p on the left and click Download')
        print('Then upload this file to: https://drive.google.com/drive/u/0/folders/1lt7WSQuWuLRv6gBSYn0zvM03zb5RZ2Vr')
        print('Thank you!')
        break

Are there any entities that we've missed?
Please enter them one by one: copy-paste the mentions from the text
in the order that they appear in the text
Copy-paste the next entity or type "X" to finish, then press enterX
Do you have any comments about this text? Please type here and press enter
Press enter to continue or X to exit


Are there any entities that we've missed?
Please enter them one by one: copy-paste the mentions from the text
in the order that they appear in the text
