In [1]:
import os
import spacy
import pickle

In [2]:
# Move up one level so that the 'NER/...' paths used by later cells resolve
# against the working directory.
# The guard keeps this cell idempotent: without it, every re-run climbs one
# more directory and the later relative paths stop resolving.
# NOTE(review): assumes the intended working directory is the one that
# contains the 'NER' folder -- confirm against the repository layout.
if not os.path.isdir('NER'):
    os.chdir('../')

In [3]:
# Entity labels that get registered on the NER component
LABEL = ['ECON', 'EDU', 'SCC', 'NBE', 'RETH']

# Pickled training set; appended verbatim to the current working directory,
# which is why it starts with a slash
in_file = '/NER/sentence_samples/tsv/training_1.p'

# Name or path of an existing spaCy pipeline to continue training;
# None means a blank English pipeline is created instead
model = None

# Remember to change this before training a different model
new_model_name = 'Custom_NER_Model'

# Number of passes over the training data
n_iter = 50

# Load the training examples.
# presumably a list of (text, annotations) pairs, judging by how the
# training loop unpacks each batch -- verify against the file's producer
# NOTE(security): pickle.load can execute arbitrary code, so only load
# files that come from a trusted source.
train_path = os.getcwd() + in_file
with open(train_path, 'rb') as train_file:
    TRAIN_DATA = pickle.load(train_file)

In [4]:
# Build the pipeline: load the existing model when `model` is set,
# otherwise start from a blank English pipeline.
if model is not None:
    nlp = spacy.load(model)  # load existing spacy model
    print("Loaded model '%s'" % model)
else:
    nlp = spacy.blank('en')  # create blank Language class
    print("Created blank 'en' model")

# Reuse the pipeline's NER component when it has one, otherwise add it.
if 'ner' not in nlp.pipe_names:
    ner = nlp.add_pipe('ner')
else:
    ner = nlp.get_pipe('ner')

# Register every custom label on the NER component.
for label in LABEL:
    ner.add_label(label)

# Create the optimizer.
if model is None:
    # Fresh pipeline: initialize the weights. `nlp.begin_training()` is a
    # deprecated alias of `nlp.initialize()` in spaCy v3.
    optimizer = nlp.initialize()
else:
    # Existing pipeline: keep the trained weights and only get an optimizer.
    # The spaCy v2 spelling `nlp.entity.create_optimizer()` raises
    # AttributeError on v3, where `Language` has no `entity` attribute.
    optimizer = nlp.resume_training()
    move_names = list(ner.move_names)

# Pipes that should stay enabled during training
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]

# Every other pipe is disabled while training so it remains unaffected
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]


Created blank 'en' model


In [5]:
# Importing requirements
from spacy.util import minibatch, compounding
from spacy.training.example import Example
import random

In [6]:
# Train with every pipe listed in `other_pipes` temporarily disabled, so only
# the remaining components (NER) are updated.
# `select_pipes(disable=...)` is the spaCy v3 replacement for the deprecated
# `disable_pipes(*names)`.
# NOTE(review): the recorded losses stop improving after roughly the 11th
# epoch and then drift upwards -- worth checking the dropout rate and the
# annotations before relying on this model.
with nlp.select_pipes(disable=other_pipes):
    for itn in range(n_iter):
        # Reshuffles TRAIN_DATA in place at the start of every epoch
        random.shuffle(TRAIN_DATA)
        losses = {}
        # Batch size grows from 1 towards 16, compounding by 1.001 per batch
        batches = minibatch(TRAIN_DATA, size=compounding(1.0, 16.0, 1.001))
        for batch in batches:
            # Each batch item is unpacked as a (text, annotations) pair and
            # turned into an Example
            examples = [
                Example.from_dict(nlp.make_doc(text), annotation)
                for text, annotation in batch
            ]

            # Update the model; per-component losses accumulate in `losses`
            nlp.update(examples, sgd=optimizer, drop=0.35,
                       losses=losses)
        print('Losses', losses)

Losses {'ner': 19078.004019477565}
Losses {'ner': 15225.171329901244}
Losses {'ner': 14759.985806808134}
Losses {'ner': 14614.118398110262}
Losses {'ner': 14420.400937770713}
Losses {'ner': 14260.926852648141}
Losses {'ner': 14177.790920692327}
Losses {'ner': 14218.117819534533}
Losses {'ner': 14147.217091704444}
Losses {'ner': 14096.440869518547}
Losses {'ner': 14021.705497496301}
Losses {'ner': 14122.238242318987}
Losses {'ner': 14180.485968848921}
Losses {'ner': 14362.280625235211}
Losses {'ner': 14441.846285267391}
Losses {'ner': 14505.60390034982}
Losses {'ner': 14596.388501769861}
Losses {'ner': 14637.272528973064}
Losses {'ner': 14599.315565408173}
Losses {'ner': 14811.655697095657}
Losses {'ner': 14760.853716921243}
Losses {'ner': 14741.876350499833}
Losses {'ner': 14749.435829107053}
Losses {'ner': 14752.786531331261}
Losses {'ner': 14859.37415949061}
Losses {'ner': 14710.879137070719}
Losses {'ner': 15036.256068015826}
Losses {'ner': 14948.822378814615}
Losses {'ner': 14943.6

In [7]:
# Output directory
from pathlib import Path
# Built from separate components rather than a backslash-separated literal:
# '\M' and '\C' are invalid escape sequences in a normal string, and outside
# Windows a backslash is not a path separator, so the old literal named a
# single oddly-named entry instead of nested directories.
# The last component matches `new_model_name` from the configuration cell;
# keep the two in sync.
output_dir = Path('NER') / 'Models' / 'Custom_NER_Model'

In [8]:
# Save the trained pipeline to the output directory.
# parents=True creates any missing intermediate directories (a plain mkdir()
# raises FileNotFoundError when the parent folder does not exist yet), and
# exist_ok=True makes re-running this cell safe.
output_dir.mkdir(parents=True, exist_ok=True)
nlp.meta['name'] = 'cus_ner'  # rename model
nlp.to_disk(output_dir)
print("Saved model to", output_dir)

Saved model to NER\Models\Custom_NER_Model
