In [None]:
# NER - Named Entity Recognizer
# Extracts individual entities from text and assigns each one a type. For instance, "Tesla" can be the name of a person and also of a company;
# NER can differentiate between the person and the company.
# NER can also be done using Hugging Face.
# There are other ways to add custom NER that can be found in the spaCy library and other documentation (for example, specifying certain patterns), as shared in the video.

# CRF (Conditional Random Fields) and BERT are also popularly used for NER

In [1]:
import spacy

# Name of the spaCy pipeline package to load; change it here to try another model.
MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)

In [2]:
# Show the names of the components in the loaded pipeline
# (the output includes 'ner', the component whose entities are inspected below).
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [3]:
# Run the loaded pipeline over one example sentence; `doc` is reused by the cells below.
# The "aquire" misspelling is part of the original input and is kept as-is so the
# recorded output still corresponds to this text.
sentence = "Tesla Inc. is going to aquire Twitter for $45 Dollars."
doc = nlp(sentence)

# Print each entity found in `doc`: its text, its label, and spaCy's
# human-readable description of that label.
divider = " | "
for entity in doc.ents:
    label = entity.label_
    print(entity.text, divider, label, divider, spacy.explain(label))

Tesla Inc.  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  PRODUCT  |  Objects, vehicles, foods, etc. (not services)
$45 Dollars  |  MONEY  |  Monetary values, including unit


In [4]:
from spacy import displacy

# Visualise the entities annotated on `doc` using displaCy's "ent" style.
displacy.render(doc, style="ent")

In [7]:
# Show the entity labels registered for the pipeline's "ner" component.
nlp.pipe_labels["ner"]

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [13]:
from spacy.tokens import Span

def _one_token_span(doc, token_text, label):
    """Return a one-token ``Span`` labelled ``label`` over the first token equal to ``token_text``.

    Looking the token up by its text instead of a hard-coded index keeps the
    span on the intended word even if the sentence or its tokenization changes.

    Args:
        doc: the spaCy ``Doc`` to search.
        token_text: exact text of the token to wrap.
        label: entity label to assign to the span.

    Returns:
        A ``Span`` covering exactly that one token.

    Raises:
        ValueError: if no token in ``doc`` has exactly that text.
    """
    for token in doc:
        if token.text == token_text:
            return Span(doc, token.i, token.i + 1, label=label)
    raise ValueError(f"token {token_text!r} not found in doc")


# Manually mark the "Tesla" and "Twitter" tokens as organisations
# (the pipeline labelled "Twitter" as PRODUCT in the output above).
s1 = _one_token_span(doc, "Tesla", "ORG")
s2 = _one_token_span(doc, "Twitter", "ORG")

# default="unmodified" keeps the entity annotation of every token outside s1/s2.
# This mutates `doc` in place, so spans applied by earlier runs of this cell
# persist until `doc` is rebuilt by re-running the cell that creates it.
# NOTE(review): the recorded output of the next cell lists "aquire" as ORG, which
# these spans do not cover - presumably left over from an earlier run that used a
# different token index; re-run from a fresh kernel to confirm.
doc.set_ents([s1, s2], default="unmodified")


In [14]:
# List the entities on `doc` again (text and label only) to inspect the
# result of the manual relabelling.
separator = " | "
for entity in doc.ents:
    print(entity.text, separator, entity.label_)

Tesla Inc.  |  ORG
aquire  |  ORG
Twitter  |  ORG
$45 Dollars  |  MONEY
