<a href="https://colab.research.google.com/github/yeesem/Natural-Laguage-Processing/blob/main/Named_Entity_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
import spacy

In [42]:
nlp = spacy.load('en_core_web_sm')

# **Named Entity Recognition**

In [43]:
def show_ents(doc):
  """Print the named entities of a processed document.

  The raw ``doc.ents`` tuple is printed first, followed by one line per
  entity in the form ``<text> - <label> <explanation>``. When the document
  has no entities, "No entities found" is printed instead.

  Args:
    doc: Object exposing an ``ents`` sequence whose items provide ``text``
      and ``label_`` attributes (e.g. a spaCy ``Doc``).
  """
  print(doc.ents , '\n')
  if not doc.ents:
    print("No entities found")
    return

  for ent in doc.ents:
    # spacy.explain() returns None for labels it has no description for
    # (e.g. custom labels). Fall back to a placeholder so the string
    # concatenation below cannot raise TypeError.
    description = spacy.explain(ent.label_) or "(no description available)"
    print(ent.text + ' - ' + ent.label_ + " " + description)

In [44]:
doc = nlp(u"Hi how are you?")

In [45]:
show_ents(doc)

() 

No entities found


In [46]:
# "May" occurs twice here: once as a verb and once as a month. In the recorded
# output only the month (inside "next May") is tagged as an entity.
doc = nlp(u"May i go to Washington, DC next May to see the Washington Monument")

In [47]:
show_ents(doc)

(Washington, DC, next May, the Washington Monument) 

Washington, DC - GPE Countries, cities, states
next May - DATE Absolute or relative dates or periods
the Washington Monument - ORG Companies, agencies, institutions, etc.


In [48]:
doc = nlp(u"Can I please have 500 dollars of Microsoft stock")

In [49]:
show_ents(doc)

(500 dollars, Microsoft) 

500 dollars - MONEY Monetary values, including unit
Microsoft - ORG Companies, agencies, institutions, etc.


In [50]:
# In the recorded output the model does not tag "Tesla" in this sentence;
# the following cells add it to the entities by hand.
doc = nlp(u"Tesla to build a Malaysia factory for $6 million")

In [51]:
show_ents(doc)

(Malaysia, $6 million) 

Malaysia - GPE Countries, cities, states
$6 million - MONEY Monetary values, including unit


In [52]:
from spacy.tokens import Span

In [53]:
# Look up the integer ID stored for the 'ORG' label string in the vocabulary;
# it is passed as the span label when the entity is created.
ORG = doc.vocab.strings[u'ORG']

In [54]:
ORG

383

In [55]:
# Span(doc, start, end, label): covers token 0 only (end index is exclusive),
# i.e. "Tesla", labelled as ORG.
new_ent = Span(doc,0,1,label=ORG)

In [56]:
# Rebuild the entity list with the manually created span appended.
# NOTE(review): re-running this cell on the same doc would try to add the
# same span again — presumably that fails; re-create `doc` first.
doc.ents = [*doc.ents, new_ent]

In [57]:
show_ents(doc)

(Tesla, Malaysia, $6 million) 

Tesla - ORG Companies, agencies, institutions, etc.
Malaysia - GPE Countries, cities, states
$6 million - MONEY Monetary values, including unit


In [75]:
# "PB" is an abbreviation that is presumably not recognised by the model;
# it is labelled manually as a GPE in the cells below.
doc2 = nlp(u"Tesla is going to build a factory in PB")

In [76]:
# Integer ID stored for the 'GPE' label string in the vocabulary.
GPE = doc2.vocab.strings[u'GPE']

In [77]:
GPE

384

In [82]:
# Token 8 is "PB" (the final token of the sentence); the end index 9 is exclusive.
new_ent = Span(doc2,8,9,label = GPE)

In [83]:
# Keep the entities the model found and append the hand-made GPE span.
doc2.ents = [*doc2.ents, new_ent]

In [84]:
show_ents(doc2)

(Tesla, PB) 

Tesla - ORG Companies, agencies, institutions, etc.
PB - GPE Countries, cities, states


In [86]:
# The text is built from two adjacent string literals, which Python joins
# with nothing in between; the trailing space after the first sentence keeps
# the two sentences separated ("...cleaner. This..." not "...cleaner.This...").
doc = nlp(u"Our compnay created a brand new vacuum cleaner. "
          u"This new vacuum-cleaner is the best in show")

In [87]:
show_ents(doc)

() 

No entities found


In [88]:
from spacy.matcher import PhraseMatcher

In [89]:
matcher = PhraseMatcher(nlp.vocab)

In [90]:
phrase_list = ['vacuum cleaner','vacuum-cleaner']

In [91]:
# The PhraseMatcher above uses its default (token text) matching, so the
# patterns only need to be tokenized: nlp.make_doc() skips the tagger, parser
# and NER that a full nlp() call would run on every phrase.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [93]:
# Register the patterns under the 'newproduct' key, passing them as one list.
# The older (key, None, *docs) call form is the legacy spaCy v2 signature;
# the list form is accepted by spaCy >= 2.2.2 and is what spaCy v3 expects.
matcher.add('newproduct', phrase_patterns)

In [94]:
# Run the matcher over the document. Each result is a
# (match_id, start_token, end_token) tuple, as shown in the output below.
found_matches = matcher(doc)

In [95]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [96]:
from spacy.tokens import Span

In [97]:
# Integer ID stored for the 'PRODUCT' label string in the vocabulary.
PROD = doc.vocab.strings[u"PRODUCT"]

In [98]:
# Turn every phrase match into a PRODUCT span; the match id itself is not needed.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [99]:
# Existing entities first, then all spans produced from the phrase matches.
doc.ents = [*doc.ents, *new_ents]

In [100]:
show_ents(doc)

(vacuum cleaner, vacuum-cleaner) 

vacuum cleaner - PRODUCT Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT Objects, vehicles, foods, etc. (not services)


In [101]:
# Sentence containing two monetary amounts written in different styles
# ("$29.95" and "10 dollars").
doc = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars.")

In [102]:
# Keep only the entities labelled MONEY. Note that in the recorded output the
# "$" sign is not part of the first span ("29.95").
[ent for ent in doc.ents if ent.label_ == 'MONEY']

[29.95, 10 dollars]

In [103]:
# Count the MONEY entities without building an intermediate list.
sum(1 for ent in doc.ents if ent.label_ == 'MONEY')

2

# **Visualizing Named Entity Recognition**

In [104]:
from spacy import displacy

In [105]:
doc = nlp(u"Over the last quarter Apple sold nearly 20 thousand Iphone for a $6 million profit.")

In [106]:
displacy.render(doc,style = 'ent',jupyter = True)

In [117]:
# Two sentences built from adjacent string literals. The trailing space after
# the first one is required: without it the text reads "...profit.By contrast..."
# and the per-sentence rendering further down depends on a clean sentence boundary.
doc = nlp(u"Over the last quarter Apple sold nearly 20 thousand Iphone 13 for a $6 million profit. "
          u"By contrast, Sony only sold 8 thousand music players.")

In [118]:
displacy.render(doc,style = 'ent',jupyter = True)

In [119]:
# Render the entities one sentence at a time. Each sentence's text is passed
# through nlp() again, so the entities shown come from re-processing that
# sentence on its own rather than from the original `doc`.
for sent in doc.sents:
  displacy.render(nlp(sent.text),style = 'ent',jupyter = True)
  # Blank lines between the rendered sentences.
  print('\n')









In [126]:
# Customise the entity visualizer: choose which labels are highlighted and
# override the background used for ORG. The commented-out assignments are
# alternative colour values to try (plain colour / radial gradient).
#colors = {'ORG':'red'}
colors = {'ORG':'linear-gradient(purple,red,yellow)'}
#colors = {'ORG':'radial-gradient(yellow,green)'}
# 'ents' lists the labels to highlight; 'colors' maps a label to a CSS colour or gradient.
options = {'ents':['PRODUCT','ORG'],'colors':colors}

In [127]:
displacy.render(doc,style = 'ent',jupyter = True,options = options)

In [128]:
# Start displaCy's built-in web server (port 5000 in the recorded output)
# instead of rendering inline.
# NOTE(review): this call appears to block the cell until the server is shut
# down — confirm before relying on Restart & Run All.
displacy.serve(doc,style='ent',options=options)


Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.
