In [3]:
#perform standard import
import spacy
nlp = spacy.load('en_core_web_sm')

In [4]:
# write a function to display basic entity info:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+' - '+ent.label_+' - '+str(spacy.explain(ent.label_)))
        else:
              print('No named entities found.')

In [5]:
doc = nlp(u'May I go to Washington, DC next May to see the Washington Monument?')
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.
No named entities found.


In [6]:
doc = nlp(u'Can I please borrow 500 dollars from you to by some Microsoft stock?')
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.
No named entities found.


In [7]:
doc = nlp(u'Can I please borrow 500 dollars from you to by some Microsoft stock?')

for ent in doc.ents:
    print(ent.text, ent.start, ent.end, ent.start_char, ent.end_char, ent.label_)

500 dollars 4 6 20 31 MONEY
Microsoft 11 12 52 61 ORG


In [8]:
# adding a named entity  to span


In [9]:
# Adding a Named Entity to SPAN

In [10]:
doc = nlp(u'Tesla to build a U.K. factory for $6 million')

show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit
No named entities found.


In [11]:
from spacy.tokens import Span


#Get the hash value of the ORG entity label
ORG = doc.vocab.strings[u'ORG']

#create a span for the new entity
new_ent = Span(doc, 0, 1, label=ORG)

#Add the entity to the existing  Doc object
doc.ents = list(doc.ents) + [new_ent]

In [12]:
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit
No named entities found.


In [13]:
# ADDING NAMED ENTITIES TO ALL MATCHING SPAN

In [14]:
doc = nlp(u'Our company plans to introduce a new vacuum  cleaner.'
          u'if successful, the vacuum-cleaner will be our first product.')
show_ents(doc)

first - ORDINAL - "first", "second", etc.
No named entities found.


In [15]:
# IMPORT PhraseMatcher and create a matcher object
from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)


In [16]:
#CREATE THE DESIRED PHASE PATTERNS:
phrase_list = ['vacuum  cleaner', 'vacuum-cleaner']
phrase_patterns = [nlp(text) for text in phrase_list]

In [17]:
# Apply the matcher to our Doc object:
matcher.add('newproduct', None, *phrase_patterns)

# Apply the matcher to our Doc object:
matches = matcher(doc)

# See what matches occur:

matches


[(2689272359382549672, 13, 16)]

In [18]:
from spacy.tokens import Span

PROD = doc.vocab.strings[u'PRODUCT']

new_ents = [Span(doc, match[1],match[2],label=PROD) for match in matches]

doc.ents = list(doc.ents) + new_ents

In [19]:
show_ents(doc)


vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
first - ORDINAL - "first", "second", etc.
No named entities found.


In [20]:
# Counting entities
doc = nlp(u'Originally priced at $29.50, the sweater was marked down to five dollars.')
show_ents(doc)

29.50 - MONEY - Monetary values, including unit
five dollars - MONEY - Monetary values, including unit
No named entities found.


In [21]:
len([ent  for ent in doc.ents if ent.label=='MONEY'])

0

In [22]:
# Line Break

In [23]:
spacy._version_

AttributeError: module 'spacy' has no attribute '_version_'

In [None]:
doc = nlp(u'Originally priced at $29.50,\nthe sweater was marked down to five dollars,')
show_ents(doc)

In [None]:
# NOUN CHUNKS
doc = nlp(u"Autonomous cars shift insurance liability toward manufacturers.")

for chunk in doc.noun_chunks:
    print(chunk.text+' - '+chunk.root.text+' - '+chunk.root.dep_+' - '+chunk.root.head.text)

In [None]:
len(doc.noun_chunks)

In [24]:
len(list(doc.noun_chunks))

2

In [25]:
# Visuailization named entities
import spacy
nlp = spacy.load('en_core_web_sm')
from spacy import displacy

In [26]:
doc = nlp(u'over the last quater Apple sold nearly 20 thousand ipods for a profit of $6 million.'
          u'By contrast, Sony sold only 7 thousand Walkman music players.')
displacy.render(doc, style='ent', jupyter=True)

In [27]:
for sent in doc.sents:
    displacy.render(nlp(sent.text), style='ent' , jupyter=True)

In [28]:
doc2 = nlp(u'Over the last quater Apple sold nearly 20 thoousand iPods for a profit of $6 million.'
          u'By contrast, my kids sold a lot of lemonade.')

In [29]:
for sent in doc2.sents:
    displacy.render(nlp(sent.text), style='ent', jupyter=True)



In [43]:
for sent in doc2.sents:
    docx = nlp(sent.text)
    if docx.ents:
        displacy.render(docx, style='ent', jupyter=True)
    else:
        print(docx.text)

By contrast, my kids sold a lot of lemonade.


In [37]:
#Viewing Specific Entities

#You can pass a list of entity types to restrict the visualization:

In [31]:
options = {'ents': ['ORG', 'MONEY']}

displacy.render(doc, style='ent', jupyter=True, options=options)

In [38]:
#Coustomizing Colors and Effects

#You can also pass background color and gradient options:

In [33]:
colors = {'ORG': 'linear-gradient(90deg, #aa9cfc, #fc9ce7)','PRODUCT':'radial-gradient(yellow,green)'}

options = {'ents': ['ORG', 'PRODUCT'], 'color':colors}

displacy.render(doc, style='ent' , jupyter=True, options=options)