In [2]:
# Import spaCy and load the "en_core_web_sm" English pipeline.
# The resulting `nlp` callable is applied to raw text in the cells below
# to produce Doc objects.
import spacy
nlp = spacy.load("en_core_web_sm")

In [3]:
# Build a sentence that is itself wrapped in literal double quotes
# (and contains an apostrophe), then echo it.
mystring = "\"We're moving to L.A.!\""
print(mystring)

"We're moving to L.A.!"


In [4]:
# Run the pipeline on the quoted sentence and list its tokens on one line,
# each token followed by " | ".
doc = nlp(mystring)
print(*(tok.text for tok in doc), sep=" | ", end=" | ")

" | We | 're | moving | to | L.A. | ! | " | 

In [5]:
# Prefixes, suffixes, and infixes

In [7]:
# Sentence mixing a contraction, a hyphenated word, an e-mail address and a
# URL with trailing punctuation; print one token per line.
doc2 = nlp(u"We're here to help! Send snail-mail, email support@oursite.com or visit us at https://oursite.com.!")
print(*doc2, sep="\n")

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
https://oursite.com
.
!


In [9]:
# Sentence containing a number+unit ("5km") and a currency amount ("$10.30");
# print one token per line.
doc3 = nlp(u"A 5km NYC cab ride costs $10.30")
print(*doc3, sep="\n")

A
5
km
NYC
cab
ride
costs
$
10.30


In [10]:
# Sentence containing abbreviations with periods ("St.", "U.S.");
# print one token per line.
doc4 = nlp(u"Let's visit St. Louis in the U.S. next year.")
print(*doc4, sep="\n")

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [12]:
# Count the tokens in `doc` (the Doc built from `mystring`)
print(len(doc))

8


In [13]:
# Count the entries in the vocabulary object attached to `doc`
print(len(doc.vocab))

794


In [None]:
# Tokens can be retrieved by index position and sliced.
# Index 2 of `doc` (the Doc built from `mystring`) is the token 're.
doc[2]

're


In [18]:
# Build a short sentence to demonstrate indexing and slicing on a Doc
doc5=nlp(u"It is better to give than to receive.")
# Retrieve the third token (index 2, zero-based)
doc5[2]

better

In [19]:
# Retrieve three tokens from the middle (indices 2, 3 and 4)
doc5[2:5]

better to give

In [20]:
# Retrieve the last four tokens (the final period counts as a token)
doc5[-4:]

than to receive.

In [21]:
# Tokens cannot be reassigned.
# Build two Docs whose text differs only in the adjective, for use in the
# item-assignment attempt in the next cell.
doc6=nlp(u"My dinner was horrible.")
doc7=nlp(u"My dinner was delicious.")


In [22]:
# Try to change "My dinner was horrible." to "My dinner was delicious."
# Doc objects do not support item assignment, so the statement raises
# TypeError. The error is the point of this cell, so catch it and print it
# instead of letting it abort a Restart & Run All.
try:
    doc6[3] = doc7[3]
except TypeError as err:
    print("TypeError:", err)


TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [26]:
# Named entities

doc8 = nlp(u"Apple to build the Hong Kong factory for $6 billion")

# All tokens on one line, each followed by " | "
print(*(tok.text for tok in doc8), sep=" | ", end=" | ")

print("\n------")

# One line per entity: text - label - description from spacy.explain
for entity in doc8.ents:
    print(entity.text, entity.label_, spacy.explain(entity.label_), sep=" - ")

Apple | to | build | the | Hong | Kong | factory | for | $ | 6 | billion | 
------
Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 billion - MONEY - Monetary values, including unit


In [None]:
# Number of named entities found in doc8
len(doc8.ents)

3

In [32]:
# Noun Chunks
# Print each noun chunk of the sentence on its own line.
doc9 = nlp(u"Autonomous cars shift insurance liability toward manufacturers")
print("\n".join(span.text for span in doc9.noun_chunks))

Autonomous cars
insurance liability
manufacturers


In [33]:
# Noun chunks of a second sentence, one per line.
doc10 = nlp(u"Red cars do not carry higher insurance rates.")
print("\n".join(span.text for span in doc10.noun_chunks))

Red cars
higher insurance rates


In [35]:
# Noun chunks of a sentence with a long, comma-separated modifier list,
# one per line.
doc11 = nlp(u"He was a one-eyed, one-honored, flying, purple people-eater.")
print("\n".join(span.text for span in doc11.noun_chunks))

He
a one-eyed, one-honored, flying, purple people-eater


In [43]:
# Built-in visualizers
# Render the dependency parse of a sentence inline in the notebook.
from spacy import displacy

doc = nlp(u"Apple is going to build a U.K. factory for $6 billion.")
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"distance": 115},  # renderer setting passed through to displaCy
)


In [46]:
# Render the named entities of a sentence inline in the notebook.
doc = nlp(u"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.")
displacy.render(
    doc,
    style="ent",
    jupyter=True,
)
