### Tokenization:

In [2]:
import spacy # importing the library

In [3]:
nlp = spacy.load('en_core_web_sm') # loads spaCy's small English pipeline (not just a word list); calling nlp(text) returns a processed Doc

In [3]:
mystring = '"We\'re moving to L.A!"' # the apostrophe is backslash-escaped because the literal itself is single-quoted

In [4]:
mystring # bare expression: the notebook echoes the repr, so the outer quotes and the backslash escape are shown

'"We\'re moving to L.A!"'

In [5]:
print(mystring) # print() shows the actual characters: no outer quotes and no backslash

"We're moving to L.A!"


In [6]:
# Run the pipeline over the sample string; the resulting doc is iterated token by token below.
doc = nlp(mystring)

# Echo each token's text and keep a copy so the whole list can be inspected afterwards.
token_list = []
for token in doc:
    text = token.text
    print(text)
    token_list.append(text)

# Show the collected texts, a blank spacer, and finally how many tokens there were.
print(token_list)
print('\n')
print(len(token_list))

"
We
're
moving
to
L.A
!
"
['"', 'We', "'re", 'moving', 'to', 'L.A', '!', '"']


8


In [7]:
# Sample text containing a contraction, a hyphenated word, an e-mail address and a URL.
doc2 = nlp(u"We're here to help! send us snail-mail at support@oursite.com, or contact us at http://www.outsite.com")

In [8]:
for token in doc2:
    print(token) # spaCy splits off punctuation and the hyphen, but keeps the e-mail address and the URL as single tokens

We
're
here
to
help
!
send
us
snail
-
mail
at
support@oursite.com
,
or
contact
us
at
http://www.outsite.com


In [9]:
doc3 = nlp(u"A 5km NYC cab ride costs $10.30")

for token in doc3:
    print(token.text) # the distance unit 'km' and the '$' sign each get their own token, while the price 10.30 is kept together

A
5
km
NYC
cab
ride
costs
$
10.30


In [10]:
doc4 = nlp(u"Let's visit St. Louis in U.S next year!")

for token in doc4:
    print(token.text) # the abbreviation 'St.' keeps its period and 'U.S' stays a single token

Let
's
visit
St.
Louis
in
U.S
next
year
!


In [11]:
len(doc4.vocab) # number of entries in the vocabulary object attached to doc4 (the one that came with the loaded 'en_core_web_sm' pipeline)

57852

In [12]:
type(doc4) # calling nlp() on a string gives back a spaCy Doc object

spacy.tokens.doc.Doc

### Named Entity Recognition:

In [13]:
# Headline-style sentence mentioning a company, a place and an amount of money.
doc5 = nlp(u"Apple to built a Hong Kong factory for $6 millions!")

In [14]:
# Print all tokens on a single line; end = '| ' replaces the newline print() would normally add.
for token in doc5:
    print(token.text, end = '| ')

Apple| to| built| a| Hong| Kong| factory| for| $| 6| millions| !| 

In [15]:
# Walk over the named-entity spans found in doc5.
for entity in doc5.ents:
    # One call per entity: the entity text, its label, then the blank spacer.
    # sep='\n' puts every item on its own line, exactly as three separate prints would.
    print(entity, entity.label_, '\n', sep='\n')

Apple
ORG


Hong Kong
GPE


$6 millions
MONEY




In [16]:
# To find out what each entity label stands for,
# spacy.explain() maps the label code to a short description.

for entity in doc5.ents:
    label = entity.label_
    print(entity)
    print(spacy.explain(label))  # print() stringifies its argument, so no str() wrapper is needed
    print(label)
    print('\n')

Apple
Companies, agencies, institutions, etc.
ORG


Hong Kong
Countries, cities, states
GPE


$6 millions
Monetary values, including unit
MONEY




### Noun Chunks:

In [17]:
# spaCy can also pull the noun chunks out of a text.
# Noun chunk: a noun together with the words that describe it (a base noun phrase).

# e.g. in "Autonomous cars", the noun "cars" is described by the word "Autonomous"

doc6 = nlp(u"Autonomous cars shift insurance liability toward the manufacturers")

for chunk in doc6.noun_chunks:
    print(chunk, end = ' | ')
    # prints every noun chunk on one line, each followed by ' | '

Autonomous cars | insurance liability | the manufacturers | 

### Visualization with Spacy:

In [5]:
from spacy import displacy # Module for visualization in spacy

In [19]:
# Sentence used for the dependency-parse visualization in the next cell.
doc7 = nlp(u"Apple is going to build a U.K factory for $6 million")

In [27]:
displacy.render(doc7, style = 'dep', jupyter = True, options = {'distance': 80})  # draws doc7's dependency parse inline in the notebook

# style = 'dep' selects the syntactic-dependency visualizer;
# the 'distance' option sets the spacing between words in the drawing

In [33]:
# Visualizing the named entities found by the entity recognizer

doc8 = nlp(u"Over the last quarter, Apple sold nealy 20 thousand iPods for a profit of $4 millions")

In [35]:
displacy.render(doc8, style = 'ent', jupyter = True) # same call with style = 'ent': highlights the named entities in the text instead of drawing the parse

In [None]:
doc9 = nlp(u"Hurry up! We're running out of time.")

displacy.serve(doc9, style = 'dep') # for use outside Jupyter: serves the visualization from a local web server instead of rendering it inline


[93m    Serving on port 5000...[0m
    Using the 'dep' visualizer



127.0.0.1 - - [01/Aug/2019 13:09:34] "GET / HTTP/1.1" 200 5570
127.0.0.1 - - [01/Aug/2019 13:09:34] "GET /favicon.ico HTTP/1.1" 200 5570


In [1]:
# So let's open the served page in a browser on port 5000, i.e. http://127.0.0.1:5000